This commit is contained in:
2025-06-21 21:05:15 +00:00
parent 5e49c66e15
commit 84ed171fdf
16 changed files with 1644 additions and 2848 deletions

408
transform/aggregate.go Normal file
View File

@@ -0,0 +1,408 @@
package transform
import (
"errors"
"fmt"
"io"
"time"
"gordenko.dev/dima/diploma"
"gordenko.dev/dima/diploma/bin"
"gordenko.dev/dima/diploma/timeutil"
)
// INSTANT
// InstantPeriodsWriterOptions holds the settings accepted by NewInstantPeriodsWriter.
type InstantPeriodsWriterOptions struct {
// Dst receives the packed records; the constructor rejects a nil Dst.
Dst io.Writer
// GroupBy selects the period size; the constructor accepts hour, day and month.
GroupBy diploma.GroupBy
// AggregateFuncs is a bit mask of diploma.AggregateMin, diploma.AggregateMax and
// diploma.AggregateAvg; at least one of them must be set.
AggregateFuncs byte
// FirstHourOfDay must be in the range 0..23. A value above zero switches day and
// month grouping to the FirstHourOfDay-aware period functions.
FirstHourOfDay int
}
// InstantPeriodsWriter groups instant readings into periods, computes the
// requested aggregates (min, max, average) for each period and passes one
// packed record per period to a ChunkedResponder.
type InstantPeriodsWriter struct {
// aggregateFuncs is the bit mask of aggregates written into each record.
aggregateFuncs byte
// arr is the reusable buffer a single record is packed into.
arr []byte
// responder buffers the packed records and writes them to the destination.
responder *ChunkedResponder
// groupBy is the period size; decrementPeriod switches on it.
groupBy diploma.GroupBy
// firstHourOfDay is used by the *UsingFHD period functions.
firstHourOfDay int
// time2period maps a reading timestamp to the period it belongs to.
time2period func(uint32) time.Time
// currentPeriod is the period that is being accumulated right now.
currentPeriod time.Time
// lastTimestamp is the timestamp of the most recently fed reading.
lastTimestamp uint32
endTimestamp uint32 // time of the reading at the end of the period
// min and max are the smallest and largest values seen in the current period.
min float64
max float64
// total is the sum of the values in the current period (used for the average).
total float64
// entries is the number of readings in the current period; 0 means nothing was fed yet.
entries int
}
// NewInstantPeriodsWriter validates opt and returns a writer that aggregates
// instant readings per period.
//
// It returns an error when Dst is nil, FirstHourOfDay is outside 0..23, none of
// the min/max/avg flags is set in AggregateFuncs, or GroupBy is not hour, day
// or month.
func NewInstantPeriodsWriter(opt InstantPeriodsWriterOptions) (*InstantPeriodsWriter, error) {
	if opt.Dst == nil {
		return nil, errors.New("Dst option is required")
	}
	if opt.FirstHourOfDay < 0 || opt.FirstHourOfDay > 23 {
		return nil, fmt.Errorf("wrong FirstHourOfDay option: %d", opt.FirstHourOfDay)
	}

	// Count the requested aggregates so the record buffer can be sized up front.
	funcsQty := 0
	for _, flag := range []byte{diploma.AggregateMin, diploma.AggregateMax, diploma.AggregateAvg} {
		if opt.AggregateFuncs&flag == flag {
			funcsQty++
		}
	}
	if funcsQty == 0 {
		return nil, errors.New("AggregateFuncs option is required")
	}

	// Record layout: 12 bytes for period, since and until (4 bytes each),
	// followed by one 8-byte float64 per requested aggregate.
	const headerSize, floatSize = 12, 8

	w := &InstantPeriodsWriter{
		aggregateFuncs: opt.AggregateFuncs,
		arr:            make([]byte, headerSize+funcsQty*floatSize),
		responder:      NewChunkedResponder(opt.Dst),
		groupBy:        opt.GroupBy,
		firstHourOfDay: opt.FirstHourOfDay,
	}

	// Day and month grouping use the FirstHourOfDay-aware variants only when
	// a non-zero first hour was requested.
	shifted := w.firstHourOfDay > 0
	switch {
	case opt.GroupBy == diploma.GroupByHour:
		w.time2period = groupByHour
	case opt.GroupBy == diploma.GroupByDay && shifted:
		w.time2period = w.groupByDayUsingFHD
	case opt.GroupBy == diploma.GroupByDay:
		w.time2period = groupByDay
	case opt.GroupBy == diploma.GroupByMonth && shifted:
		w.time2period = w.groupByMonthUsingFHD
	case opt.GroupBy == diploma.GroupByMonth:
		w.time2period = groupByMonth
	default:
		return nil, fmt.Errorf("unknown groupBy %d option", opt.GroupBy)
	}
	return w, nil
}
// groupByDayUsingFHD maps timestamp to the start of its day, treating readings
// taken before firstHourOfDay as belonging to the previous day.
//
// The hour is taken from the reading itself. The previous code tested the hour
// of the value returned by timeutil.FirstSecondInPeriod, which is presumably
// always the first hour of the day, so every reading was moved one day back.
// NOTE(review): assumes FirstSecondInPeriod(t, "d") returns the start of t's
// day in t's location — confirm against timeutil.
func (s *InstantPeriodsWriter) groupByDayUsingFHD(timestamp uint32) time.Time {
	t := time.Unix(int64(timestamp), 0)
	day := timeutil.FirstSecondInPeriod(t, "d")
	if t.Hour() < s.firstHourOfDay {
		day = day.AddDate(0, 0, -1)
	}
	return day
}
// groupByMonthUsingFHD maps timestamp to the start of its month, treating
// readings taken on the first day of a month before firstHourOfDay as
// belonging to the previous month.
//
// The previous code tested the hour of the truncated month start and stepped
// back by one day. That produced a key which is not a month start, so the
// month-wise decrementPeriod could never land on it again.
// NOTE(review): assumes FirstSecondInPeriod(t, "m") returns the first instant
// of t's month in t's location — confirm against timeutil.
func (s *InstantPeriodsWriter) groupByMonthUsingFHD(timestamp uint32) time.Time {
	t := time.Unix(int64(timestamp), 0)
	month := timeutil.FirstSecondInPeriod(t, "m")
	if t.Day() == 1 && t.Hour() < s.firstHourOfDay {
		month = month.AddDate(0, -1, 0)
	}
	return month
}
// Feed adds one reading; finished period records go through
// ChunkedResponder.AppendRecord, which may send the accumulated chunk.
func (s *InstantPeriodsWriter) Feed(timestamp uint32, value float64) {
	const bufferOnly = false
	s.feed(timestamp, value, bufferOnly)
}
// FeedNoSend adds one reading; finished period records are only buffered in
// the responder and nothing is written to the destination.
func (s *InstantPeriodsWriter) FeedNoSend(timestamp uint32, value float64) {
	const bufferOnly = true
	s.feed(timestamp, value, bufferOnly)
}
// feed folds one reading into the current period. When the reading belongs to
// a different period, the current one is packed and emitted, blank records are
// emitted for every skipped period, and a new period is opened with the reading.
//
// Periods are stepped backwards in time (see decrementPeriod), so readings are
// expected newest first. With isBuffer set, records are only buffered in the
// responder; otherwise AppendRecord is used, which may also send the chunk.
func (s *InstantPeriodsWriter) feed(timestamp uint32, value float64, isBuffer bool) {
	emit := func() {
		if isBuffer {
			s.responder.BufferRecord(s.arr)
			return
		}
		// feed has no error result, so a send error is dropped here, as before.
		_ = s.responder.AppendRecord(s.arr)
	}

	period := s.time2period(timestamp)
	opensPeriod := false

	switch {
	case s.entries == 0:
		// Very first reading: it opens the first period.
		s.currentPeriod = period
		opensPeriod = true

	case !period.Equal(s.currentPeriod):
		// Close the finished period. Its "since" is the last reading fed for
		// it (s.lastTimestamp), the same value Close uses; the new reading
		// belongs to another period and must not leak into this record.
		s.packPeriod(s.lastTimestamp)
		emit()

		// Emit a blank record for every period skipped between the two readings.
		s.decrementPeriod()
		for period.Before(s.currentPeriod) {
			s.packBlankPeriod()
			emit()
			s.decrementPeriod()
		}
		// Normally the decrement lands exactly on period; re-sync explicitly so
		// a misaligned or out-of-order reading cannot leave currentPeriod
		// permanently off and close a period on every following reading.
		s.currentPeriod = period
		opensPeriod = true

	default:
		// Same period: update the running aggregates.
		if value < s.min {
			s.min = value
		} else if value > s.max {
			s.max = value
		}
		// Needed for the average.
		s.total += value
		s.entries++
	}

	if opensPeriod {
		s.endTimestamp = timestamp
		s.min, s.max, s.total = value, value, value
		s.entries = 1
	}
	s.lastTimestamp = timestamp
}
// decrementPeriod moves currentPeriod one step back in time: one hour, one day
// or one month depending on groupBy. Any other groupBy leaves it unchanged.
func (s *InstantPeriodsWriter) decrementPeriod() {
	prev := s.currentPeriod
	if s.groupBy == diploma.GroupByHour {
		prev = prev.Add(-time.Hour)
	} else if s.groupBy == diploma.GroupByDay {
		prev = prev.AddDate(0, 0, -1)
	} else if s.groupBy == diploma.GroupByMonth {
		prev = prev.AddDate(0, -1, 0)
	}
	s.currentPeriod = prev
}
// packBlankPeriod fills arr with a placeholder record for a period without
// readings: the period start in the first 4 bytes and zeros everywhere else.
func (s *InstantPeriodsWriter) packBlankPeriod() {
	periodStart := uint32(s.currentPeriod.Unix())
	bin.PutUint32(s.arr[0:], periodStart)

	// Zero since, until and all aggregate slots.
	rest := s.arr[4:]
	for i := range rest {
		rest[i] = 0
	}
}
// Close emits the record of the period still being accumulated (if any
// reading was fed) and flushes the responder, which also writes the
// end-of-stream message.
//
// The final record is buffered rather than appended: Flush sends it right
// away, and a write failure is reported to the caller instead of being lost in
// an ignored AppendRecord result.
func (s *InstantPeriodsWriter) Close() (err error) {
	if s.entries > 0 {
		s.packPeriod(s.lastTimestamp)
		s.responder.BufferRecord(s.arr)
	}
	return s.responder.Flush()
}
// packPeriod serialises the current period into arr: period start, timestamp
// (since), endTimestamp (until), followed by the requested aggregates in the
// fixed order min, max, average.
func (s *InstantPeriodsWriter) packPeriod(timestamp uint32) {
	rec := s.arr
	bin.PutUint32(rec[0:], uint32(s.currentPeriod.Unix()))
	bin.PutUint32(rec[4:], timestamp)
	bin.PutUint32(rec[8:], s.endTimestamp)

	// Aggregates start right after the 12-byte header.
	offset := 12
	requested := func(flag byte) bool {
		return s.aggregateFuncs&flag == flag
	}
	putFloat := func(v float64) {
		bin.PutFloat64(rec[offset:], v)
		offset += 8
	}

	if requested(diploma.AggregateMin) {
		putFloat(s.min)
	}
	if requested(diploma.AggregateMax) {
		putFloat(s.max)
	}
	if requested(diploma.AggregateAvg) {
		putFloat(s.total / float64(s.entries))
	}
}
/*
The idea behind splitting into periods:
For every period a single, last value is taken.
The start of a period is the end of the previous one. If the previous period
is not strictly adjacent (there are gaps), blank records are inserted in
place of the gaps.
The advantage of this approach is that the real value at the end of the
period is always shown.
*/
// CumulativePeriodsWriter turns readings of a cumulative metric into one
// record per period and passes the packed records to a ChunkedResponder.
type CumulativePeriodsWriter struct {
// arr is the reusable buffer a single record is packed into.
arr []byte
// responder buffers the packed records and writes them to the destination.
responder *ChunkedResponder
// firstHourOfDay is used by the *UsingFHD period functions.
firstHourOfDay int
// currentPeriod is the period that is being accumulated right now.
currentPeriod time.Time
// groupBy is the period size; decrementPeriod switches on it.
groupBy diploma.GroupBy
// time2period maps a reading timestamp to the period it belongs to.
time2period func(uint32) time.Time
// endTimestamp and endValue hold the reading that opened the current period;
// endTimestamp == 0 means nothing was fed yet.
endTimestamp uint32
endValue float64
// lastTimestamp and lastValue hold the most recently fed reading.
lastTimestamp uint32
lastValue float64
}
// CumulativePeriodsWriterOptions holds the settings accepted by NewCumulativePeriodsWriter.
type CumulativePeriodsWriterOptions struct {
// Dst receives the packed records; the constructor rejects a nil Dst.
Dst io.Writer
// GroupBy selects the period size; the constructor accepts hour, day and month.
GroupBy diploma.GroupBy
// FirstHourOfDay must be in the range 0..23. A value above zero switches day and
// month grouping to the FirstHourOfDay-aware period functions.
FirstHourOfDay int
}
// NewCumulativePeriodsWriter validates opt and returns a writer that emits one
// record per period for a cumulative metric.
//
// It returns an error when Dst is nil, FirstHourOfDay is outside 0..23, or
// GroupBy is not hour, day or month.
func NewCumulativePeriodsWriter(opt CumulativePeriodsWriterOptions) (*CumulativePeriodsWriter, error) {
	if opt.Dst == nil {
		return nil, errors.New("Dst option is required")
	}
	if opt.FirstHourOfDay < 0 || opt.FirstHourOfDay > 23 {
		return nil, fmt.Errorf("wrong firstHourOfDay option: %d", opt.FirstHourOfDay)
	}

	// Record layout (28 bytes): period, since and until (4 bytes each),
	// then the start value and the end value (8 bytes each).
	s := &CumulativePeriodsWriter{
		arr:            make([]byte, 28),
		responder:      NewChunkedResponder(opt.Dst),
		firstHourOfDay: opt.FirstHourOfDay,
		groupBy:        opt.GroupBy,
	}

	// Every accepted GroupBy assigns time2period below, so no default period
	// function is installed beforehand.
	switch opt.GroupBy {
	case diploma.GroupByHour:
		s.time2period = groupByHour
	case diploma.GroupByDay:
		if s.firstHourOfDay > 0 {
			s.time2period = s.groupByDayUsingFHD
		} else {
			s.time2period = groupByDay
		}
	case diploma.GroupByMonth:
		if s.firstHourOfDay > 0 {
			s.time2period = s.groupByMonthUsingFHD
		} else {
			s.time2period = groupByMonth
		}
	default:
		return nil, fmt.Errorf("unknown groupBy %d option", opt.GroupBy)
	}
	return s, nil
}
// groupByDayUsingFHD maps timestamp to the start of its day, treating readings
// taken before firstHourOfDay as belonging to the previous day.
//
// The hour is taken from the reading itself. The previous code tested the hour
// of the value returned by timeutil.FirstSecondInPeriod, which is presumably
// always the first hour of the day, so every reading was moved one day back.
// NOTE(review): assumes FirstSecondInPeriod(t, "d") returns the start of t's
// day in t's location — confirm against timeutil.
func (s *CumulativePeriodsWriter) groupByDayUsingFHD(timestamp uint32) time.Time {
	t := time.Unix(int64(timestamp), 0)
	day := timeutil.FirstSecondInPeriod(t, "d")
	if t.Hour() < s.firstHourOfDay {
		day = day.AddDate(0, 0, -1)
	}
	return day
}
// groupByMonthUsingFHD maps timestamp to the start of its month, treating
// readings taken on the first day of a month before firstHourOfDay as
// belonging to the previous month.
//
// The previous code tested the hour of the truncated month start and stepped
// back by one day. That produced a key which is not a month start, so the
// month-wise decrementPeriod could never land on it again.
// NOTE(review): assumes FirstSecondInPeriod(t, "m") returns the first instant
// of t's month in t's location — confirm against timeutil.
func (s *CumulativePeriodsWriter) groupByMonthUsingFHD(timestamp uint32) time.Time {
	t := time.Unix(int64(timestamp), 0)
	month := timeutil.FirstSecondInPeriod(t, "m")
	if t.Day() == 1 && t.Hour() < s.firstHourOfDay {
		month = month.AddDate(0, -1, 0)
	}
	return month
}
// Feed adds one reading; finished period records go through
// ChunkedResponder.AppendRecord, which may send the accumulated chunk.
func (s *CumulativePeriodsWriter) Feed(timestamp uint32, value float64) {
	const bufferOnly = false
	s.feed(timestamp, value, bufferOnly)
}
// FeedNoSend adds one reading; finished period records are only buffered in
// the responder and nothing is written to the destination.
func (s *CumulativePeriodsWriter) FeedNoSend(timestamp uint32, value float64) {
	const bufferOnly = true
	s.feed(timestamp, value, bufferOnly)
}
// feed processes one reading. When the reading belongs to a different period
// than the current one, the current period is packed with this reading as its
// start (the start of a period is the end of the previous one), blank records
// are emitted for every skipped period, and the reading becomes the end of the
// newly opened period.
//
// Periods are stepped backwards in time (see decrementPeriod), so readings are
// expected newest first. With isBuffer set, records are only buffered in the
// responder; otherwise AppendRecord is used, which may also send the chunk.
func (s *CumulativePeriodsWriter) feed(timestamp uint32, value float64, isBuffer bool) {
	emit := func() {
		if isBuffer {
			s.responder.BufferRecord(s.arr)
			return
		}
		// feed has no error result, so a send error is dropped here, as before.
		_ = s.responder.AppendRecord(s.arr)
	}

	period := s.time2period(timestamp)

	switch {
	case s.endTimestamp == 0:
		// Very first reading: it is the end value of the first period.
		s.endTimestamp = timestamp
		s.endValue = value
		s.currentPeriod = period

	case !period.Equal(s.currentPeriod):
		// Close the finished period; this reading is its starting point.
		s.packPeriod(timestamp, value)
		emit()

		// Emit a blank record for every period skipped between the two readings.
		s.decrementPeriod()
		for period.Before(s.currentPeriod) {
			s.packBlankPeriod()
			emit()
			s.decrementPeriod()
		}
		// Normally the decrement lands exactly on period; re-sync explicitly so
		// a misaligned or out-of-order reading cannot leave currentPeriod
		// permanently off and close a period on every following reading.
		s.currentPeriod = period

		s.endTimestamp = timestamp
		s.endValue = value
	}

	s.lastTimestamp = timestamp
	s.lastValue = value
}
// decrementPeriod moves currentPeriod one step back in time: one hour, one day
// or one month depending on groupBy. Any other groupBy leaves it unchanged.
func (s *CumulativePeriodsWriter) decrementPeriod() {
	prev := s.currentPeriod
	if s.groupBy == diploma.GroupByHour {
		prev = prev.Add(-time.Hour)
	} else if s.groupBy == diploma.GroupByDay {
		prev = prev.AddDate(0, 0, -1)
	} else if s.groupBy == diploma.GroupByMonth {
		prev = prev.AddDate(0, -1, 0)
	}
	s.currentPeriod = prev
}
// packBlankPeriod fills arr with a placeholder record for a period without
// readings: the period start in the first 4 bytes and zeros everywhere else.
func (s *CumulativePeriodsWriter) packBlankPeriod() {
	periodStart := uint32(s.currentPeriod.Unix())
	bin.PutUint32(s.arr[0:], periodStart)

	// Zero since, until and both values.
	rest := s.arr[4:]
	for i := range rest {
		rest[i] = 0
	}
}
// packPeriod serialises the current period into arr: period start, start
// (since), endTimestamp (until), startValue and endValue. "until" is always
// endTimestamp.
func (s *CumulativePeriodsWriter) packPeriod(start uint32, startValue float64) {
	rec := s.arr
	periodStart := uint32(s.currentPeriod.Unix())

	// Header: three 4-byte timestamps.
	bin.PutUint32(rec[0:], periodStart)
	bin.PutUint32(rec[4:], start)
	bin.PutUint32(rec[8:], s.endTimestamp)

	// Payload: the values at the start and at the end of the period.
	bin.PutFloat64(rec[12:], startValue)
	bin.PutFloat64(rec[20:], s.endValue)
}
// Close emits the record of the period still being accumulated (if any
// reading was fed) and flushes the responder, which also writes the
// end-of-stream message.
//
// The final record is buffered rather than appended: Flush sends it right
// away, and a write failure is reported to the caller instead of being lost in
// an ignored AppendRecord result.
func (s *CumulativePeriodsWriter) Close() error {
	if s.endTimestamp > 0 {
		if s.lastTimestamp != s.endTimestamp {
			// The last reading fed is the start of the open period.
			s.packPeriod(s.lastTimestamp, s.lastValue)
		} else {
			// Only one reading in the open period: start and end coincide.
			s.packPeriod(s.endTimestamp, s.endValue)
		}
		s.responder.BufferRecord(s.arr)
	}
	return s.responder.Flush()
}

147
transform/raw.go Normal file
View File

@@ -0,0 +1,147 @@
package transform
import (
"io"
"gordenko.dev/dima/diploma/bin"
)
// CURRENT VALUE WRITER
// CurrentValue is one record accepted by CurrentValueWriter.BufferValue:
// a metric ID, a timestamp and a value.
type CurrentValue struct {
MetricID uint32
Timestamp uint32
Value float64
}
// CurrentValueWriter packs CurrentValue records and buffers them in a ChunkedResponder.
type CurrentValueWriter struct {
// arr is the reusable buffer a single record is packed into.
arr []byte
// responder buffers the packed records and writes them to the destination.
responder *ChunkedResponder
}
// NewCurrentValueWriter returns a writer whose records are sent to dst through
// a ChunkedResponder.
func NewCurrentValueWriter(dst io.Writer) *CurrentValueWriter {
	// 16 bytes: metric ID (4) + timestamp (4) + value (8).
	const recordSize = 4 + 4 + 8

	w := new(CurrentValueWriter)
	w.arr = make([]byte, recordSize)
	w.responder = NewChunkedResponder(dst)
	return w
}
// BufferValue packs m (metric ID, timestamp, value) and buffers the record in
// the responder; nothing is written to the destination until Close.
func (s *CurrentValueWriter) BufferValue(m CurrentValue) {
	rec := s.arr
	bin.PutUint32(rec[0:], m.MetricID)
	bin.PutUint32(rec[4:], m.Timestamp)
	bin.PutFloat64(rec[8:], m.Value)

	s.responder.BufferRecord(rec)
}
// Close flushes the responder: buffered records are sent, followed by the
// end-of-stream message.
func (s *CurrentValueWriter) Close() error {
	err := s.responder.Flush()
	return err
}
// INSTANT MEASURE WRITER
// InstantMeasure is a single reading of an instant metric: a timestamp and a value.
type InstantMeasure struct {
Timestamp uint32
Value float64
}
// InstantMeasureWriter packs raw instant readings and passes them to a ChunkedResponder.
type InstantMeasureWriter struct {
// arr is the reusable buffer a single record is packed into.
arr []byte
// responder buffers the packed records and writes them to the destination.
responder *ChunkedResponder
// since is the lower bound: feed drops readings with a smaller timestamp.
since uint32
}
// NewInstantMeasureWriter returns a writer that sends instant readings to dst
// and skips readings older than since.
func NewInstantMeasureWriter(dst io.Writer, since uint32) *InstantMeasureWriter {
	// 12 bytes: timestamp (4) + value (8).
	const recordSize = 4 + 8

	w := new(InstantMeasureWriter)
	w.arr = make([]byte, recordSize)
	w.responder = NewChunkedResponder(dst)
	w.since = since
	return w
}
// Feed adds one reading; the record goes through ChunkedResponder.AppendRecord,
// which may send the accumulated chunk.
func (s *InstantMeasureWriter) Feed(timestamp uint32, value float64) {
	const bufferOnly = false
	s.feed(timestamp, value, bufferOnly)
}
// FeedNoSend adds one reading; the record is only buffered in the responder
// and nothing is written to the destination.
func (s *InstantMeasureWriter) FeedNoSend(timestamp uint32, value float64) {
	const bufferOnly = true
	s.feed(timestamp, value, bufferOnly)
}
// feed packs one reading (timestamp, value) and hands it to the responder.
// Readings older than since are dropped. With isBuffer set the record is only
// buffered; otherwise AppendRecord is used.
func (s *InstantMeasureWriter) feed(timestamp uint32, value float64, isBuffer bool) {
	if timestamp >= s.since {
		rec := s.arr
		bin.PutUint32(rec[0:], timestamp)
		bin.PutFloat64(rec[4:], value)

		switch isBuffer {
		case true:
			s.responder.BufferRecord(rec)
		default:
			s.responder.AppendRecord(rec)
		}
	}
}
// Close flushes the responder: buffered records are sent, followed by the
// end-of-stream message.
func (s *InstantMeasureWriter) Close() error {
	err := s.responder.Flush()
	return err
}
// CUMULATIVE MEASURE WRITER
// CumulativeMeasure is a single reading of a cumulative metric: a timestamp,
// a value and a total.
// NOTE(review): Total presumably matches the third record field written by
// CumulativeMeasureWriter (difference to the next reading fed) — confirm.
type CumulativeMeasure struct {
Timestamp uint32
Value float64
Total float64
}
// CumulativeMeasureWriter packs readings of a cumulative metric together with
// the difference to the next reading fed, and passes them to a ChunkedResponder.
type CumulativeMeasureWriter struct {
// arr is the reusable buffer a single record is packed into.
arr []byte
// responder buffers the packed records and writes them to the destination.
responder *ChunkedResponder
// since is the lower bound of the requested range; Close compares the
// pending reading against it.
since uint32
// endTimestamp and endValue hold the reading that has been fed but not yet
// emitted; endTimestamp == 0 means no reading is pending.
endTimestamp uint32
endValue float64
}
// NewCumulativeMeasureWriter returns a writer that sends cumulative readings
// to dst; since is the lower bound of the requested range.
func NewCumulativeMeasureWriter(dst io.Writer, since uint32) *CumulativeMeasureWriter {
	// 20 bytes: timestamp (4) + value (8) + total (8).
	const recordSize = 4 + 8 + 8

	w := new(CumulativeMeasureWriter)
	w.arr = make([]byte, recordSize)
	w.responder = NewChunkedResponder(dst)
	w.since = since
	return w
}
// Feed adds one reading; a completed record goes through
// ChunkedResponder.AppendRecord, which may send the accumulated chunk.
func (s *CumulativeMeasureWriter) Feed(timestamp uint32, value float64) {
	const bufferOnly = false
	s.feed(timestamp, value, bufferOnly)
}
// FeedNoSend adds one reading; a completed record is only buffered in the
// responder and nothing is written to the destination.
func (s *CumulativeMeasureWriter) FeedNoSend(timestamp uint32, value float64) {
	const bufferOnly = true
	s.feed(timestamp, value, bufferOnly)
}
// feed emits the pending reading (if any) with its total set to the difference
// between the pending value and the new value, then keeps the new reading as
// the pending one. With isBuffer set the record is only buffered; otherwise
// AppendRecord is used.
func (s *CumulativeMeasureWriter) feed(timestamp uint32, value float64, isBuffer bool) {
	if hasPending := s.endTimestamp > 0; hasPending {
		delta := s.endValue - value
		s.pack(delta)

		switch isBuffer {
		case true:
			s.responder.BufferRecord(s.arr)
		default:
			s.responder.AppendRecord(s.arr)
		}
	}
	s.endTimestamp, s.endValue = timestamp, value
}
// pack serialises the pending reading into arr: endTimestamp, endValue and
// the given total.
func (s *CumulativeMeasureWriter) pack(total float64) {
	rec := s.arr
	bin.PutUint32(rec[0:], s.endTimestamp)
	bin.PutFloat64(rec[4:], s.endValue)
	bin.PutFloat64(rec[12:], total)
}
// Close emits the pending reading when it lies inside the requested range and
// flushes the responder, which also writes the end-of-stream message.
//
// The previous code packed the final record but never handed it to the
// responder, so the last reading of the range was lost. It also entered the
// branch when nothing had been fed and since was 0.
func (s *CumulativeMeasureWriter) Close() error {
	// endTimestamp == 0 means no reading is pending (see feed).
	if s.endTimestamp > 0 && s.endTimestamp >= s.since {
		// The pending reading is inside the requested range and there are no
		// other readings, so it is emitted with an accumulated total of 0.
		s.pack(0)
		s.responder.BufferRecord(s.arr)
	}
	// If the pending reading is older than since there is nothing to do: it
	// was only needed to compute the total of the previously emitted record.
	return s.responder.Flush()
}

105
transform/responder.go Normal file
View File

@@ -0,0 +1,105 @@
package transform
import (
"bytes"
"fmt"
"io"
"gordenko.dev/dima/diploma/bin"
"gordenko.dev/dima/diploma/proto"
)
// CHUNKED RESPONDER
// endMsg is the one-byte message ChunkedResponder.Flush writes after the last chunk.
var endMsg = []byte{
proto.RespEndOfValue, // end of stream
}
// ChunkedResponder collects fixed-size records into a chunk and writes chunks
// to dst. Each chunk starts with a message-type byte and a 4-byte record count.
type ChunkedResponder struct {
// recordsQty is the number of records in the chunk being built.
recordsQty int
// buf holds the chunk being built, header included.
buf *bytes.Buffer
// dst is the destination chunks and the end message are written to.
dst io.Writer
}
// NewChunkedResponder returns a responder writing to dst. Its buffer already
// contains the header of the first chunk.
func NewChunkedResponder(dst io.Writer) *ChunkedResponder {
	// Chunk header: message type followed by a 4-byte record counter that
	// sendBuffered fills in right before writing.
	header := []byte{proto.RespPartOfValue, 0, 0, 0, 0}

	r := &ChunkedResponder{
		buf: bytes.NewBuffer(nil),
		dst: dst,
	}
	r.buf.Write(header)
	return r
}
// BufferRecord adds rec to the current chunk without writing anything to the
// destination.
func (s *ChunkedResponder) BufferRecord(rec []byte) {
	s.recordsQty++
	s.buf.Write(rec)
}
// AppendRecord adds rec to the current chunk and, once the chunk has reached
// 1500 bytes, writes it to the destination and starts a new chunk. It returns
// the write error, if any.
func (s *ChunkedResponder) AppendRecord(rec []byte) error {
	const sendThreshold = 1500

	s.buf.Write(rec)
	s.recordsQty++

	if s.buf.Len() >= sendThreshold {
		if err := s.sendBuffered(); err != nil {
			return err
		}
		// Start the next chunk: message type plus a zeroed record counter.
		nextHeader := []byte{proto.RespPartOfValue, 0, 0, 0, 0}
		s.buf.Write(nextHeader)
		s.recordsQty = 0
	}
	return nil
}
// Flush writes the current chunk if it contains records and then writes the
// end-of-stream message. The first write error is returned.
func (s *ChunkedResponder) Flush() error {
	if s.recordsQty > 0 {
		if err := s.sendBuffered(); err != nil {
			return err
		}
	}
	_, err := s.dst.Write(endMsg)
	return err
}
// sendBuffered stores the record count in the chunk header, writes the chunk
// to the destination and, on success, resets the buffer. A short write is
// reported as an error; in both error cases the buffer is left untouched.
func (s *ChunkedResponder) sendBuffered() (err error) {
	chunk := s.buf.Bytes()
	// Bytes 1..4 of the header hold the number of records in the chunk.
	bin.PutUint32(chunk[1:], uint32(s.recordsQty))

	written, err := s.dst.Write(chunk)
	switch {
	case err != nil:
		return err
	case written != len(chunk):
		return fmt.Errorf("incomplete write %d bytes instead of %d", written, len(chunk))
	}
	s.buf.Reset()
	return nil
}
// For aggregation we write a period-detection function and emit the actual periods.
//
// By default net/http.Server uses 4 KB buffers, which are flushed to the client with chunked responses.
// These buffers may result in visible overhead for responses exceeding a few megabytes.
// So allocate 64 KB buffers:
// bw: bufio.NewWriterSize(w, 64*1024),

19
transform/transform.go Normal file
View File

@@ -0,0 +1,19 @@
package transform
import (
"time"
"gordenko.dev/dima/diploma/timeutil"
)
// groupByHour maps a Unix timestamp (seconds) to its hour period, as returned
// by timeutil.FirstSecondInPeriod with the "h" period.
func groupByHour(timestamp uint32) time.Time {
	moment := time.Unix(int64(timestamp), 0)
	return timeutil.FirstSecondInPeriod(moment, "h")
}
// groupByDay maps a Unix timestamp (seconds) to its day period, as returned
// by timeutil.FirstSecondInPeriod with the "d" period.
func groupByDay(timestamp uint32) time.Time {
	moment := time.Unix(int64(timestamp), 0)
	return timeutil.FirstSecondInPeriod(moment, "d")
}
// groupByMonth maps a Unix timestamp (seconds) to its month period, as
// returned by timeutil.FirstSecondInPeriod with the "m" period.
func groupByMonth(timestamp uint32) time.Time {
	moment := time.Unix(int64(timestamp), 0)
	return timeutil.FirstSecondInPeriod(moment, "m")
}