This commit is contained in:
2025-06-03 05:04:18 +03:00
parent 0f50873f0f
commit fbb30f31e8
54 changed files with 13234 additions and 0 deletions

1058
database/api.go Normal file

File diff suppressed because it is too large Load Diff

460
database/database.go Normal file
View File

@@ -0,0 +1,460 @@
package database
import (
"errors"
"fmt"
"hash/crc32"
"io"
"log"
"net"
"os"
"path/filepath"
"regexp"
"sync"
"time"
"gordenko.dev/dima/diploma"
"gordenko.dev/dima/diploma/atree"
"gordenko.dev/dima/diploma/atree/redo"
"gordenko.dev/dima/diploma/bin"
"gordenko.dev/dima/diploma/chunkenc"
"gordenko.dev/dima/diploma/conbuf"
"gordenko.dev/dima/diploma/freelist"
"gordenko.dev/dima/diploma/recovery"
"gordenko.dev/dima/diploma/txlog"
)
// JoinSnapshotFileName builds the snapshot file path for the given
// transaction-log number inside dir (e.g. "<dir>/7.snapshot").
func JoinSnapshotFileName(dir string, logNumber int) string {
	base := fmt.Sprintf("%d.snapshot", logNumber)
	return filepath.Join(dir, base)
}
// metricLockEntry tracks the lock state of a single metric: at most one
// exclusive holder, or any number of concurrent readers, plus a queue of
// requests waiting for the lock to become available.
type metricLockEntry struct {
	XLock     bool  // set while an exclusive (write) lock is held
	RLocks    int   // number of read locks currently held
	WaitQueue []any // presumably pending lock requests served in FIFO order — confirm against the worker in proc.go
}
// Database is the top-level server object: it owns the in-memory metric
// catalog, the per-metric lock table, the free-page lists, and the
// background components (transaction log writer, A-tree storage, worker).
type Database struct {
	mutex             sync.Mutex    // guards workerQueue and rLocksToRelease (see helpers.go); presumably also the maps below — confirm in proc.go
	workerSignalCh    chan struct{} // capacity-1 wake-up signal for the worker goroutine
	workerQueue       []any         // pending jobs drained by the worker
	rLocksToRelease   []uint32      // metric IDs whose read locks the worker should release
	metrics           map[uint32]*_metric         // metricID -> in-memory metric state
	metricLockEntries map[uint32]*metricLockEntry // metricID -> lock state
	dataFreeList      *freelist.FreeList          // reusable data pages
	indexFreeList     *freelist.FreeList          // reusable index pages
	dir               string                      // directory holding snapshots and changes files
	databaseName      string
	redoDir           string // directory scanned for leftover *.redo files at startup
	txlog             *txlog.Writer // transaction log writer, created during recovery()
	atree             *atree.Atree  // on-disk storage engine, created in ListenAndServe
	tcpPort           int
	logfile           *os.File
	logger            *log.Logger
	exitCh            chan struct{}   // closed to request shutdown (propagated to txlog)
	waitGroup         *sync.WaitGroup // tracks background goroutines for clean shutdown
}
// Options carries the required configuration for New. Every field is
// mandatory; New rejects zero values.
type Options struct {
	TCPPort      int             // TCP port to listen on
	Dir          string          // directory for snapshots and transaction-log files
	DatabaseName string          // database name passed through to the A-tree storage
	RedoDir      string          // directory for REDO files
	Logfile      *os.File        // destination for the database logger
	ExitCh       chan struct{}   // closed by the owner to request shutdown
	WaitGroup    *sync.WaitGroup // tracks background goroutines for clean shutdown
}
// New validates opt and builds a Database value. It only wires up
// in-memory state; ListenAndServe performs the actual startup work
// (opening the storage engine, recovery, accepting connections).
func New(opt Options) (_ *Database, err error) {
	// Table-driven validation: every option is mandatory.
	checks := []struct {
		ok  bool
		msg string
	}{
		{opt.TCPPort > 0, "TCPPort option is required"},
		{opt.Dir != "", "Dir option is required"},
		{opt.DatabaseName != "", "DatabaseName option is required"},
		{opt.RedoDir != "", "RedoDir option is required"},
		{opt.Logfile != nil, "Logfile option is required"},
		{opt.ExitCh != nil, "ExitCh option is required"},
		{opt.WaitGroup != nil, "WaitGroup option is required"},
	}
	for _, c := range checks {
		if !c.ok {
			return nil, errors.New(c.msg)
		}
	}
	db := &Database{
		workerSignalCh:    make(chan struct{}, 1),
		dir:               opt.Dir,
		databaseName:      opt.DatabaseName,
		redoDir:           opt.RedoDir,
		metrics:           make(map[uint32]*_metric),
		metricLockEntries: make(map[uint32]*metricLockEntry),
		dataFreeList:      freelist.New(),
		indexFreeList:     freelist.New(),
		tcpPort:           opt.TCPPort,
		logfile:           opt.Logfile,
		logger:            log.New(opt.Logfile, "", log.LstdFlags),
		exitCh:            opt.ExitCh,
		waitGroup:         opt.WaitGroup,
	}
	return db, nil
}
// ListenAndServe starts the database: it binds the TCP listener, opens the
// A-tree storage engine, launches the background worker, runs crash
// recovery, and then serves client connections until the process exits.
// The listener is bound first so a port conflict is reported before any
// recovery work is done.
func (s *Database) ListenAndServe() (err error) {
	listener, err := net.Listen("tcp", fmt.Sprintf(":%d", s.tcpPort))
	if err != nil {
		return fmt.Errorf("net.Listen: %s; port=%d", err, s.tcpPort)
	}
	s.atree, err = atree.New(atree.Options{
		Dir:           s.dir,
		DatabaseName:  s.databaseName,
		RedoDir:       s.redoDir,
		DataFreeList:  s.dataFreeList,
		IndexFreeList: s.indexFreeList,
	})
	if err != nil {
		return fmt.Errorf("atree.New: %s", err)
	}
	// Order matters: the A-tree and worker must be running before recovery,
	// because recovery replays REDO files through them.
	s.atree.Run()
	go s.worker()
	s.recovery()
	s.logger.Println("database started")
	for {
		// Listen for an incoming connection.
		conn, err := listener.Accept()
		if err != nil {
			// Transient accept failure: log and back off briefly instead of
			// exiting the accept loop.
			s.logger.Printf("listener.Accept: %s\n", err)
			time.Sleep(time.Second)
		} else {
			go s.handleTCPConn(conn)
		}
	}
}
// recovery restores the in-memory state after a restart. Phases, in order:
//  1. ask the recovery advisor for a recipe (which snapshot to load, which
//     changes files to replay, which files to delete afterwards);
//  2. load the snapshot, then replay newer changes files on top of it;
//  3. create and start the transaction-log writer at the recipe's log number;
//  4. replay and then delete any leftover REDO files from a crashed write;
//  5. optionally dump a fresh snapshot and delete obsolete recipe files.
// Any failure aborts the process: a partially recovered database must not
// serve traffic.
func (s *Database) recovery() {
	advisor, err := recovery.NewRecoveryAdvisor(recovery.RecoveryAdvisorOptions{
		Dir:            s.dir,
		VerifySnapshot: s.verifySnapshot,
	})
	if err != nil {
		panic(err)
	}
	recipe, err := advisor.GetRecipe()
	if err != nil {
		diploma.Abort(diploma.GetRecoveryRecipeFailed, err)
	}
	var logNumber int
	if recipe != nil {
		if recipe.Snapshot != "" {
			err = s.loadSnapshot(recipe.Snapshot)
			if err != nil {
				diploma.Abort(diploma.LoadSnapshotFailed, err)
			}
		}
		// Changes files are replayed in the order the advisor listed them.
		for _, changesFileName := range recipe.Changes {
			err = s.replayChanges(changesFileName)
			if err != nil {
				diploma.Abort(diploma.ReplayChangesFailed, err)
			}
		}
		logNumber = recipe.LogNumber
	}
	// The txlog writer must exist before REDO replay: replayREDOFile logs
	// the replayed overflow records through it.
	s.txlog, err = txlog.NewWriter(txlog.WriterOptions{
		Dir:                 s.dir,
		LogNumber:           logNumber,
		AppendToWorkerQueue: s.appendJobToWorkerQueue,
		ExitCh:              s.exitCh,
		WaitGroup:           s.waitGroup,
	})
	if err != nil {
		diploma.Abort(diploma.CreateChangesWriterFailed, err)
	}
	go s.txlog.Run()
	fileNames, err := s.searchREDOFiles()
	if err != nil {
		diploma.Abort(diploma.SearchREDOFilesFailed, err)
	}
	if len(fileNames) > 0 {
		// Replay all REDO files first, and only then delete them, so a crash
		// mid-loop never loses an unreplayed file.
		for _, fileName := range fileNames {
			err = s.replayREDOFile(fileName)
			if err != nil {
				diploma.Abort(diploma.ReplayREDOFileFailed, err)
			}
		}
		for _, fileName := range fileNames {
			err = os.Remove(fileName)
			if err != nil {
				diploma.Abort(diploma.RemoveREDOFileFailed, err)
			}
		}
	}
	if recipe != nil {
		if recipe.CompleteSnapshot {
			err = s.dumpSnapshot(logNumber)
			if err != nil {
				diploma.Abort(diploma.DumpSnapshotFailed, err)
			}
		}
		// Obsolete files are removed last, after the new state is durable.
		for _, fileName := range recipe.ToDelete {
			err = os.Remove(fileName)
			if err != nil {
				diploma.Abort(diploma.RemoveRecipeFileFailed, err)
			}
		}
	}
}
// reREDOFileName matches REDO file base names of the form "a<number>.redo".
// Compiled once at package init instead of on every call.
var reREDOFileName = regexp.MustCompile(`^a\d+\.redo$`)

// searchREDOFiles returns the full paths of all REDO files found in the
// REDO directory. Non-regular entries (directories, symlinks) are skipped.
func (s *Database) searchREDOFiles() ([]string, error) {
	entries, err := os.ReadDir(s.redoDir)
	if err != nil {
		return nil, err
	}
	var fileNames []string
	for _, entry := range entries {
		if !entry.Type().IsRegular() {
			continue
		}
		baseName := entry.Name()
		// Fix: the pattern was previously unanchored, so any file whose name
		// merely CONTAINED "a<digits>.redo" (e.g. "a1.redo.bak") matched and
		// would later be replayed and deleted. Anchor it to the whole name.
		if reREDOFileName.MatchString(baseName) {
			fileNames = append(fileNames, filepath.Join(s.redoDir, baseName))
		}
	}
	return fileNames, nil
}
// replayREDOFile applies a single REDO file left over from a crashed
// overflow write: if the metric's in-memory state is older than the REDO
// record, the saved data/index pages are re-applied to the A-tree and the
// corresponding overflow record is appended to the transaction log.
// The caller (recovery) deletes the file afterwards.
func (s *Database) replayREDOFile(fileName string) error {
	redoFile, err := redo.ReadREDOFile(redo.ReadREDOFileReq{
		FileName:      fileName,
		DataPageSize:  atree.DataPageSize,
		IndexPageSize: atree.IndexPageSize,
	})
	if err != nil {
		return fmt.Errorf("can't read REDO file %s: %s", fileName, err)
	}
	metric, ok := s.metrics[redoFile.MetricID]
	if !ok {
		return fmt.Errorf("has REDOFile, metric %d not found", redoFile.MetricID)
	}
	// Replay only if the REDO record is newer than the in-memory state,
	// i.e. the write had not yet reached the transaction log.
	if metric.Until < redoFile.Timestamp {
		waitCh := make(chan struct{})
		// NOTE(review): waitCh is created and received from below, but it is
		// never passed to ApplyREDO (or anything else), so nothing can ever
		// close or send on it — if this branch executes, the receive blocks
		// forever. Presumably atree.WriteTask should carry the channel;
		// confirm against the WriteTask definition and fix.
		s.atree.ApplyREDO(atree.WriteTask{
			DataPage:   redoFile.DataPage,
			IndexPages: redoFile.IndexPages,
		})
		<-waitCh
		// Log the replayed overflow append and wait until it is durable
		// before returning (the caller then deletes the REDO file).
		waitCh = s.txlog.WriteAppendedMeasureWithOverflow(
			txlog.AppendedMeasureWithOverflow{
				MetricID:         redoFile.MetricID,
				Timestamp:        redoFile.Timestamp,
				Value:            redoFile.Value,
				IsDataPageReused: redoFile.IsDataPageReused,
				DataPageNo:       redoFile.DataPage.PageNo,
				IsRootChanged:    redoFile.IsRootChanged,
				RootPageNo:       redoFile.RootPageNo,
				ReusedIndexPages: redoFile.ReusedIndexPages,
			},
			fileName,
			false,
		)
		<-waitCh
	}
	return nil
}
// verifySnapshot reports whether the snapshot file is internally
// consistent: it recomputes CRC32 over everything except the trailing
// 4-byte checksum and compares it with the stored value. A file too short
// to contain any payload yields (false, nil); a checksum mismatch yields
// (false, error).
func (s *Database) verifySnapshot(fileName string) (_ bool, err error) {
	file, err := os.Open(fileName)
	if err != nil {
		return
	}
	defer file.Close()
	stat, err := file.Stat()
	if err != nil {
		return
	}
	// A valid snapshot needs at least one payload byte plus the 4-byte CRC.
	if stat.Size() <= 4 {
		return false, nil
	}
	var (
		payloadSize = stat.Size() - 4 // everything except the trailing CRC32
		hash        = crc32.NewIEEE()
	)
	_, err = io.CopyN(hash, file, payloadSize)
	if err != nil {
		return
	}
	calculatedCRC := hash.Sum32()
	storedCRC, err := bin.ReadUint32(file)
	if err != nil {
		return
	}
	if storedCRC != calculatedCRC {
		// Fix: error message previously misspelled "strored".
		return false, fmt.Errorf("stored CRC %d not equal calculated CRC %d",
			storedCRC, calculatedCRC)
	}
	return true, nil
}
// replayChanges streams one transaction-log (changes) file and applies
// every record in it to the in-memory state. Returns nil when the reader
// reports the end of the file, or the first read/apply error.
func (s *Database) replayChanges(fileName string) error {
	walReader, err := txlog.NewReader(txlog.ReaderOptions{
		FileName:   fileName,
		BufferSize: 1024 * 1024,
	})
	if err != nil {
		return err
	}
	for {
		// The packet's LSN is not needed during replay.
		_, records, done, err := walReader.ReadPacket()
		if err != nil {
			return err
		}
		if done {
			return nil
		}
		for _, record := range records {
			if err := s.replayChangesRecord(record); err != nil {
				return err
			}
		}
	}
}
// replayChangesRecord applies a single transaction-log record to the
// in-memory state. Records referring to a metric that no longer exists are
// silently skipped (the metric may be deleted later in the same log).
// An unknown record type is a programming bug and aborts the process.
func (s *Database) replayChangesRecord(untyped any) error {
	switch rec := untyped.(type) {
	case txlog.AddedMetric:
		// Build empty compression state for the newly added metric.
		var (
			values        diploma.ValueCompressor
			timestampsBuf = conbuf.New(nil)
			valuesBuf     = conbuf.New(nil)
		)
		if rec.MetricType == diploma.Cumulative {
			values = chunkenc.NewReverseCumulativeDeltaCompressor(
				valuesBuf, 0, byte(rec.FracDigits))
		} else {
			values = chunkenc.NewReverseInstantDeltaCompressor(
				valuesBuf, 0, byte(rec.FracDigits))
		}
		s.metrics[rec.MetricID] = &_metric{
			MetricType:    rec.MetricType,
			FracDigits:    byte(rec.FracDigits),
			TimestampsBuf: timestampsBuf,
			ValuesBuf:     valuesBuf,
			Timestamps:    chunkenc.NewReverseTimeDeltaOfDeltaCompressor(timestampsBuf, 0),
			Values:        values,
		}
	case txlog.DeletedMetric:
		delete(s.metrics, rec.MetricID)
		// Pages owned by the deleted metric become reusable.
		if len(rec.FreeDataPages) > 0 {
			s.dataFreeList.AddPages(rec.FreeDataPages)
		}
		if len(rec.FreeIndexPages) > 0 {
			s.indexFreeList.AddPages(rec.FreeIndexPages)
		}
	case txlog.AppendedMeasure:
		metric, ok := s.metrics[rec.MetricID]
		if ok {
			metric.Timestamps.Append(rec.Timestamp)
			metric.Values.Append(rec.Value)
			// Since == 0 marks an empty metric: the first appended measure
			// becomes both the oldest and the newest one.
			if metric.Since == 0 {
				metric.Since = rec.Timestamp
				metric.SinceValue = rec.Value
			}
			metric.Until = rec.Timestamp
			metric.UntilValue = rec.Value
		}
	case txlog.AppendedMeasures:
		metric, ok := s.metrics[rec.MetricID]
		if ok {
			for _, measure := range rec.Measures {
				metric.Timestamps.Append(measure.Timestamp)
				metric.Values.Append(measure.Value)
				if metric.Since == 0 {
					metric.Since = measure.Timestamp
					metric.SinceValue = measure.Value
				}
				metric.Until = measure.Timestamp
				metric.UntilValue = measure.Value
			}
		}
	case txlog.AppendedMeasureWithOverflow:
		metric, ok := s.metrics[rec.MetricID]
		if ok {
			// An overflow flushed the in-memory tail to disk; restart the
			// in-memory state seeded with the overflowing measure.
			metric.ReinitBy(rec.Timestamp, rec.Value)
			if rec.IsRootChanged {
				metric.RootPageNo = rec.RootPageNo
			}
			metric.LastPageNo = rec.DataPageNo
			// Pages consumed by the flush are no longer free.
			if rec.IsDataPageReused {
				s.dataFreeList.DeletePages([]uint32{
					rec.DataPageNo,
				})
			}
			if len(rec.ReusedIndexPages) > 0 {
				s.indexFreeList.DeletePages(rec.ReusedIndexPages)
			}
		}
	case txlog.DeletedMeasures:
		metric, ok := s.metrics[rec.MetricID]
		if ok {
			metric.DeleteMeasures()
			if len(rec.FreeDataPages) > 0 {
				s.dataFreeList.AddPages(rec.FreeDataPages)
			}
			// Fix: this previously tested len(rec.FreeDataPages) (copy-paste
			// bug), so freed index pages were dropped whenever the data-page
			// list happened to be empty. Mirror the DeletedMetric branch.
			if len(rec.FreeIndexPages) > 0 {
				s.indexFreeList.AddPages(rec.FreeIndexPages)
			}
		}
	default:
		diploma.Abort(diploma.UnknownTxLogRecordTypeBug,
			fmt.Errorf("bug: unknown record type %T in TransactionLog", rec))
	}
	return nil
}

50
database/helpers.go Normal file
View File

@@ -0,0 +1,50 @@
package database
import (
"errors"
"io/fs"
"os"
"time"
)
// isFileExist reports whether fileName exists on disk. A missing file is
// not an error: the result is (false, nil). Any other Stat failure is
// propagated as-is.
func isFileExist(fileName string) (bool, error) {
	if _, err := os.Stat(fileName); err != nil {
		if errors.Is(err, fs.ErrNotExist) {
			return false, nil
		}
		return false, err
	}
	return true, nil
}
// appendJobToWorkerQueue enqueues a job for the worker goroutine and wakes
// it up. Safe for concurrent use.
func (s *Database) appendJobToWorkerQueue(job any) {
	s.mutex.Lock()
	s.workerQueue = append(s.workerQueue, job)
	s.mutex.Unlock()
	// Non-blocking send on a capacity-1 channel: concurrent wake-ups
	// coalesce into one signal and the enqueuer never blocks. The mutex is
	// released first so the woken worker can grab it immediately.
	select {
	case s.workerSignalCh <- struct{}{}:
	default:
	}
}
// metricRUnlock schedules the release of a read lock on the given metric.
// The actual release is performed asynchronously by the worker goroutine,
// which drains rLocksToRelease; this keeps lock bookkeeping single-threaded.
func (s *Database) metricRUnlock(metricID uint32) {
	s.mutex.Lock()
	s.rLocksToRelease = append(s.rLocksToRelease, metricID)
	s.mutex.Unlock()
	// Non-blocking wake-up; see appendJobToWorkerQueue for the rationale.
	select {
	case s.workerSignalCh <- struct{}{}:
	default:
	}
}
// correctToFHD shifts both boundary timestamps forward by firstHourOfDay
// hours, so that day-based windows start at the configured first hour of
// the day instead of midnight.
func correctToFHD(since, until uint32, firstHourOfDay int) (uint32, uint32) {
	shift := time.Duration(firstHourOfDay) * time.Hour
	adjust := func(ts uint32) uint32 {
		return uint32(time.Unix(int64(ts), 0).Add(shift).Unix())
	}
	return adjust(since), adjust(until)
}

71
database/metric.go Normal file
View File

@@ -0,0 +1,71 @@
package database
import (
octopus "gordenko.dev/dima/diploma"
"gordenko.dev/dima/diploma/chunkenc"
"gordenko.dev/dima/diploma/conbuf"
)
// METRIC

// _metric is the in-memory state of one metric: identity and on-disk
// layout info (type, precision, page numbers) plus the compressed tail of
// recent measures held in RAM.
type _metric struct {
	MetricType octopus.MetricType // Cumulative vs. instant; selects the value compressor
	FracDigits byte               // fractional digits preserved by the value compressor
	RootPageNo uint32             // root index page of the metric's on-disk tree (0 = none)
	LastPageNo uint32             // last data page written for this metric (0 = none)
	SinceValue float64            // value of the oldest in-memory measure
	Since      uint32             // timestamp of the oldest in-memory measure (0 = empty)
	UntilValue float64            // value of the newest in-memory measure
	Until      uint32             // timestamp of the newest in-memory measure
	TimestampsBuf *conbuf.ContinuousBuffer // backing buffer the timestamp compressor writes into
	ValuesBuf     *conbuf.ContinuousBuffer // backing buffer the value compressor writes into
	Timestamps octopus.TimestampCompressor // reverse delta-of-delta timestamp compressor
	Values     octopus.ValueCompressor     // reverse delta compressor (cumulative or instant variant)
}
// resetCompressors discards the metric's in-memory buffers and rebuilds
// empty compressors over fresh ones. The value compressor variant is
// chosen by the metric type. Shared by ReinitBy and DeleteMeasures (the
// two previously duplicated this code verbatim).
func (s *_metric) resetCompressors() {
	s.TimestampsBuf = conbuf.New(nil)
	s.ValuesBuf = conbuf.New(nil)
	s.Timestamps = chunkenc.NewReverseTimeDeltaOfDeltaCompressor(
		s.TimestampsBuf, 0)
	if s.MetricType == octopus.Cumulative {
		s.Values = chunkenc.NewReverseCumulativeDeltaCompressor(
			s.ValuesBuf, 0, s.FracDigits)
	} else {
		s.Values = chunkenc.NewReverseInstantDeltaCompressor(
			s.ValuesBuf, 0, s.FracDigits)
	}
}

// ReinitBy resets the in-memory compression state and seeds it with a
// single measure, which becomes both the oldest (Since) and the newest
// (Until) one. Used after an overflow flushes the previous tail to disk.
func (s *_metric) ReinitBy(timestamp uint32, value float64) {
	s.resetCompressors()
	s.Timestamps.Append(timestamp)
	s.Values.Append(value)
	s.Since = timestamp
	s.SinceValue = value
	s.Until = timestamp
	s.UntilValue = value
}

// DeleteMeasures resets the in-memory compression state and clears all
// bookkeeping fields, leaving the metric empty (Since == 0) with no
// on-disk pages referenced.
func (s *_metric) DeleteMeasures() {
	s.resetCompressors()
	s.RootPageNo = 0
	s.LastPageNo = 0
	s.Since = 0
	s.SinceValue = 0
	s.Until = 0
	s.UntilValue = 0
}

1736
database/proc.go Normal file

File diff suppressed because it is too large Load Diff

287
database/snapshot.go Normal file
View File

@@ -0,0 +1,287 @@
package database
import (
"fmt"
"hash/crc32"
"io"
"os"
"path/filepath"
octopus "gordenko.dev/dima/diploma"
"gordenko.dev/dima/diploma/atree"
"gordenko.dev/dima/diploma/bin"
"gordenko.dev/dima/diploma/chunkenc"
"gordenko.dev/dima/diploma/conbuf"
"gordenko.dev/dima/diploma/freelist"
)
/*
Snapshot file format:
//lsn - varuint (last LSN that modified data in RAM)
metricsQty - varuint
[metric]*
where each metric is:
metricID - 4b
metricType - 1b
fracDigits - 1b
rootPageNo - 4b
lastPageNo - 4b
since - 4b
sinceValue - 8b
until - 4b
untilValue - 8b
timestamps size - 2b
values size - 2b
timestamps payload - Nb
values payload - Nb
dataFreeList size - varuint
dataFreeList - Nb
indexFreeList size - varuint
indexFreeList - Nb
CRC32 - 4b
*/
const metricHeaderSize = 42
// dumpSnapshot writes a full snapshot of the in-memory state (all metrics
// plus both free lists) to "<dir>/<logNumber>.snapshot", protected by a
// trailing CRC32, then removes the previous generation's snapshot and
// changes files. The layout is documented at the top of this file.
func (s *Database) dumpSnapshot(logNumber int) (err error) {
	var (
		fileName = filepath.Join(s.dir, fmt.Sprintf("%d.snapshot", logNumber))
		hasher   = crc32.NewIEEE()
		prefix   = make([]byte, metricHeaderSize)
	)
	// Fix: added O_TRUNC. Without it, a stale snapshot with the same log
	// number (e.g. left by a crash mid-dump) could leave trailing bytes
	// after our payload, which would break the size-based CRC layout.
	file, err := os.OpenFile(fileName, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0770)
	if err != nil {
		return
	}
	// Fix: previously every early error return leaked the descriptor.
	// The happy path closes explicitly below so its Close error is reported.
	closed := false
	defer func() {
		if !closed {
			file.Close()
		}
	}()
	// Every byte written through dst also feeds the CRC32 hasher.
	dst := io.MultiWriter(file, hasher)
	_, err = bin.WriteVarUint64(dst, uint64(len(s.metrics)))
	if err != nil {
		return
	}
	for metricID, metric := range s.metrics {
		tSize := metric.Timestamps.Size()
		vSize := metric.Values.Size()
		// Fixed-width per-metric header (metricHeaderSize bytes).
		bin.PutUint32(prefix[0:], metricID)
		prefix[4] = byte(metric.MetricType)
		prefix[5] = metric.FracDigits
		bin.PutUint32(prefix[6:], metric.RootPageNo)
		bin.PutUint32(prefix[10:], metric.LastPageNo)
		bin.PutUint32(prefix[14:], metric.Since)
		bin.PutFloat64(prefix[18:], metric.SinceValue)
		bin.PutUint32(prefix[26:], metric.Until)
		bin.PutFloat64(prefix[30:], metric.UntilValue)
		bin.PutUint16(prefix[38:], uint16(tSize))
		bin.PutUint16(prefix[40:], uint16(vSize))
		_, err = dst.Write(prefix)
		if err != nil {
			return
		}
		// Copy exactly tSize bytes of compressed timestamps: the continuous
		// buffer is chunked, and the final chunk may contain spare capacity
		// beyond the compressor's logical size.
		remaining := tSize
		for _, buf := range metric.TimestampsBuf.Chunks() {
			if remaining < len(buf) {
				buf = buf[:remaining]
			}
			_, err = dst.Write(buf)
			if err != nil {
				return
			}
			remaining -= len(buf)
			if remaining == 0 {
				break
			}
		}
		// Same copy, for the compressed values payload.
		remaining = vSize
		for _, buf := range metric.ValuesBuf.Chunks() {
			if remaining < len(buf) {
				buf = buf[:remaining]
			}
			_, err = dst.Write(buf)
			if err != nil {
				return
			}
			remaining -= len(buf)
			if remaining == 0 {
				break
			}
		}
	}
	// free data pages
	err = freeListWriteTo(s.dataFreeList, dst)
	if err != nil {
		return
	}
	// free index pages
	err = freeListWriteTo(s.indexFreeList, dst)
	if err != nil {
		return
	}
	// The CRC is written to the file only — it must not hash itself.
	// NOTE(review): the write result is ignored here; Sync below would
	// likely surface an I/O failure, but checking this call's error
	// explicitly would be safer — confirm bin.WriteUint32's signature.
	bin.WriteUint32(file, hasher.Sum32())
	err = file.Sync()
	if err != nil {
		return
	}
	closed = true
	err = file.Close()
	if err != nil {
		return
	}
	// With the new snapshot durable, the previous generation's files are no
	// longer needed for recovery and can be removed.
	prevLogNumber := logNumber - 1
	prevChanges := filepath.Join(s.dir, fmt.Sprintf("%d.changes", prevLogNumber))
	prevSnapshot := filepath.Join(s.dir, fmt.Sprintf("%d.snapshot", prevLogNumber))
	isExist, err := isFileExist(prevChanges)
	if err != nil {
		return
	}
	if isExist {
		err = os.Remove(prevChanges)
		if err != nil {
			octopus.Abort(octopus.DeletePrevChangesFileFailed, err)
		}
	}
	isExist, err = isFileExist(prevSnapshot)
	if err != nil {
		return
	}
	if isExist {
		err = os.Remove(prevSnapshot)
		if err != nil {
			octopus.Abort(octopus.DeletePrevSnapshotFileFailed, err)
		}
	}
	return
}
// loadSnapshot restores the in-memory metric catalog and both free lists
// from a snapshot file, verifying the trailing CRC32 over everything read.
// It is the inverse of dumpSnapshot.
func (s *Database) loadSnapshot(fileName string) (err error) {
	var (
		hasher     = crc32.NewIEEE()
		metricsQty int
		header     = make([]byte, metricHeaderSize)
		// Scratch buffer reused for every metric's payloads.
		// NOTE(review): assumes tSize/vSize never exceed atree.DataPageSize;
		// confirm against the writer's limits, otherwise body[:tSize] panics.
		body = make([]byte, atree.DataPageSize)
	)
	file, err := os.Open(fileName)
	if err != nil {
		return
	}
	// Fix: the file was previously never closed (descriptor leak on both
	// the success and every error path).
	defer file.Close()
	// Every byte read through src also feeds the CRC32 hasher.
	src := io.TeeReader(file, hasher)
	u64, _, err := bin.ReadVarUint64(src)
	if err != nil {
		return
	}
	metricsQty = int(u64)
	for range metricsQty {
		var metric _metric
		err = bin.ReadNInto(src, header)
		if err != nil {
			return
		}
		// Decode the fixed-width header (see metricHeaderSize layout).
		metricID := bin.GetUint32(header[0:])
		metric.MetricType = octopus.MetricType(header[4])
		metric.FracDigits = header[5]
		metric.RootPageNo = bin.GetUint32(header[6:])
		metric.LastPageNo = bin.GetUint32(header[10:])
		metric.Since = bin.GetUint32(header[14:])
		metric.SinceValue = bin.GetFloat64(header[18:])
		metric.Until = bin.GetUint32(header[26:])
		metric.UntilValue = bin.GetFloat64(header[30:])
		tSize := bin.GetUint16(header[38:])
		vSize := bin.GetUint16(header[40:])
		buf := body[:tSize]
		err = bin.ReadNInto(src, buf)
		if err != nil {
			return
		}
		// NOTE(review): buf aliases the shared scratch buffer, which is
		// overwritten on the next read/iteration; this is only safe if
		// conbuf.NewFromBuffer copies its input — confirm.
		metric.TimestampsBuf = conbuf.NewFromBuffer(buf)
		buf = body[:vSize]
		err = bin.ReadNInto(src, buf)
		if err != nil {
			return
		}
		metric.ValuesBuf = conbuf.NewFromBuffer(buf)
		// Rebuild the compressors positioned at the loaded payload sizes.
		metric.Timestamps = chunkenc.NewReverseTimeDeltaOfDeltaCompressor(
			metric.TimestampsBuf, int(tSize))
		if metric.MetricType == octopus.Cumulative {
			metric.Values = chunkenc.NewReverseCumulativeDeltaCompressor(
				metric.ValuesBuf, int(vSize), metric.FracDigits)
		} else {
			metric.Values = chunkenc.NewReverseInstantDeltaCompressor(
				metric.ValuesBuf, int(vSize), metric.FracDigits)
		}
		s.metrics[metricID] = &metric
	}
	err = restoreFreeList(s.dataFreeList, src)
	if err != nil {
		return fmt.Errorf("restore dataFreeList: %s", err)
	}
	err = restoreFreeList(s.indexFreeList, src)
	if err != nil {
		return fmt.Errorf("restore indexFreeList: %s", err)
	}
	// The stored CRC is read from the raw file (not through the TeeReader),
	// so it is excluded from the computed checksum.
	calculatedChecksum := hasher.Sum32()
	writtenChecksum, err := bin.ReadUint32(file)
	if err != nil {
		return
	}
	if calculatedChecksum != writtenChecksum {
		return fmt.Errorf("calculated checksum %d not equal written checksum %d", calculatedChecksum, writtenChecksum)
	}
	return
}
// HELPERS
// freeListWriteTo serializes freeList and writes it to dst as a varuint
// length prefix followed by the raw serialized bytes. A serialization
// failure is treated as fatal and aborts the process, matching the
// snapshot writer's all-or-nothing policy.
func freeListWriteTo(freeList *freelist.FreeList, dst io.Writer) error {
	serialized, err := freeList.Serialize()
	if err != nil {
		octopus.Abort(octopus.FailedFreeListSerialize, err)
	}
	if _, err = bin.WriteVarUint64(dst, uint64(len(serialized))); err != nil {
		return err
	}
	if _, err = dst.Write(serialized); err != nil {
		return err
	}
	return nil
}
// restoreFreeList reads a varuint length prefix and then that many bytes
// from src, rebuilding freeList from them. Inverse of freeListWriteTo.
func restoreFreeList(freeList *freelist.FreeList, src io.Reader) error {
	size, _, err := bin.ReadVarUint64(src)
	if err != nil {
		return err
	}
	payload, err := bin.ReadN(src, int(size))
	if err != nil {
		return err
	}
	freeList.Restore(payload)
	return nil
}