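// Package atree implements the on-disk tree that indexes a metric's data
// pages by timestamp: fixed-size index pages map "since" timestamps to page
// numbers, and modifications are journaled through the redo package.
// (Summary inferred from this file alone.)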
package atree

import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"sync"

	"gordenko.dev/dima/diploma"
	"gordenko.dev/dima/diploma/atree/redo"
	"gordenko.dev/dima/diploma/bin"
)
const (
	filePerm = 0770

	// index page
	indexRecordsQtyIdx   = IndexPageSize - 7
	isDataPageNumbersIdx = IndexPageSize - 5
	indexCRC32Idx        = IndexPageSize - 4

	// data page
	timestampsSizeIdx = DataPageSize - 12
	valuesSizeIdx     = DataPageSize - 10
	prevPageIdx       = DataPageSize - 8
	dataCRC32Idx      = DataPageSize - 4

	timestampSize  = 4
	pairSize       = timestampSize + PageNoSize
	indexFooterIdx = indexRecordsQtyIdx
	dataFooterIdx  = timestampsSizeIdx

	DataPageSize  = 8192
	IndexPageSize = 1024
	PageNoSize    = 4

	DataPagePayloadSize int = dataFooterIdx
)
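// FreeList supplies page numbers for allocation. Implementations may hand
// out recycled pages rather than fresh ones (see the IsReused flags used
// below).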
type FreeList interface {
	ReservePage() uint32
}
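// _page is an in-memory copy of an on-disk page together with a reference
// count that keeps it cached while readers or writers still need it.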
type _page struct {
	PageNo         uint32
	Buf            []byte
	ReferenceCount int
}
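// Atree caches index and data pages in memory and funnels all disk I/O
// through two background goroutines (see Run): pending reads accumulate in
// indexPagesToRead/dataPagesToRead, pending writes in writeTasksQueue.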
type Atree struct {
	redoDir                string
	indexFreelist          FreeList
	dataFreelist           FreeList
	dataFile               *os.File
	indexFile              *os.File
	mutex                  sync.Mutex
	allocatedIndexPagesQty uint32
	allocatedDataPagesQty  uint32
	indexPages             map[uint32]*_page
	dataPages              map[uint32]*_page
	indexWaits             map[uint32][]chan readResult
	dataWaits              map[uint32][]chan readResult
	indexPagesToRead       []uint32
	dataPagesToRead        []uint32
	readSignalCh           chan struct{}
	writeSignalCh          chan struct{}
	writeTasksQueue        []WriteTask
}
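// Options configures New; every field is required.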
type Options struct {
	Dir           string
	RedoDir       string
	DatabaseName  string
	DataFreeList  FreeList
	IndexFreeList FreeList
}
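// New validates opt and opens the DatabaseName.index and DatabaseName.data
// files in Dir, creating both when neither exists yet. Call Run on the
// returned tree before using it.
//
// A minimal usage sketch (the freelist values are placeholders for any
// FreeList implementation):
//
//	tree, err := atree.New(atree.Options{
//		Dir:           "/var/lib/tsdb",
//		RedoDir:       "/var/lib/tsdb/redo",
//		DatabaseName:  "metrics",
//		DataFreeList:  dataFreelist,
//		IndexFreeList: indexFreelist,
//	})
//	if err != nil {
//		log.Fatal(err)
//	}
//	tree.Run()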
func New(opt Options) (*Atree, error) {
	if opt.Dir == "" {
		return nil, errors.New("Dir option is required")
	}
	if opt.RedoDir == "" {
		return nil, errors.New("RedoDir option is required")
	}
	if opt.DatabaseName == "" {
		return nil, errors.New("DatabaseName option is required")
	}
	if opt.DataFreeList == nil {
		return nil, errors.New("DataFreeList option is required")
	}
	if opt.IndexFreeList == nil {
		return nil, errors.New("IndexFreeList option is required")
	}
	// Open or create the dbName.data and dbName.index files.
	var (
		indexFileName = filepath.Join(opt.Dir, opt.DatabaseName+".index")
		dataFileName  = filepath.Join(opt.Dir, opt.DatabaseName+".data")

		indexFile              *os.File
		dataFile               *os.File
		allocatedIndexPagesQty uint32
		allocatedDataPagesQty  uint32
	)

	// The index is created together with the data file, so a consistent
	// database state is: either both files exist or neither does.
	isIndexExist, err := isFileExist(indexFileName)
	if err != nil {
		return nil, fmt.Errorf("check index file exists: %s", err)
	}

	isDataExist, err := isFileExist(dataFileName)
	if err != nil {
		return nil, fmt.Errorf("check data file exists: %s", err)
	}

	if isIndexExist {
		if isDataExist {
			// open both files
			indexFile, allocatedIndexPagesQty, err = openFile(indexFileName, IndexPageSize)
			if err != nil {
				return nil, fmt.Errorf("open index file: %s", err)
			}

			dataFile, allocatedDataPagesQty, err = openFile(dataFileName, DataPageSize)
			if err != nil {
				return nil, fmt.Errorf("open data file: %s", err)
			}
		} else {
			// the data file is missing
			return nil, errors.New("data file not found")
		}
	} else {
		if isDataExist {
			// the index file is missing
			return nil, errors.New("index file not found")
		} else {
			// neither file exists: create both
			indexFile, err = os.OpenFile(indexFileName, os.O_CREATE|os.O_RDWR, filePerm)
			if err != nil {
				return nil, err
			}

			dataFile, err = os.OpenFile(dataFileName, os.O_CREATE|os.O_RDWR, filePerm)
			if err != nil {
				return nil, err
			}
		}
	}

	tree := &Atree{
		redoDir:                opt.RedoDir,
		indexFreelist:          opt.IndexFreeList,
		dataFreelist:           opt.DataFreeList,
		indexFile:              indexFile,
		dataFile:               dataFile,
		allocatedIndexPagesQty: allocatedIndexPagesQty,
		allocatedDataPagesQty:  allocatedDataPagesQty,
		indexPages:             make(map[uint32]*_page),
		dataPages:              make(map[uint32]*_page),
		indexWaits:             make(map[uint32][]chan readResult),
		dataWaits:              make(map[uint32][]chan readResult),
		readSignalCh:           make(chan struct{}, 1),
		writeSignalCh:          make(chan struct{}, 1),
	}

	return tree, nil
}
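// Run starts the background goroutines that serve queued page writes and
// batched page reads.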
func (s *Atree) Run() {
	go s.pageWriter()
	go s.pageReader()
}

// FIND
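// findDataPage descends from rootPageNo to the data page whose range covers
// timestamp. Visited index pages are released along the way; the returned
// data page stays fetched and must be released by the caller.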
func (s *Atree) findDataPage(rootPageNo uint32, timestamp uint32) (uint32, []byte, error) {
	indexPageNo := rootPageNo
	for {
		buf, err := s.fetchIndexPage(indexPageNo)
		if err != nil {
			return 0, nil, fmt.Errorf("fetchIndexPage(%d): %s", indexPageNo, err)
		}

		foundPageNo := findPageNo(buf, timestamp)
		s.releaseIndexPage(indexPageNo)

		if buf[isDataPageNumbersIdx] == 1 {
			buf, err := s.fetchDataPage(foundPageNo)
			if err != nil {
				return 0, nil, fmt.Errorf("fetchDataPage(%d): %s", foundPageNo, err)
			}
			return foundPageNo, buf, nil
		}
		// descend one level
		indexPageNo = foundPageNo
	}
}
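// pathLeg is one index page visited on the way from the root to a leaf.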
type pathLeg struct {
	PageNo uint32
	Data   []byte
}
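// pathToDataPage is the chain of index pages from the root down to the leaf
// level, plus the number of the last (most recent) data page it points to.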
type pathToDataPage struct {
	Legs       []pathLeg
	LastPageNo uint32
}
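// findPathToLastPage follows the last (latest) child pointer at every level
// and records each index page it visits. The returned pages remain fetched;
// the caller is responsible for releasing them.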
func (s *Atree) findPathToLastPage(rootPageNo uint32) (_ pathToDataPage, err error) {
	var (
		pageNo = rootPageNo
		legs   []pathLeg
	)

	for {
		var buf []byte
		buf, err = s.fetchIndexPage(pageNo)
		if err != nil {
			err = fmt.Errorf("fetchIndexPage(%d): %s", pageNo, err)
			return
		}

		legs = append(legs, pathLeg{
			PageNo: pageNo,
			Data:   buf,
			// childIdx is not needed here
		})

		foundPageNo := getLastPageNo(buf)

		if buf[isDataPageNumbersIdx] == 1 {
			return pathToDataPage{
				Legs:       legs,
				LastPageNo: foundPageNo,
			}, nil
		}
		// descend one level
		pageNo = foundPageNo
	}
}
// APPEND DATA PAGE
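// AppendDataPageReq carries a metric's accumulated timestamp and value
// chunks to be sealed into a new data page, plus the index coordinates
// (RootPageNo, PrevPageNo, Since) needed to link that page into the tree.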
type AppendDataPageReq struct {
	MetricID         uint32
	Timestamp        uint32
	Value            float64
	Since            uint32
	RootPageNo       uint32
	PrevPageNo       uint32
	TimestampsChunks [][]byte
	TimestampsSize   uint16
	ValuesChunks     [][]byte
	ValuesSize       uint16
}
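// AppendDataPage copies req's chunks into a newly allocated data page,
// journals every modified page through the redo writer, and links the new
// page into the index tree: walking the leaf-to-root path, it appends a
// (timestamp, pageNo) pair to the first index page that still has room,
// allocating a fresh index page for every level that is full and, if even
// the root is full, a new root that points to the old root and the new
// subtree. The call blocks until the page writer has flushed everything,
// then releases all pages it pinned.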
func (s *Atree) AppendDataPage(req AppendDataPageReq) (_ redo.Report, err error) {
	var (
		flags               byte
		dataPagesToRelease  []uint32
		indexPagesToRelease []uint32
	)

	newDataPage := s.allocDataPage()
	dataPagesToRelease = append(dataPagesToRelease, newDataPage.PageNo)

	chunksToDataPage(newDataPage.Data, chunksToDataPageReq{
		PrevPageNo:       req.PrevPageNo,
		TimestampsChunks: req.TimestampsChunks,
		TimestampsSize:   req.TimestampsSize,
		ValuesChunks:     req.ValuesChunks,
		ValuesSize:       req.ValuesSize,
	})

	redoWriter, err := redo.NewWriter(redo.WriterOptions{
		Dir:              s.redoDir,
		MetricID:         req.MetricID,
		Timestamp:        req.Timestamp,
		Value:            req.Value,
		IsDataPageReused: newDataPage.IsReused,
		DataPageNo:       newDataPage.PageNo,
		Page:             newDataPage.Data,
	})
	if err != nil {
		return
	}

	if req.RootPageNo > 0 {
		var path pathToDataPage
		path, err = s.findPathToLastPage(req.RootPageNo)
		if err != nil {
			return
		}
		for _, leg := range path.Legs {
			indexPagesToRelease = append(indexPagesToRelease, leg.PageNo)
		}

		if path.LastPageNo != req.PrevPageNo {
			diploma.Abort(
				diploma.WrongPrevPageNo,
				fmt.Errorf("bug: last pageNo %d in tree != prev pageNo %d in _metric",
					path.LastPageNo, req.PrevPageNo),
			)
		}

		newPageNo := newDataPage.PageNo
		lastIdx := len(path.Legs) - 1

		for legIdx := lastIdx; legIdx >= 0; legIdx-- {
			leg := path.Legs[legIdx]

			ok := appendPair(leg.Data, req.Since, newPageNo)
			if ok {
				err = redoWriter.AppendIndexPage(leg.PageNo, leg.Data, 0)
				if err != nil {
					return
				}
				break
			}

			newIndexPage := s.allocIndexPage()
			indexPagesToRelease = append(indexPagesToRelease, newIndexPage.PageNo)
			appendPair(newIndexPage.Data, req.Since, newPageNo)
			// mark that every pageNo on this page refers to a data page
			if legIdx == lastIdx {
				newIndexPage.Data[isDataPageNumbersIdx] = 1
			}

			flags = 0
			if newIndexPage.IsReused {
				flags |= redo.FlagReused
			}
			err = redoWriter.AppendIndexPage(newIndexPage.PageNo, newIndexPage.Data, flags)
			if err != nil {
				return
			}

			newPageNo = newIndexPage.PageNo

			if legIdx == 0 {
				newRoot := s.allocIndexPage()
				indexPagesToRelease = append(indexPagesToRelease, newRoot.PageNo)
				appendPair(newRoot.Data, getSince(leg.Data), leg.PageNo) // old rootPageNo
				appendPair(newRoot.Data, req.Since, newIndexPage.PageNo)

				// Record the new root in the REDO log.
				flags = redo.FlagNewRoot
				if newRoot.IsReused {
					flags |= redo.FlagReused
				}
				err = redoWriter.AppendIndexPage(newRoot.PageNo, newRoot.Data, flags)
				if err != nil {
					return
				}
				break
			}
		}
	} else {
		newRoot := s.allocIndexPage()
		indexPagesToRelease = append(indexPagesToRelease, newRoot.PageNo)
		newRoot.Data[isDataPageNumbersIdx] = 1
		appendPair(newRoot.Data, req.Since, newDataPage.PageNo)

		flags = redo.FlagNewRoot
		if newRoot.IsReused {
			flags |= redo.FlagReused
		}
		err = redoWriter.AppendIndexPage(newRoot.PageNo, newRoot.Data, flags)
		if err != nil {
			return
		}
	}

	err = redoWriter.Close()
	if err != nil {
		return
	}

	// At this point the picture is as follows: every page touched, data and
	// index alike, is pinned in the cache, so the disk write will proceed as
	// fast as possible. Once it completes, the ReferenceCount of each page is
	// decremented by 1. Because an XLock is held on the metric, each page has
	// ReferenceCount = 1 (there are no other readers).
	waitCh := make(chan struct{})

	task := WriteTask{
		WaitCh: waitCh,
		DataPage: redo.PageToWrite{
			PageNo: newDataPage.PageNo,
			Data:   newDataPage.Data,
		},
		IndexPages: redoWriter.IndexPagesToWrite(),
	}

	s.appendWriteTaskToQueue(task)

	<-waitCh

	for _, pageNo := range dataPagesToRelease {
		s.releaseDataPage(pageNo)
	}
	for _, pageNo := range indexPagesToRelease {
		s.releaseIndexPage(pageNo)
	}
	return redoWriter.GetReport(), nil
}
// DELETE
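// PageLists groups the page numbers of one tree by the file they live in.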
type PageLists struct {
	DataPages  []uint32
	IndexPages []uint32
}
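// Level is one frame of the explicit stack used by GetAllPages: an index
// page plus the position of the next child pointer to follow.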
type Level struct {
	PageNo   uint32
	PageData []byte
	Idx      int
	ChildQty int
}
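// GetAllPages performs an iterative depth-first walk of the tree rooted at
// rootPageNo and returns the numbers of every index and data page in it,
// e.g. so they can be handed back to the freelists when the tree is deleted.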
func (s *Atree) GetAllPages(rootPageNo uint32) (_ PageLists, err error) {
	var (
		dataPages  []uint32
		indexPages []uint32
		levels     []*Level
	)

	buf, err := s.fetchIndexPage(rootPageNo)
	if err != nil {
		err = fmt.Errorf("fetchIndexPage(%d): %s", rootPageNo, err)
		return
	}
	indexPages = append(indexPages, rootPageNo)

	if buf[isDataPageNumbersIdx] == 1 {
		pageNumbers := listPageNumbers(buf)
		dataPages = append(dataPages, pageNumbers...)

		s.releaseIndexPage(rootPageNo)

		return PageLists{
			DataPages:  dataPages,
			IndexPages: indexPages,
		}, nil
	}

	levels = append(levels, &Level{
		PageNo:   rootPageNo,
		PageData: buf,
		Idx:      0,
		ChildQty: bin.GetUint16AsInt(buf[indexRecordsQtyIdx:]),
	})

	for {
		if len(levels) == 0 {
			return PageLists{
				DataPages:  dataPages,
				IndexPages: indexPages,
			}, nil
		}

		lastIdx := len(levels) - 1
		level := levels[lastIdx]

		if level.Idx < level.ChildQty {
			pageNo := getPageNo(level.PageData, level.Idx)
			level.Idx++

			var buf []byte
			buf, err = s.fetchIndexPage(pageNo)
			if err != nil {
				err = fmt.Errorf("fetchIndexPage(%d): %s", pageNo, err)
				return
			}
			indexPages = append(indexPages, pageNo)

			if buf[isDataPageNumbersIdx] == 1 {
				pageNumbers := listPageNumbers(buf)
				dataPages = append(dataPages, pageNumbers...)

				s.releaseIndexPage(pageNo)
			} else {
				levels = append(levels, &Level{
					PageNo:   pageNo,
					PageData: buf,
					Idx:      0,
					ChildQty: bin.GetUint16AsInt(buf[indexRecordsQtyIdx:]),
				})
			}
		} else {
			s.releaseIndexPage(level.PageNo)
			levels = levels[:lastIdx]
		}
	}
}