You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
430 lines
8.6 KiB
430 lines
8.6 KiB
package atree
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"hash/crc32"
|
|
"io/fs"
|
|
"math"
|
|
"os"
|
|
|
|
diploma "gordenko.dev/dima/diploma"
|
|
"gordenko.dev/dima/diploma/atree/redo"
|
|
"gordenko.dev/dima/diploma/bin"
|
|
)
|
|
|
|
// AllocatedPage describes a page handed out by allocIndexPage or
// allocDataPage.
type AllocatedPage struct {
	// PageNo is the number assigned to the page.
	PageNo uint32
	// Data is the page buffer; the allocator stores the same slice in the
	// in-memory page map.
	Data []byte
	// IsReused is true when the page number came from the freelist instead
	// of being obtained by growing the allocated-pages counter.
	IsReused bool
}
|
|
|
|
// readResult is the message delivered to a goroutine that is waiting for a
// page to be read from disk.
type readResult struct {
	Data []byte // page contents; left nil by the reader when the read failed
	Err  error  // read failure, nil on success
}
|
|
|
|
// INDEX PAGES
|
|
|
|
func (s *Atree) DeleteIndexPages(pageNumbers []uint32) {
|
|
s.mutex.Lock()
|
|
for _, pageNo := range pageNumbers {
|
|
delete(s.indexPages, pageNo)
|
|
}
|
|
s.mutex.Unlock()
|
|
}
|
|
|
|
func (s *Atree) fetchIndexPage(pageNo uint32) ([]byte, error) {
|
|
s.mutex.Lock()
|
|
p, ok := s.indexPages[pageNo]
|
|
if ok {
|
|
p.ReferenceCount++
|
|
s.mutex.Unlock()
|
|
return p.Buf, nil
|
|
}
|
|
|
|
resultCh := make(chan readResult, 1)
|
|
s.indexWaits[pageNo] = append(s.indexWaits[pageNo], resultCh)
|
|
if len(s.indexWaits[pageNo]) == 1 {
|
|
s.indexPagesToRead = append(s.indexPagesToRead, pageNo)
|
|
s.mutex.Unlock()
|
|
|
|
select {
|
|
case s.readSignalCh <- struct{}{}:
|
|
default:
|
|
}
|
|
} else {
|
|
s.mutex.Unlock()
|
|
}
|
|
|
|
result := <-resultCh
|
|
if result.Err == nil {
|
|
result.Err = s.verifyCRC(result.Data, IndexPageSize)
|
|
}
|
|
return result.Data, result.Err
|
|
}
|
|
|
|
func (s *Atree) releaseIndexPage(pageNo uint32) {
|
|
s.mutex.Lock()
|
|
defer s.mutex.Unlock()
|
|
|
|
p, ok := s.indexPages[pageNo]
|
|
if ok {
|
|
if p.ReferenceCount > 0 {
|
|
p.ReferenceCount--
|
|
return
|
|
} else {
|
|
diploma.Abort(
|
|
diploma.ReferenceCountBug,
|
|
fmt.Errorf("call releaseIndexPage on page %d with reference count = %d",
|
|
pageNo, p.ReferenceCount),
|
|
)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *Atree) allocIndexPage() AllocatedPage {
|
|
var (
|
|
allocated = AllocatedPage{
|
|
Data: make([]byte, IndexPageSize),
|
|
}
|
|
)
|
|
|
|
allocated.PageNo = s.indexFreelist.ReservePage()
|
|
if allocated.PageNo > 0 {
|
|
allocated.IsReused = true
|
|
|
|
s.mutex.Lock()
|
|
} else {
|
|
s.mutex.Lock()
|
|
if s.allocatedIndexPagesQty == math.MaxUint32 {
|
|
diploma.Abort(diploma.MaxAtreeSizeExceeded,
|
|
errors.New("no space in Atree index"))
|
|
}
|
|
s.allocatedIndexPagesQty++
|
|
allocated.PageNo = s.allocatedIndexPagesQty
|
|
}
|
|
|
|
s.indexPages[allocated.PageNo] = &_page{
|
|
PageNo: allocated.PageNo,
|
|
Buf: allocated.Data,
|
|
ReferenceCount: 1,
|
|
}
|
|
s.mutex.Unlock()
|
|
return allocated
|
|
}
|
|
|
|
// DATA PAGES
|
|
|
|
func (s *Atree) DeleteDataPages(pageNumbers []uint32) {
|
|
s.mutex.Lock()
|
|
for _, pageNo := range pageNumbers {
|
|
delete(s.dataPages, pageNo)
|
|
}
|
|
s.mutex.Unlock()
|
|
}
|
|
|
|
func (s *Atree) fetchDataPage(pageNo uint32) ([]byte, error) {
|
|
s.mutex.Lock()
|
|
p, ok := s.dataPages[pageNo]
|
|
if ok {
|
|
p.ReferenceCount++
|
|
s.mutex.Unlock()
|
|
return p.Buf, nil
|
|
}
|
|
|
|
resultCh := make(chan readResult, 1)
|
|
s.dataWaits[pageNo] = append(s.dataWaits[pageNo], resultCh)
|
|
if len(s.dataWaits[pageNo]) == 1 {
|
|
s.dataPagesToRead = append(s.dataPagesToRead, pageNo)
|
|
s.mutex.Unlock()
|
|
|
|
select {
|
|
case s.readSignalCh <- struct{}{}:
|
|
default:
|
|
}
|
|
} else {
|
|
s.mutex.Unlock()
|
|
}
|
|
result := <-resultCh
|
|
if result.Err == nil {
|
|
result.Err = s.verifyCRC(result.Data, DataPageSize)
|
|
}
|
|
return result.Data, result.Err
|
|
}
|
|
|
|
func (s *Atree) releaseDataPage(pageNo uint32) {
|
|
s.mutex.Lock()
|
|
defer s.mutex.Unlock()
|
|
|
|
p, ok := s.dataPages[pageNo]
|
|
if ok {
|
|
if p.ReferenceCount > 0 {
|
|
p.ReferenceCount--
|
|
return
|
|
} else {
|
|
diploma.Abort(
|
|
diploma.ReferenceCountBug,
|
|
fmt.Errorf("call releaseDataPage on page %d with reference count = %d",
|
|
pageNo, p.ReferenceCount),
|
|
)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *Atree) allocDataPage() AllocatedPage {
|
|
var (
|
|
allocated = AllocatedPage{
|
|
Data: make([]byte, DataPageSize),
|
|
}
|
|
)
|
|
|
|
allocated.PageNo = s.dataFreelist.ReservePage()
|
|
if allocated.PageNo > 0 {
|
|
allocated.IsReused = true
|
|
s.mutex.Lock()
|
|
} else {
|
|
s.mutex.Lock()
|
|
if s.allocatedDataPagesQty == math.MaxUint32 {
|
|
diploma.Abort(diploma.MaxAtreeSizeExceeded,
|
|
errors.New("no space in Atree index"))
|
|
}
|
|
s.allocatedDataPagesQty++
|
|
allocated.PageNo = s.allocatedDataPagesQty
|
|
}
|
|
|
|
s.dataPages[allocated.PageNo] = &_page{
|
|
PageNo: allocated.PageNo,
|
|
Buf: allocated.Data,
|
|
ReferenceCount: 1,
|
|
}
|
|
s.mutex.Unlock()
|
|
return allocated
|
|
}
|
|
|
|
// READ
|
|
|
|
func (s *Atree) pageReader() {
|
|
for {
|
|
select {
|
|
case <-s.readSignalCh:
|
|
s.readPages()
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *Atree) readPages() {
|
|
s.mutex.Lock()
|
|
if len(s.indexPagesToRead) == 0 && len(s.dataPagesToRead) == 0 {
|
|
s.mutex.Unlock()
|
|
return
|
|
}
|
|
indexPagesToRead := s.indexPagesToRead
|
|
s.indexPagesToRead = nil
|
|
dataPagesToRead := s.dataPagesToRead
|
|
s.dataPagesToRead = nil
|
|
s.mutex.Unlock()
|
|
|
|
for _, pageNo := range dataPagesToRead {
|
|
buf := make([]byte, DataPageSize)
|
|
off := (pageNo - 1) * DataPageSize
|
|
n, err := s.dataFile.ReadAt(buf, int64(off))
|
|
if n != DataPageSize {
|
|
err = fmt.Errorf("read %d instead of %d", n, DataPageSize)
|
|
}
|
|
|
|
s.mutex.Lock()
|
|
resultChannels := s.dataWaits[pageNo]
|
|
delete(s.dataWaits, pageNo)
|
|
|
|
if err != nil {
|
|
s.mutex.Unlock()
|
|
for _, resultCh := range resultChannels {
|
|
resultCh <- readResult{
|
|
Err: err,
|
|
}
|
|
}
|
|
} else {
|
|
s.dataPages[pageNo] = &_page{
|
|
PageNo: pageNo,
|
|
Buf: buf,
|
|
ReferenceCount: len(resultChannels),
|
|
}
|
|
s.mutex.Unlock()
|
|
for _, resultCh := range resultChannels {
|
|
resultCh <- readResult{
|
|
Data: buf,
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
for _, pageNo := range indexPagesToRead {
|
|
buf := make([]byte, IndexPageSize)
|
|
off := (pageNo - 1) * IndexPageSize
|
|
n, err := s.indexFile.ReadAt(buf, int64(off))
|
|
if n != IndexPageSize {
|
|
err = fmt.Errorf("read %d instead of %d", n, IndexPageSize)
|
|
}
|
|
|
|
s.mutex.Lock()
|
|
resultChannels := s.indexWaits[pageNo]
|
|
delete(s.indexWaits, pageNo)
|
|
|
|
if err != nil {
|
|
s.mutex.Unlock()
|
|
for _, resultCh := range resultChannels {
|
|
resultCh <- readResult{
|
|
Err: err,
|
|
}
|
|
}
|
|
} else {
|
|
s.indexPages[pageNo] = &_page{
|
|
PageNo: pageNo,
|
|
Buf: buf,
|
|
ReferenceCount: len(resultChannels),
|
|
}
|
|
s.mutex.Unlock()
|
|
for _, resultCh := range resultChannels {
|
|
resultCh <- readResult{
|
|
Data: buf,
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
// WRITE
|
|
|
|
func (s *Atree) pageWriter() {
|
|
for {
|
|
select {
|
|
case <-s.writeSignalCh:
|
|
err := s.writeTasks()
|
|
if err != nil {
|
|
diploma.Abort(diploma.WriteToAtreeFailed, err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
type WriteTask struct {
|
|
WaitCh chan struct{}
|
|
DataPage redo.PageToWrite
|
|
IndexPages []redo.PageToWrite
|
|
}
|
|
|
|
func (s *Atree) appendWriteTaskToQueue(task WriteTask) {
|
|
s.mutex.Lock()
|
|
s.writeTasksQueue = append(s.writeTasksQueue, task)
|
|
s.mutex.Unlock()
|
|
|
|
select {
|
|
case s.writeSignalCh <- struct{}{}:
|
|
default:
|
|
}
|
|
}
|
|
|
|
func (s *Atree) writeTasks() error {
|
|
s.mutex.Lock()
|
|
tasks := s.writeTasksQueue
|
|
s.writeTasksQueue = nil
|
|
s.mutex.Unlock()
|
|
|
|
for _, task := range tasks {
|
|
// data page
|
|
p := task.DataPage
|
|
if len(p.Data) != DataPageSize {
|
|
return fmt.Errorf("wrong data page %d size: %d",
|
|
p.PageNo, len(p.Data))
|
|
}
|
|
off := (p.PageNo - 1) * DataPageSize
|
|
n, err := s.dataFile.WriteAt(p.Data, int64(off))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if n != len(p.Data) {
|
|
return fmt.Errorf("write %d instead of %d", n, len(p.Data))
|
|
}
|
|
|
|
// index pages
|
|
for _, p := range task.IndexPages {
|
|
if len(p.Data) != IndexPageSize {
|
|
return fmt.Errorf("wrong index page %d size: %d",
|
|
p.PageNo, len(p.Data))
|
|
}
|
|
bin.PutUint32(p.Data[indexCRC32Idx:], crc32.ChecksumIEEE(p.Data[:indexCRC32Idx]))
|
|
|
|
off := (p.PageNo - 1) * IndexPageSize
|
|
n, err := s.indexFile.WriteAt(p.Data, int64(off))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if n != len(p.Data) {
|
|
return fmt.Errorf("write %d instead of %d", n, len(p.Data))
|
|
}
|
|
}
|
|
close(task.WaitCh)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// IO
|
|
|
|
// isFileExist reports whether fileName can be stat'ed. A "does not exist"
// failure yields (false, nil); any other os.Stat failure is returned as the
// error.
func isFileExist(fileName string) (bool, error) {
	_, err := os.Stat(fileName)
	switch {
	case err == nil:
		return true, nil
	case errors.Is(err, fs.ErrNotExist):
		return false, nil
	default:
		return false, err
	}
}
|
|
|
|
func openFile(fileName string, pageSize int) (_ *os.File, _ uint32, err error) {
|
|
file, err := os.OpenFile(fileName, os.O_RDWR, filePerm)
|
|
if err != nil {
|
|
return
|
|
}
|
|
fi, err := file.Stat()
|
|
if err != nil {
|
|
return
|
|
}
|
|
fileSize := fi.Size()
|
|
|
|
if (fileSize % int64(pageSize)) > 0 {
|
|
err = fmt.Errorf("the file size %d is not a multiple of the page size %d",
|
|
fileSize, pageSize)
|
|
return
|
|
}
|
|
|
|
allocatedPagesQty := fileSize / int64(pageSize)
|
|
if allocatedPagesQty > math.MaxUint32 {
|
|
err = fmt.Errorf("allocated pages %d is > max pages %d",
|
|
allocatedPagesQty, math.MaxUint32)
|
|
return
|
|
}
|
|
|
|
return file, uint32(allocatedPagesQty), nil
|
|
}
|
|
|
|
func (s *Atree) ApplyREDO(task WriteTask) {
|
|
s.appendWriteTaskToQueue(task)
|
|
}
|
|
|
|
func (s *Atree) verifyCRC(data []byte, pageSize int) error {
|
|
var (
|
|
pos = pageSize - 4
|
|
calculatedCRC = crc32.ChecksumIEEE(data[:pos])
|
|
storedCRC = bin.GetUint32(data[pos:])
|
|
)
|
|
if calculatedCRC != storedCRC {
|
|
return fmt.Errorf("calculatedCRC %d not equal storedCRC %d",
|
|
calculatedCRC, storedCRC)
|
|
}
|
|
return nil
|
|
}
|
|
|