github.com/scottcagno/storage@v1.8.0/pkg/_junk/_lsmtree/sstable/sstable.go (about)

     1  package sstable
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"io"
     7  	"os"
     8  	"path/filepath"
     9  	"sort"
    10  	"strconv"
    11  )
    12  
// Components used to build and parse sstable file names.
const (
	filePrefix      = "sst-" // leading part of every generated file name
	dataFileSuffix  = ".dat" // extension of data file names
	indexFileSuffix = ".idx" // not referenced in this file; presumably the index file extension — confirm in the index code
)
    18  
// TombstoneEntry is a nil byte slice.
// NOTE(review): presumably stored as an entry's value to mark a key as
// deleted — nothing in this file uses it, confirm against callers.
var TombstoneEntry = []byte(nil)
    20  
// Sentinel errors of the sstable package.
var (
	// ErrFileClosed is returned by the SSTable methods when the table is not open.
	ErrFileClosed = errors.New("error: file is closed")
	// ErrIndexEntryNotFound is not returned by any code in this file.
	ErrIndexEntryNotFound = errors.New("error: index entry not found")
	// ErrEmpty is returned by WriteBatch when it is handed a nil batch.
	ErrEmpty = errors.New("error: empty")
	// ErrSSIndexNotFound is returned by the SSTable methods when the table has no index.
	ErrSSIndexNotFound = errors.New("error: ssindex not found")
	// ErrFileIsEmpty is not returned by any code in this file.
	ErrFileIsEmpty = errors.New("error: file is empty")
	// ErrSSTableNotFound is returned by RebuildSSTableIndex when the data file does not exist.
	ErrSSTableNotFound = errors.New("error: sstable not found")
)
    29  
    30  func DataFileNameFromIndex(index int64) string {
    31  	hexa := strconv.FormatInt(index, 16)
    32  	return fmt.Sprintf("%s%010s%s", filePrefix, hexa, dataFileSuffix)
    33  }
    34  
    35  func IndexFromDataFileName(name string) (int64, error) {
    36  	hexa := name[len(filePrefix) : len(name)-len(dataFileSuffix)]
    37  	return strconv.ParseInt(hexa, 16, 32)
    38  }
    39  
// sstDataEntry is a single key/value record. Entries are collected in a
// Batch and are what the SSTable read and write methods exchange.
type sstDataEntry struct {
	key   string // key identifies the entry; Batch ordering compares keys
	value []byte // value is the payload stored under key
}
    44  
    45  func (e *sstDataEntry) String() string {
    46  	return fmt.Sprintf("sstDataEntry.key=%q, sstDataEntry.value=%s", e.key, e.value)
    47  }
    48  
// Batch is a collection of data entries. Its Len, Less and Swap methods
// implement sort.Interface with ordering by key, which WriteBatch relies on
// to sort the entries before writing them.
type Batch struct {
	data []*sstDataEntry // entries in the order they were added, until sorted
}
    52  
    53  func (b *Batch) String() string {
    54  	var ss string
    55  	for i := range b.data {
    56  		ss += fmt.Sprintf("b.data[%d].key=%q, value=%q\n", i, b.data[i].key, b.data[i].value)
    57  	}
    58  	return ss
    59  }
    60  
    61  func NewBatch() *Batch {
    62  	return &Batch{
    63  		data: make([]*sstDataEntry, 0),
    64  	}
    65  }
    66  
    67  func (b *Batch) Write(key string, value []byte) {
    68  	b.data = append(b.data, &sstDataEntry{key: key, value: value})
    69  }
    70  
    71  func (b *Batch) WriteDataEntry(de *sstDataEntry) {
    72  	b.data = append(b.data, de)
    73  }
    74  
    75  // Len [implementing sort interface]
    76  func (b *Batch) Len() int {
    77  	return len(b.data)
    78  }
    79  
    80  // Less [implementing sort interface]
    81  func (b *Batch) Less(i, j int) bool {
    82  	return b.data[i].key < b.data[j].key
    83  }
    84  
    85  // Swap [implementing sort interface]
    86  func (b *Batch) Swap(i, j int) {
    87  	b.data[i], b.data[j] = b.data[j], b.data[i]
    88  }
    89  
// SSTable couples an open data file with the SSIndex that is written
// alongside it. Instances are produced by CreateSSTable and OpenSSTable.
type SSTable struct {
	//lock  sync.RWMutex
	path  string   // path is the filepath for the data
	file  *os.File // file is the file descriptor for the data
	open  bool     // open reports the status of the file
	index *SSIndex // SSIndex is an SSTableIndex file

	// readOnly is set to true by OpenSSTable and left false by
	// CreateSSTable; no code in this file consults it.
	readOnly bool
}
    99  
   100  func (sst *SSTable) SSTablePath() string {
   101  	return sst.path
   102  }
   103  
   104  func (sst *SSTable) SSIndexPath() string {
   105  	return sst.index.path
   106  }
   107  
   108  func CreateSSTable(base string, index int64) (*SSTable, error) {
   109  	// make sure we are working with absolute paths
   110  	base, err := filepath.Abs(base)
   111  	if err != nil {
   112  		return nil, err
   113  	}
   114  	// sanitize any path separators
   115  	base = filepath.ToSlash(base)
   116  	// create any directories if they are not there
   117  	err = os.MkdirAll(base, os.ModeDir)
   118  	if err != nil {
   119  		return nil, err
   120  	}
   121  	// create new data file path
   122  	path := filepath.Join(base, DataFileNameFromIndex(index))
   123  	// check to make sure file doesn't exist
   124  	_, err = os.Stat(path)
   125  	if os.IsExist(err) {
   126  		return nil, err
   127  	}
   128  	// create new data file
   129  	file, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0666)
   130  	if err != nil {
   131  		return nil, err
   132  	}
   133  	// init sstable index
   134  	ssi, err := OpenSSIndex(base, index)
   135  	if err != nil {
   136  		return nil, err
   137  	}
   138  	// init and return SSTable
   139  	sst := &SSTable{
   140  		path:  path, // path is the filepath for the data
   141  		file:  file, // file is the file descriptor for the data
   142  		open:  true, // open reports the status of the file
   143  		index: ssi,  // SSIndex is an SSTableIndex file
   144  	}
   145  	return sst, nil
   146  }
   147  
   148  func OpenSSTable(base string, index int64) (*SSTable, error) {
   149  	// make sure we are working with absolute paths
   150  	base, err := filepath.Abs(base)
   151  	if err != nil {
   152  		return nil, err
   153  	}
   154  	// sanitize any path separators
   155  	base = filepath.ToSlash(base)
   156  	// create new data file path
   157  	path := filepath.Join(base, DataFileNameFromIndex(index))
   158  	// check to make sure file exists
   159  	_, err = os.Stat(path)
   160  	if os.IsNotExist(err) {
   161  		return nil, err
   162  	}
   163  	// open data file
   164  	file, err := os.OpenFile(path, os.O_RDWR, 0666)
   165  	if err != nil {
   166  		return nil, err
   167  	}
   168  	// init sstable index
   169  	ssi, err := OpenSSIndex(base, index)
   170  	if err != nil {
   171  		return nil, err
   172  	}
   173  	// init and return SSTable
   174  	sst := &SSTable{
   175  		path:     path, // path is the filepath for the data
   176  		file:     file, // file is the file descriptor for the data
   177  		open:     true, // open reports the status of the file
   178  		index:    ssi,  // SSIndex is an SSTableIndex file
   179  		readOnly: true,
   180  	}
   181  	return sst, nil
   182  }
   183  
   184  func (sst *SSTable) errorCheckFileAndIndex() error {
   185  	// make sure file is not closed
   186  	if !sst.open {
   187  		return ErrFileClosed
   188  	}
   189  	// make sure index is open
   190  	if sst.index == nil {
   191  		return ErrSSIndexNotFound
   192  	}
   193  	return nil
   194  }
   195  
   196  func (sst *SSTable) ReadEntry(key string) (*sstDataEntry, error) {
   197  	// error check
   198  	err := sst.errorCheckFileAndIndex()
   199  	if err != nil {
   200  		return nil, err
   201  	}
   202  	// check index for entry offset
   203  	//offset, err := sst.index.GetEntryOffset(key)
   204  	//if err != nil {
   205  	//	return nil, err
   206  	//}
   207  	// use index to find and return data entry, passing sst's underlying file descriptor
   208  	de, err := sst.index.ReadDataEntry(sst.file, key)
   209  	if err != nil {
   210  		return nil, err
   211  	}
   212  	// read and decode data entry at provided offset
   213  	//de, err := DecodeDataEntryAt(sst.file, offset)
   214  	//if err != nil {
   215  	//	return nil, err
   216  	//}
   217  	// return data entry
   218  	return de, nil
   219  }
   220  
   221  func (sst *SSTable) Scan(iter func(de *sstDataEntry) bool) error {
   222  	// error check
   223  	err := sst.errorCheckFileAndIndex()
   224  	if err != nil {
   225  		return err
   226  	}
   227  	for {
   228  		// decode next data entry
   229  		de, err := DecodeDataEntry(sst.file)
   230  		if err != nil {
   231  			if err == io.EOF || err == io.ErrUnexpectedEOF {
   232  				break
   233  			}
   234  			return err
   235  		}
   236  		if !iter(de) {
   237  			break
   238  		}
   239  	}
   240  	return nil
   241  }
   242  
   243  func (sst *SSTable) ReadEntryAt(offset int64) (*sstDataEntry, error) {
   244  	// error check
   245  	err := sst.errorCheckFileAndIndex()
   246  	if err != nil {
   247  		return nil, err
   248  	}
   249  	de, err := sst.index.ReadDataEntryAt(sst.file, offset)
   250  	if err != nil {
   251  		return nil, err
   252  	}
   253  	// return data entry
   254  	return de, nil
   255  }
   256  
   257  func (sst *SSTable) WriteEntry(de *sstDataEntry) error {
   258  	// error check
   259  	err := sst.errorCheckFileAndIndex()
   260  	if err != nil {
   261  		return err
   262  	}
   263  	// write entry to data file
   264  	offset, err := EncodeDataEntry(sst.file, de)
   265  	if err != nil {
   266  		return err
   267  	}
   268  	// write entry to index
   269  	err = sst.index.WriteIndexEntry(de.key, offset)
   270  	if err != nil {
   271  		return err
   272  	}
   273  	return nil
   274  }
   275  
   276  func (sst *SSTable) WriteBatch(b *Batch) error {
   277  	// error check
   278  	err := sst.errorCheckFileAndIndex()
   279  	if err != nil {
   280  		return err
   281  	}
   282  	// error check batch
   283  	if b == nil {
   284  		return ErrEmpty
   285  	}
   286  	// check to see if batch is sorted
   287  	if !sort.IsSorted(b) {
   288  		// if not, sort
   289  		sort.Stable(b)
   290  	}
   291  	// range batch and write
   292  	for i := range b.data {
   293  		// entry
   294  		de := b.data[i]
   295  		// write entry to data file
   296  		offset, err := EncodeDataEntry(sst.file, de)
   297  		if err != nil {
   298  			return err
   299  		}
   300  		// write entry info to index file
   301  		err = sst.index.WriteIndexEntry(de.key, offset)
   302  		if err != nil {
   303  			return err
   304  		}
   305  	}
   306  	return nil
   307  }
   308  
   309  func rebuildSSIndexFromSSTable(sst *SSTable) error {
   310  	// local ssi var dec
   311  	ssi := sst.index
   312  	// close any open files
   313  	err := ssi.Close()
   314  	if err != nil {
   315  		return err
   316  	}
   317  	// truncate file (instead of removing)
   318  	err = os.Truncate(ssi.path, 0)
   319  	if err != nil {
   320  		return err
   321  	}
   322  	// re-open file
   323  	ssi.file, err = os.OpenFile(ssi.path, os.O_CREATE|os.O_RDWR, 0666)
   324  	if err != nil {
   325  		return err
   326  	}
   327  	ssi.open = true
   328  	// read and decode entries
   329  	for {
   330  		// decode next data entry
   331  		de, err := DecodeDataEntry(sst.file)
   332  		if err != nil {
   333  			if err == io.EOF || err == io.ErrUnexpectedEOF {
   334  				break
   335  			}
   336  			return err
   337  		}
   338  		// get offset of data file reader for index
   339  		offset, err := sst.file.Seek(0, io.SeekCurrent)
   340  		if err != nil {
   341  			return err
   342  		}
   343  		// write index entry to file
   344  		err = ssi.WriteIndexEntry(de.key, offset)
   345  		if err != nil {
   346  			return err
   347  		}
   348  	}
   349  	return nil
   350  }
   351  
   352  func RebuildSSTableIndex(base string, index int64) error {
   353  	// make sure we are working with absolute paths
   354  	base, err := filepath.Abs(base)
   355  	if err != nil {
   356  		return err
   357  	}
   358  	// sanitize any path separators
   359  	base = filepath.ToSlash(base)
   360  	// create new data file path
   361  	path := filepath.Join(base, DataFileNameFromIndex(index))
   362  	// check to make sure file exists
   363  	_, err = os.Stat(path)
   364  	if os.IsNotExist(err) {
   365  		return ErrSSTableNotFound
   366  	}
   367  	// open sstable if it exists
   368  	sst, err := OpenSSTable(base, index)
   369  	if err != nil {
   370  		return err
   371  	}
   372  	// close local index
   373  	err = sst.index.Close()
   374  	if err != nil {
   375  		return err
   376  	}
   377  	// re-generate index from data table
   378  	err = rebuildSSIndexFromSSTable(sst)
   379  	if err != nil {
   380  		return err
   381  	}
   382  	// close sstable
   383  	err = sst.Close()
   384  	if err != nil {
   385  		return err
   386  	}
   387  	return nil
   388  }
   389  
   390  func (sst *SSTable) Close() error {
   391  	if sst.open {
   392  		err := sst.file.Sync()
   393  		if err != nil {
   394  			return err
   395  		}
   396  		err = sst.file.Close()
   397  		if err != nil {
   398  			return err
   399  		}
   400  	}
   401  	if sst.index != nil {
   402  		err := sst.index.Close()
   403  		if err != nil {
   404  			return err
   405  		}
   406  	}
   407  	sst.open = false
   408  	return nil
   409  }