github.com/scottcagno/storage@v1.8.0/pkg/lsmtree/sstable.go (about)

     1  package lsmtree
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"io"
     7  	"io/fs"
     8  	"os"
     9  	"path/filepath"
    10  	"strings"
    11  )
    12  
// ssTableIndex holds the index entries of one ss-table together with the
// keys of the first and last entries. newSSTableIndex populates it.
// NOTE(review): the range check in keyInRange presumably relies on data
// being sorted by key -- confirm against whatever writes the index file.
type ssTableIndex struct {
	first []byte   // key of data[0]; nil when the index is empty
	last  []byte   // key of data[len(data)-1]; nil when the index is empty
	count int      // number of entries supplied at construction time
	data  []*Index // the index entries themselves
}
    19  
    20  func newSSTableIndex(index []*Index) *ssTableIndex {
    21  	if index == nil || len(index) < 1 {
    22  		return &ssTableIndex{
    23  			first: nil,
    24  			last:  nil,
    25  			count: 0,
    26  			data:  make([]*Index, 0),
    27  		}
    28  	}
    29  	return &ssTableIndex{
    30  		first: index[0].Key,
    31  		last:  index[len(index)-1].Key,
    32  		count: len(index),
    33  		data:  index,
    34  	}
    35  }
    36  
    37  func (ssti *ssTableIndex) Len() int {
    38  	return len(ssti.data)
    39  }
    40  
    41  func (ssti *ssTableIndex) close() {
    42  	ssti.first = nil
    43  	ssti.last = nil
    44  	ssti.count = 0
    45  	for i := range ssti.data {
    46  		ssti.data[i] = nil
    47  	}
    48  	ssti.data = nil
    49  }
    50  
// ssTable is a handle to a single ss-table: its data file plus the index
// entries loaded for it.
type ssTable struct {
	path  string        // data file name; openSSTable sets it from toDataFileName(seq)
	fd    *os.File      // data file handle read by ReadAt; nil makes ReadAt return ErrFileClosed
	index *ssTableIndex // index entries for this table, built by newSSTableIndex
}
    56  
    57  func createSSTable(dir string, memt *rbTree) error {
    58  	// create level-0 path for newly flushed ss-tables
    59  	path := filepath.Join(dir, levelToDir(0))
    60  	// read the base dir for this level
    61  	files, err := os.ReadDir(path)
    62  	if err != nil {
    63  		return err
    64  	}
    65  	// init seq
    66  	var seq int64
    67  	// count the files to get the sequence number
    68  	for _, file := range files {
    69  		// if the file is a sst-table data file, increment
    70  		if !file.IsDir() && strings.HasSuffix(file.Name(), dataFileSuffix) {
    71  			seq++
    72  		}
    73  	}
    74  	// create a new data file
    75  	dataFile, err := openDataFile(path, seq, os.O_CREATE|os.O_WRONLY)
    76  	// get data file name
    77  	//dataFileName := filepath.Join(path, toDataFileName(seq))
    78  	// open data file
    79  	//dataFile, err := os.OpenFile(dataFileName, os.O_CREATE|os.O_RDWR, 0666)
    80  	if err != nil {
    81  		return err
    82  	}
    83  	// remember to close
    84  	defer func(dataFile *os.File) {
    85  		err := dataFile.Close()
    86  		if err != nil {
    87  			panic("closing dataFile: " + err.Error())
    88  		}
    89  	}(dataFile)
    90  
    91  	// create a new index file
    92  	indexFile, err := openIndexFile(path, seq, os.O_CREATE|os.O_WRONLY)
    93  	// get index file name
    94  	//indexFileName := filepath.Join(path, toIndexFileName(seq))
    95  	// open index file
    96  	//indexFile, err := os.OpenFile(indexFileName, os.O_CREATE|os.O_RDWR, 0666)
    97  	if err != nil {
    98  		return err
    99  	}
   100  	// remember to close
   101  	defer func(indexFile *os.File) {
   102  		err := indexFile.Close()
   103  		if err != nil {
   104  			panic("closing indexFile: " + err.Error())
   105  		}
   106  	}(indexFile)
   107  	// range mem-table and write entries and indexes
   108  	memt.rangeFront(func(e *Entry) bool {
   109  		// write entry to data file
   110  		offset, err := writeEntry(dataFile, e)
   111  		if err != nil {
   112  			// for now, just panic
   113  			panic(err)
   114  		}
   115  		// write index to index file
   116  		_, err = writeIndex(indexFile, &Index{
   117  			Key:    e.Key,
   118  			Offset: offset,
   119  		})
   120  		if err != nil {
   121  			// for now, just panic
   122  			panic(err)
   123  		}
   124  		return true
   125  	})
   126  	// sync data file
   127  	err = dataFile.Sync()
   128  	if err != nil {
   129  		return err
   130  	}
   131  	// sync index file
   132  	err = indexFile.Sync()
   133  	if err != nil {
   134  		return err
   135  	}
   136  	return nil
   137  }
   138  
   139  func openSSTable(path string, seq int64) (*ssTable, error) {
   140  	// open index file
   141  	indexFile, err := openIndexFile(path, seq, os.O_RDONLY)
   142  	if err != nil {
   143  		return nil, err
   144  	}
   145  	// create an index set
   146  	var index []*Index
   147  	// load up the ss-table-index entries
   148  	for {
   149  		// read index entry from the index file
   150  		i, err := readIndex(indexFile)
   151  		if err != nil {
   152  			if err == io.EOF || err == io.ErrUnexpectedEOF {
   153  				break
   154  			}
   155  			// make sure we close!
   156  			err = indexFile.Close()
   157  			if err != nil {
   158  				return nil, err
   159  			}
   160  			return nil, err
   161  		}
   162  		// add index to the index set
   163  		index = append(index, i)
   164  	}
   165  	// close index file
   166  	err = indexFile.Close()
   167  	if err != nil {
   168  		return nil, err
   169  	}
   170  	// make ss-table instance to return
   171  	sst := &ssTable{
   172  		path:  toDataFileName(seq),
   173  		fd:    nil,
   174  		index: newSSTableIndex(index),
   175  	}
   176  	// return ss-table instance
   177  	return sst, nil
   178  }
   179  
   180  func (sst *ssTable) keyInRange(key []byte) bool {
   181  	// error check
   182  	if key == nil {
   183  		return false
   184  	}
   185  	// return boolean reporting key being between the lo and hi values
   186  	return isBetween(sst.index.first, key, sst.index.last)
   187  }
   188  
   189  func isBetween(lo, key, hi []byte) bool {
   190  	return bytes.Compare(lo, key) <= 0 && bytes.Compare(hi, key) >= 0
   191  }
   192  
   193  func locateSSTable(base string, key []byte) (string, error) {
   194  	// initialize vars for return
   195  	var sstPath string
   196  	// start walking the directory tree from the supplied base
   197  	err := filepath.WalkDir(base, func(path string, de fs.DirEntry, err error) error {
   198  		// handle path error
   199  		if err != nil {
   200  			fmt.Fprintf(os.Stderr, "prevent panic by handling failure accessing a path %q: %v\n", path, err)
   201  			return err
   202  		}
   203  		// we found a ss-table index file
   204  		if !de.IsDir() && strings.HasPrefix(de.Name(), dataFileSuffix) {
   205  			// open index file
   206  			dataFile, err := os.OpenFile(path, os.O_RDONLY, 0666)
   207  			if err != nil {
   208  				return err
   209  			}
   210  			// read through the index file entries
   211  			for {
   212  				// read index entry from the index file
   213  				e, err := readEntry(dataFile)
   214  				if err != nil {
   215  					if err == io.EOF || err == io.ErrUnexpectedEOF {
   216  						break
   217  					}
   218  					// make sure we close!
   219  					err = dataFile.Close()
   220  					if err != nil {
   221  						return err
   222  					}
   223  					return err
   224  				}
   225  				// see if we have a match
   226  				if bytes.Contains(e.Key, key) {
   227  					sstPath = path
   228  					break
   229  				}
   230  			}
   231  			// close index file
   232  			err = dataFile.Close()
   233  			if err != nil {
   234  				return err
   235  			}
   236  		}
   237  		return nil
   238  	})
   239  	if err != nil {
   240  		return "", err
   241  	}
   242  	// got one?
   243  	return sstPath, nil
   244  }
   245  
   246  /*
   247  func searchInSSTablesOLD(base string, key []byte) (*Entry, error) {
   248  	// read the base dir for this level
   249  	dirs, err := os.ReadDir(base)
   250  	if err != nil {
   251  		return nil, err
   252  	}
   253  	// iterate dirs
   254  	for _, dir := range dirs {
   255  		// skip anything that is not a directory
   256  		if !dir.IsDir() {
   257  			continue
   258  		}
   259  		// now let us read the files within this level
   260  		files, err := os.ReadDir(dir.Name())
   261  		if err != nil {
   262  			return nil, err
   263  		}
   264  		// visit each file
   265  		for _, file := range files {
   266  			// if the file is not a ss-table data file, continue
   267  			if file.IsDir() || !strings.HasSuffix(file.Name(), dataFileSuffix) {
   268  				continue // skip to the next file
   269  			}
   270  			// get the sequence from the data file name
   271  			seq, err := fromDataFileName(file.Name())
   272  			if err != nil {
   273  				return nil, err
   274  			}
   275  			// if the file is a ss-table, open it
   276  			sst, err := openSSTable(dir.Name(), seq)
   277  			if err != nil {
   278  				return nil, err
   279  			}
   280  			// perform prelim check to see if the provided
   281  			// key may fall in the range of this table
   282  			if ok := sst.keyInRange(key); !ok {
   283  				// if the key is not in the range, we can
   284  				// skip to the next table straight away
   285  				continue
   286  			}
   287  			// if the key does fall in the range than there
   288  			// is a very high chance that it will be found
   289  			// within this table. perform a search on the
   290  			// ss-table for the provided key and return
   291  			e, err := searchSSTable(sst.path, key)
   292  			if err != nil {
   293  				return nil, err
   294  			}
   295  			// check and return found entry
   296  			if e != nil && !e.hasTombstone() {
   297  				return e, nil
   298  			}
   299  		}
   300  	}
   301  	return nil, ErrNotFound
   302  }
   303  
   304  func searchSSTableOLD(dir string, key []byte) (*Entry, error) {
   305  	// read the base dir for this level
   306  	dirs, err := os.ReadDir(sstm.baseDir)
   307  	if err != nil {
   308  		return err
   309  	}
   310  	// iterate dirs
   311  	for _, dir := range dirs {
   312  		// skip anything that is not a directory
   313  		if !dir.IsDir() {
   314  			continue
   315  		}
   316  		// get level
   317  		level, err := dirToLevel(dir.Name())
   318  		if err != nil {
   319  			return err
   320  		}
   321  		// add level to levels
   322  		if _, ok := sstm.level[level]; !ok {
   323  			sstm.level[level] = 0
   324  		}
   325  		// now let us add the file count within those levels
   326  		files, err := os.ReadDir(dir.Name())
   327  		if err != nil {
   328  			return err
   329  		}
   330  		// count the files
   331  		for _, file := range files {
   332  			// if the file is a sst-table data file, increment
   333  			if !file.IsDir() && strings.HasSuffix(file.Name(), dataFileSuffix) {
   334  				sstm.level[level]++
   335  				sstm.sstcount++
   336  			}
   337  		}
   338  	}
   339  	return nil
   340  }
   341  */
   342  
   343  func (sst *ssTable) ReadAt(offset int64) (*Entry, error) {
   344  	// error check
   345  	if sst.fd == nil {
   346  		return nil, ErrFileClosed
   347  	}
   348  	// use offset to read entry
   349  	e, err := readEntryAt(sst.fd, offset)
   350  	if err != nil {
   351  		return nil, err
   352  	}
   353  	// make sure entry checksum is good
   354  	err = checkCRC(e, checksum(append(e.Key, e.Value...)))
   355  	if err != nil {
   356  		return nil, err
   357  	}
   358  	// return entry
   359  	return e, nil
   360  }
   361  
   362  /*
   363  func getLevelFromSize(size int64) int {
   364  	switch {
   365  	case size > 0<<20 && size < 1<<21: // level-0	(2 MB) max=4
   366  		return 0
   367  	case size > 1<<22 && size < 1<<23: // level-1   (8 MB) max=4
   368  		return 1
   369  	case size > 1<<24 && size < 1<<25: // level-2  (32 MB) max=4
   370  		return 2
   371  	case size > 1<<26 && size < 1<<27: // level-3 (128 MB) max=4
   372  		return 3
   373  	default:
   374  		return 4 // oddballs that will need gc for sure
   375  	}
   376  }
   377  */