github.com/scottcagno/storage@v1.8.0/pkg/lsmt/sstable/ss-table-index.go (about)

     1  package sstable
     2  
     3  import (
     4  	"fmt"
     5  	"github.com/scottcagno/storage/pkg/lsmt/binary"
     6  	"github.com/scottcagno/storage/pkg/lsmt/trees/rbtree"
     7  	"io"
     8  	"math"
     9  	"os"
    10  	"path/filepath"
    11  	"strconv"
    12  )
    13  
    14  func IndexFileNameFromIndex(index int64) string {
    15  	hexa := strconv.FormatInt(index, 16)
    16  	return fmt.Sprintf("%s%010s%s", filePrefix, hexa, indexFileSuffix)
    17  }
    18  
    19  func IndexFromIndexFileName(name string) (int64, error) {
    20  	hexa := name[len(filePrefix) : len(name)-len(indexFileSuffix)]
    21  	return strconv.ParseInt(hexa, 16, 32)
    22  }
    23  
    24  type SSTIndex struct {
    25  	path  string
    26  	file  *os.File
    27  	open  bool
    28  	first string
    29  	last  string
    30  	data  []*binary.Index
    31  }
    32  
    33  func OpenSSTIndex(base string, index int64) (*SSTIndex, error) {
    34  	// make sure we are working with absolute paths
    35  	base, err := filepath.Abs(base)
    36  	if err != nil {
    37  		return nil, err
    38  	}
    39  	// sanitize any path separators
    40  	base = filepath.ToSlash(base)
    41  	// create new gindex file path
    42  	path := filepath.Join(base, IndexFileNameFromIndex(index))
    43  	// open (or create) gindex file
    44  	file, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0666)
    45  	if err != nil {
    46  		return nil, err
    47  	}
    48  	// init and return SSTIndex
    49  	ssi := &SSTIndex{
    50  		path: path,
    51  		file: file,
    52  		open: true,
    53  	}
    54  	// load sst data gindex info
    55  	err = ssi.LoadSSIndexData()
    56  	if err != nil {
    57  		return nil, err
    58  	}
    59  	return ssi, nil
    60  }
    61  
    62  func (ssi *SSTIndex) LoadSSIndexData() error {
    63  	// check to make sure file exists
    64  	_, err := os.Stat(ssi.path)
    65  	if os.IsNotExist(err) {
    66  		return err
    67  	}
    68  	// open file to read header
    69  	fd, err := os.OpenFile(ssi.path, os.O_RDONLY, 0666)
    70  	if err != nil {
    71  		return err
    72  	}
    73  	// read and decode gindex entries
    74  	for {
    75  		// decode next gindex entry
    76  		i, err := binary.DecodeIndex(fd)
    77  		if err != nil {
    78  			if err == io.EOF || err == io.ErrUnexpectedEOF {
    79  				break
    80  			}
    81  			// make sure we close!
    82  			err = fd.Close()
    83  			if err != nil {
    84  				return err
    85  			}
    86  			return err
    87  		}
    88  		// add gindex entry to sst gindex
    89  		ssi.data = append(ssi.data, i)
    90  	}
    91  	// make sure we close!
    92  	err = fd.Close()
    93  	if err != nil {
    94  		return err
    95  	}
    96  	// update sst first and last and then return
    97  	if len(ssi.data) > 0 {
    98  		ssi.first = string(ssi.data[0].Key)
    99  		ssi.last = string(ssi.data[len(ssi.data)-1].Key)
   100  	}
   101  	return nil
   102  }
   103  
   104  func (ssi *SSTIndex) errorCheckFileAndIndex() error {
   105  	// make sure file is not closed
   106  	if !ssi.open {
   107  		return binary.ErrFileClosed
   108  	}
   109  	// make sure gindex is loaded
   110  	if ssi.data == nil {
   111  		err := ssi.LoadSSIndexData()
   112  		if err != nil {
   113  			return err
   114  		}
   115  	}
   116  	return nil
   117  }
   118  
   119  func (ssi *SSTIndex) Write(key []byte, offset int64) error {
   120  	// error check
   121  	err := ssi.errorCheckFileAndIndex()
   122  	if err != nil {
   123  		return err
   124  	}
   125  	// create new gindex
   126  	i := &binary.Index{Key: key, Offset: offset}
   127  	// write entry info to gindex file
   128  	_, err = binary.EncodeIndex(ssi.file, i)
   129  	if err != nil {
   130  		return err
   131  	}
   132  	// add to gindex
   133  	ssi.data = append(ssi.data, i)
   134  	// check last
   135  	last := len(ssi.data) - 1
   136  	if ssi.last != string(ssi.data[last].Key) {
   137  		ssi.last = string(ssi.data[last].Key)
   138  	}
   139  	return nil
   140  }
   141  
   142  func (ssi *SSTIndex) searchDataIndex(key string) int {
   143  	// declare for later
   144  	i, j := 0, len(ssi.data)
   145  	// otherwise, perform binary search
   146  	for i < j {
   147  		h := i + (j-i)/2
   148  		if key >= string(ssi.data[h].Key) {
   149  			i = h + 1
   150  		} else {
   151  			j = h
   152  		}
   153  	}
   154  	return i - 1
   155  }
   156  
   157  func (ssi *SSTIndex) Find(key string) (*binary.Index, error) {
   158  	// error check
   159  	err := ssi.errorCheckFileAndIndex()
   160  	if err != nil {
   161  		return nil, err
   162  	}
   163  	// attempt to find key
   164  	at := ssi.searchDataIndex(key)
   165  	if at == -1 {
   166  		return nil, ErrSSTIndexNotFound
   167  	}
   168  	// check gindex for entry offset
   169  	i := ssi.data[at]
   170  	if i == nil || i.Offset == -1 {
   171  		return nil, ErrSSTIndexNotFound
   172  	}
   173  	// return data entry
   174  	return i, nil
   175  }
   176  
   177  func (ssi *SSTIndex) Scan(iter func(k string, off int64) bool) {
   178  	for n := range ssi.data {
   179  		i := ssi.data[n]
   180  		if !iter(string(i.Key), i.Offset) {
   181  			continue
   182  		}
   183  	}
   184  }
   185  
   186  func calculateSparseRatio(n int64) int64 {
   187  	if n < 1 {
   188  		return 0
   189  	}
   190  	if n == 1 {
   191  		n++
   192  	}
   193  	return int64(math.Log2(float64(n)))
   194  }
   195  
   196  func (ssi *SSTIndex) GenerateAndGetSparseIndex() ([]*binary.Index, error) {
   197  	if !ssi.open {
   198  		return nil, binary.ErrFileClosed
   199  	}
   200  	var sparseSet []*binary.Index
   201  	count := int64(len(ssi.data))
   202  	ratio := calculateSparseRatio(count)
   203  	for i := int64(0); i < count; i++ {
   204  		if i%(count/ratio) == 0 {
   205  			sparseSet = append(sparseSet, ssi.data[i])
   206  		}
   207  	}
   208  	return sparseSet, nil
   209  }
   210  
   211  func (ssi *SSTIndex) GenerateAndPutSparseIndex(sparseIndex *rbtree.RBTree) error {
   212  	if !ssi.open {
   213  		return binary.ErrFileClosed
   214  	}
   215  	index, err := ssi.GetIndexNumber()
   216  	if err != nil {
   217  		return err
   218  	}
   219  	count := int64(len(ssi.data))
   220  	ratio := calculateSparseRatio(count)
   221  	for i := int64(0); i < count; i++ {
   222  		if i%(count/ratio) == 0 {
   223  			sparseIndex.Put(spiEntry{
   224  				Key:        string(ssi.data[i].Key),
   225  				SSTIndex:   index,
   226  				IndexEntry: ssi.data[i],
   227  			})
   228  		}
   229  	}
   230  	return nil
   231  }
   232  
   233  func (ssi *SSTIndex) GetIndexNumber() (int64, error) {
   234  	index, err := IndexFromIndexFileName(filepath.Base(ssi.file.Name()))
   235  	if err != nil {
   236  		return -1, err
   237  	}
   238  	return index, nil
   239  }
   240  
   241  func (ssi *SSTIndex) Len() int {
   242  	return len(ssi.data)
   243  }
   244  
   245  func (ssi *SSTIndex) Close() error {
   246  	if !ssi.open {
   247  		return nil
   248  	}
   249  	err := ssi.file.Sync()
   250  	if err != nil {
   251  		return err
   252  	}
   253  	err = ssi.file.Close()
   254  	if err != nil {
   255  		return err
   256  	}
   257  	ssi.open = false
   258  	return nil
   259  }