github.com/scottcagno/storage@v1.8.0/pkg/lsmt/sstable/_ss-table-manager.go (about)

     1  package sstable
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"github.com/scottcagno/storage/pkg/_junk/_memtable"
     7  	"github.com/scottcagno/storage/pkg/lsmt/binary"
     8  	"github.com/scottcagno/storage/pkg/lsmt/trees/rbtree"
     9  	"log"
    10  	"os"
    11  	"path/filepath"
    12  	"sort"
    13  	"strings"
    14  	"sync"
    15  )
    16  
    17  type KeyRange struct {
    18  	index int64
    19  	first string
    20  	last  string
    21  }
    22  
    23  func (kr *KeyRange) InKeyRange(k string) bool {
    24  	return kr.first <= k && k <= kr.last
    25  }
    26  
    27  func (kr *KeyRange) String() string {
    28  	return fmt.Sprintf("kr.gindex=%d, kr.first=%q, kr.last=%q", kr.index, kr.first, kr.last)
    29  }
    30  
    31  type KeyRangeSlice []*KeyRange
    32  
    33  func (krs KeyRangeSlice) Len() int {
    34  	return len(krs)
    35  }
    36  
    37  func (krs KeyRangeSlice) Less(i, j int) bool {
    38  	return krs[i].first < krs[j].first
    39  }
    40  
    41  func (krs KeyRangeSlice) Swap(i, j int) {
    42  	krs[i], krs[j] = krs[j], krs[i]
    43  }
    44  
// SSTManager1 coordinates the ss-tables (and their indexes) that live
// under a single base directory.
type SSTManager1 struct {
	// lock is taken by most exported methods before they touch the
	// manager's state or the files under base
	lock    sync.RWMutex
	// base is the absolute, slash-separated path of the ss-table directory
	base    string
	// inrange holds one key range (table index, first key, last key) per
	// known ss-table
	inrange []*KeyRange
	//sparse    map[int64]*SparseIndex
	// gindex is the index of the most recently created ss-table; a flush
	// writes its new table at gindex+1
	gindex    int64
	// cachedSST is not referenced by any of the code visible in this file
	cachedSST *SSTable
	// keyIndex holds sparseIndexEntry values (last key + ss-table index)
	// and is what Get searches to pick a table
	keyIndex  *rbtree.RBTree
}
    54  
    55  // https://play.golang.org/p/m_cJtw4wWMc
    56  
    57  // OpenSSTManager opens and returns a SSTManager, which allows you to
    58  // perform operations across all the ss-table and ss-table-indexes,
    59  // hopefully without too much hassle
    60  func OpenSSTManager1(base string) (*SSTManager1, error) {
    61  	// make sure we are working with absolute paths
    62  	base, err := filepath.Abs(base)
    63  	if err != nil {
    64  		return nil, err
    65  	}
    66  	// sanitize any path separators
    67  	base = filepath.ToSlash(base)
    68  	// create any directories if they are not there
    69  	err = os.MkdirAll(base, os.ModeDir)
    70  	if err != nil {
    71  		return nil, err
    72  	}
    73  	// create ss-table-manager instance
    74  	sstm := &SSTManager1{
    75  		base:    base,
    76  		inrange: make([]*KeyRange, 0),
    77  		//sparse:  make(map[int64]*SparseIndex, 0),
    78  		keyIndex: rbtree.NewRBTree(),
    79  	}
    80  	// read the ss-table directory
    81  	files, err := os.ReadDir(base)
    82  	if err != nil {
    83  		return nil, err
    84  	}
    85  	// lock
    86  	sstm.lock.RLock()
    87  	defer sstm.lock.RUnlock()
    88  	// go over all the files
    89  	for _, file := range files {
    90  		// skip all non ss-tables
    91  		if file.IsDir() || !strings.HasSuffix(file.Name(), dataFileSuffix) {
    92  			continue
    93  		}
    94  		// get ss-table id from file name
    95  		index, err := IndexFromDataFileName(file.Name())
    96  		if err != nil {
    97  			return nil, err
    98  		}
    99  		// open the ss-table-gindex
   100  		ssi, err := OpenSSTIndex(sstm.base, index)
   101  		if err != nil {
   102  			return nil, err
   103  		}
   104  		// create a new key-range "gindex"
   105  		kr := &KeyRange{
   106  			index: index,     // gindex of the ss-table
   107  			first: ssi.first, // first key in the ss-table
   108  			last:  ssi.last,  // last key in the ss-table
   109  		}
   110  		// add to keyIndex
   111  		sstm.keyIndex.Put(sparseIndexEntry{
   112  			LastKey:  ssi.last,
   113  			SSTIndex: index,
   114  		})
   115  
   116  		fmt.Println(file.Name(), ssi.file.Name(), ssi.first, ssi.last)
   117  
   118  		// add it to our key in-range index
   119  		sstm.inrange = append(sstm.inrange, kr)
   120  		// populate sparse index
   121  		//sstm.sparse[index] = makeNewSparseIndex(index, ssi)
   122  		// close gindex
   123  		err = ssi.Close()
   124  		if err != nil {
   125  			return nil, err
   126  		}
   127  	}
   128  	// update the last global gindex
   129  	//sstm.gindex = sstm.getLastGIndex()
   130  	e, ok := sstm.keyIndex.Max()
   131  	if !ok {
   132  		sstm.gindex = 0
   133  	} else {
   134  		sstm.gindex = e.(sparseIndexEntry).SSTIndex
   135  	}
   136  	//log.Println(sstm.inrange, len(sstm.inrange), sstm.gindex)
   137  
   138  	fmt.Printf("KeyIndex: %s\n", sstm.keyIndex)
   139  
   140  	return sstm, nil
   141  }
   142  
   143  func (sstm *SSTManager1) GetLastKey() (string, error) {
   144  	e, ok := sstm.keyIndex.Max()
   145  	if !ok {
   146  		return "", ErrSSTIndexNotFound
   147  	}
   148  	return e.(sparseIndexEntry).LastKey, nil
   149  }
   150  
   151  func (sstm *SSTManager1) getLastGIndex() int64 {
   152  	if len(sstm.inrange) == 0 {
   153  		return 0
   154  	}
   155  	return sstm.inrange[len(sstm.inrange)-1].index
   156  }
   157  
   158  func (sstm *SSTManager1) addKeyRange(first, last string) {
   159  	kr := &KeyRange{index: sstm.gindex, first: first, last: last}
   160  	sstm.inrange = append(sstm.inrange, kr)
   161  }
   162  
   163  // FlushMemtableToSSTable takes a pointer to a memtable and writes it to disk as an ss-table
   164  func (sstm *SSTManager1) FlushMemtableToSSTable(memt *memtable.Memtable) error {
   165  	// lock
   166  	sstm.lock.Lock()
   167  	defer sstm.lock.Unlock()
   168  	// make new batch
   169  	batch := sstm.NewBatch()
   170  	// iterate mem-table entries
   171  	memt.Scan(func(me rbtree.RBEntry) bool {
   172  		// and write each entry to the batch
   173  		batch.WriteEntry(me.(memtable.MemtableEntry).Entry)
   174  		return true
   175  	})
   176  	// reset memtable asap
   177  	err := memt.Reset()
   178  	if err != nil {
   179  		return err
   180  	}
   181  	// open new ss-table
   182  	sst, err := OpenSSTable(sstm.base, sstm.gindex+1)
   183  	if err != nil {
   184  		return err
   185  	}
   186  	// write batch to ss-table
   187  	err = sst.WriteBatch(batch)
   188  	if err != nil {
   189  		return err
   190  	}
   191  	// save for later
   192  	first, last := sst.index.first, sst.index.last
   193  	// flush and close ss-table
   194  	err = sst.Close()
   195  	if err != nil {
   196  		return err
   197  	}
   198  	// in the clear, increment gindex
   199  	sstm.gindex++
   200  	// add new entry to sparse index
   201  	sstm.addKeyRange(first, last)
   202  	// return
   203  	return nil
   204  }
   205  
   206  func (sstm *SSTManager1) NewBatch() *binary.Batch {
   207  	return new(binary.Batch)
   208  }
   209  
   210  func (sstm *SSTManager1) FlushBatchToSSTable(batch *binary.Batch) error {
   211  	// lock
   212  	sstm.lock.Lock()
   213  	defer sstm.lock.Unlock()
   214  	// open new ss-table
   215  	sst, err := OpenSSTable(sstm.base, sstm.gindex+1)
   216  	if err != nil {
   217  		return err
   218  	}
   219  	// write batch to ss-table
   220  	err = sst.WriteBatch(batch)
   221  	if err != nil {
   222  		return err
   223  	}
   224  	// save for later
   225  	first, last := sst.index.first, sst.index.last
   226  	// add new sparse index
   227  	//sstm.sparse[sstm.gindex+1] = makeNewSparseIndex(sstm.gindex+1, sst.index)
   228  	// flush and close ss-table
   229  	err = sst.Close()
   230  	if err != nil {
   231  		return err
   232  	}
   233  	// in the clear, increment gindex
   234  	sstm.gindex++
   235  	// add new entry to key in-range index
   236  	sstm.addKeyRange(first, last)
   237  	return nil
   238  }
   239  
   240  func (sstm *SSTManager1) isInRange(k string) (int64, error) { //(*SparseIndex, error) {
   241  	if len(sstm.inrange) == 1 {
   242  		return sstm.getLastGIndex(), nil
   243  	}
   244  	keys := KeyRangeSlice(sstm.inrange)
   245  	sort.Sort(keys)
   246  	n := sort.Search(keys.Len(),
   247  		func(i int) bool {
   248  			return sstm.inrange[i].first <= k && k <= sstm.inrange[i].last
   249  		})
   250  	log.Println("DEBUG >> N=", n, len(sstm.inrange))
   251  	if n < 0 {
   252  		return -1, ErrSSTIndexNotFound
   253  	}
   254  
   255  	//	if i < len(data) && data[i] == x {
   256  	//		// x is present at data[i]
   257  	//	} else {
   258  	//		// x is not present in data,
   259  	//		// but i is the index where it would be inserted.
   260  	//	}
   261  
   262  	//for _, kr := range sstm.inrange {
   263  	//	if !kr.InKeyRange(k) {
   264  	//		continue
   265  	//	}
   266  	//	return kr.index, nil
   267  	//spi, ok := sstm.sparse[kr.index]
   268  	//if !ok {
   269  	//	continue
   270  	//}
   271  	//return spi, nil
   272  	//}
   273  	return int64(n), nil
   274  }
   275  
   276  func (sstm *SSTManager1) Get(k string) (*binary.Entry, error) {
   277  	// read lock
   278  	sstm.lock.RLock()
   279  	defer sstm.lock.RUnlock()
   280  	// search "sparse index"
   281  	e, ok := sstm.keyIndex.GetNearMin(sparseIndexEntry{LastKey: k})
   282  	if !ok {
   283  		if e.(sparseIndexEntry).LastKey < k {
   284  			log.Panicf("[HMMMM] >>> %s, searching key: %q\n", e, k)
   285  			return nil, ErrSSTIndexNotFound
   286  		}
   287  	}
   288  	// get the table path index
   289  	sstIndex := e.(sparseIndexEntry).SSTIndex
   290  	// open ss-table for reading
   291  	sst, err := OpenSSTable(sstm.base, sstIndex)
   292  	if err != nil {
   293  		return nil, err
   294  	}
   295  	// read data by key (performs search using ssi inside read)
   296  	de, err := sst.Read(k)
   297  	if err != nil {
   298  		return nil, err
   299  	}
   300  	// close ss-table
   301  	err = sst.Close()
   302  	if err != nil {
   303  		return nil, err
   304  	}
   305  	// return entry
   306  	return de, nil
   307  }
   308  
   309  func (sstm *SSTManager1) GetOLD(k string) (*binary.Entry, error) {
   310  	// read lock
   311  	sstm.lock.RLock()
   312  	defer sstm.lock.RUnlock()
   313  	// search sparse index
   314  	index, err := sstm.isInRange(k)
   315  	if err != nil {
   316  		return nil, err
   317  	}
   318  	if index == -1 {
   319  		return nil, ErrSSTIndexNotFound
   320  	}
   321  	// get table path index, and relative offset
   322  	//index, offset := spi.Search(k)
   323  	// open ss-table for reading
   324  	sst, err := OpenSSTable(sstm.base, index)
   325  	if err != nil {
   326  		return nil, err
   327  	}
   328  	// scan starting at location until we find match
   329  	//var de *binary.Entry
   330  	//err = sst.ScanAt(offset, func(e *binary.Entry) bool {
   331  	//	if string(e.Key) == k {
   332  	//		de = e
   333  	//		// got match, lets break
   334  	//		return false
   335  	//	}
   336  	//	return true
   337  	//})
   338  	de, err := sst.Read(k)
   339  	if err != nil {
   340  		return nil, err
   341  	}
   342  	// close ss-table
   343  	err = sst.Close()
   344  	if err != nil {
   345  		return nil, err
   346  	}
   347  	// return entry
   348  	return de, nil
   349  }
   350  
   351  func (sstm *SSTManager1) GetEntryIndex(k string) (*binary.Index, error) {
   352  	// read lock
   353  	sstm.lock.RLock()
   354  	defer sstm.lock.RUnlock()
   355  	// search sparse index
   356  	index, err := sstm.isInRange(k)
   357  	if err != nil {
   358  		return nil, err
   359  	}
   360  	if index == -1 {
   361  		return nil, ErrSSTIndexNotFound
   362  	}
   363  	// open ss-table-index for reading
   364  	sst, err := OpenSSTable(sstm.base, index)
   365  	if err != nil {
   366  		return nil, err
   367  	}
   368  	// read index data
   369  	di, err := sst.ReadIndex(k)
   370  	if err != nil {
   371  		return nil, err
   372  	}
   373  	// close ss-table
   374  	err = sst.Close()
   375  	if err != nil {
   376  		return nil, err
   377  	}
   378  	// return entry
   379  	return di, nil
   380  }
   381  
   382  func (sstm *SSTManager1) ListSSTables() []string {
   383  	// read lock
   384  	sstm.lock.RLock()
   385  	defer sstm.lock.RUnlock()
   386  	files, err := os.ReadDir(sstm.base)
   387  	if err != nil {
   388  		return nil
   389  	}
   390  	var ssts []string
   391  	for _, file := range files {
   392  		if file.IsDir() || !strings.HasSuffix(file.Name(), dataFileSuffix) {
   393  			continue
   394  		}
   395  		ssts = append(ssts, file.Name())
   396  	}
   397  
   398  	return ssts
   399  }
   400  
   401  func (sstm *SSTManager1) ListSSTIndexes() []string {
   402  	// read lock
   403  	sstm.lock.RLock()
   404  	defer sstm.lock.RUnlock()
   405  	files, err := os.ReadDir(sstm.base)
   406  	if err != nil {
   407  		return nil
   408  	}
   409  	var ssti []string
   410  	for _, file := range files {
   411  		if file.IsDir() || !strings.HasSuffix(file.Name(), indexFileSuffix) {
   412  			continue
   413  		}
   414  		ssti = append(ssti, file.Name())
   415  	}
   416  	return ssti
   417  }
   418  
   419  func (sstm *SSTManager1) CompactSSTables(index int64) error {
   420  	// lock
   421  	sstm.lock.Lock()
   422  	defer sstm.lock.Unlock()
   423  	// load sstable
   424  	sst, err := OpenSSTable(sstm.base, index)
   425  	if err != nil {
   426  		return err
   427  	}
   428  	// make batch
   429  	batch := binary.NewBatch()
   430  	// iterate
   431  	err = sst.Scan(func(e *binary.Entry) bool {
   432  		// add any data entries that are not tombstones to batch
   433  		if e.Value != nil && !bytes.Equal(e.Value, Tombstone) {
   434  			batch.WriteEntry(e)
   435  		}
   436  		return true
   437  	})
   438  	if err != nil {
   439  		return err
   440  	}
   441  	// get path
   442  	tpath, ipath := sst.path, sst.index.path
   443  	// close sstable
   444  	err = sst.Close()
   445  	if err != nil {
   446  		return err
   447  	}
   448  	// remove old table
   449  	err = os.Remove(tpath)
   450  	if err != nil {
   451  		return err
   452  	}
   453  	// remove old gindex
   454  	err = os.Remove(ipath)
   455  	if err != nil {
   456  		return err
   457  	}
   458  	// open new sstable to write to
   459  	sst, err = OpenSSTable(sstm.base, index)
   460  	if err != nil {
   461  		return err
   462  	}
   463  	// write batch to table
   464  	err = sst.WriteBatch(batch)
   465  	// flush and close sstable
   466  	err = sst.Close()
   467  	if err != nil {
   468  		return err
   469  	}
   470  	return nil
   471  }
   472  
   473  func (sstm *SSTManager1) MergeSSTables(iA, iB int64) error {
   474  	// lock
   475  	sstm.lock.Lock()
   476  	defer sstm.lock.Unlock()
   477  	// load sstable A
   478  	sstA, err := OpenSSTable(sstm.base, iA)
   479  	if err != nil {
   480  		return err
   481  	}
   482  	// and sstable B
   483  	sstB, err := OpenSSTable(sstm.base, iB)
   484  	if err != nil {
   485  		return err
   486  	}
   487  	// make batch to write data to
   488  	batch := binary.NewBatch()
   489  	// pass tables to the merge writer
   490  	err = mergeWriter(sstA, sstB, batch)
   491  	if err != nil {
   492  		return err
   493  	}
   494  	// close table A
   495  	err = sstA.Close()
   496  	if err != nil {
   497  		return err
   498  	}
   499  	// close table B
   500  	err = sstB.Close()
   501  	if err != nil {
   502  		return err
   503  	}
   504  	// open new sstable to write to
   505  	sstC, err := OpenSSTable(sstm.base, iB+1)
   506  	if err != nil {
   507  		return err
   508  	}
   509  	// write batch to table
   510  	err = sstC.WriteBatch(batch)
   511  	// flush and close sstable
   512  	err = sstC.Close()
   513  	if err != nil {
   514  		return err
   515  	}
   516  	return nil
   517  }
   518  
   519  func (sstm *SSTManager1) Close() error {
   520  
   521  	return nil
   522  }
   523  
   524  func mergeWriter(sstA, sstB *SSTable, batch *binary.Batch) error {
   525  
   526  	i, j := 0, 0
   527  	n1, n2 := sstA.index.Len(), sstB.index.Len()
   528  
   529  	var err error
   530  	var de *binary.Entry
   531  	for i < n1 && j < n2 {
   532  		if bytes.Compare(sstA.index.data[i].Key, sstB.index.data[j].Key) == 0 {
   533  			// read entry from sstB
   534  			de, err = sstB.ReadAt(sstB.index.data[j].Offset)
   535  			if err != nil {
   536  				return err
   537  			}
   538  			// write entry to batch
   539  			batch.WriteEntry(de)
   540  			i++
   541  			j++
   542  			continue
   543  		}
   544  		if bytes.Compare(sstA.index.data[i].Key, sstB.index.data[j].Key) == -1 {
   545  			// read entry from sstA
   546  			de, err = sstA.ReadAt(sstA.index.data[i].Offset)
   547  			if err != nil {
   548  				return err
   549  			}
   550  			// write entry to batch
   551  			batch.WriteEntry(de)
   552  			i++
   553  			continue
   554  		}
   555  		if bytes.Compare(sstB.index.data[j].Key, sstA.index.data[i].Key) == -1 {
   556  			// read entry from sstB
   557  			de, err = sstB.ReadAt(sstB.index.data[j].Offset)
   558  			if err != nil {
   559  				return err
   560  			}
   561  			// write entry to batch
   562  			batch.WriteEntry(de)
   563  			j++
   564  			continue
   565  		}
   566  	}
   567  
   568  	// print remaining
   569  	for i < n1 {
   570  		// read entry from sstA
   571  		de, err = sstA.ReadAt(sstA.index.data[i].Offset)
   572  		if err != nil {
   573  			return err
   574  		}
   575  		// write entry to batch
   576  		batch.WriteEntry(de)
   577  		i++
   578  	}
   579  
   580  	// print remaining
   581  	for j < n2 {
   582  		// read entry from sstB
   583  		de, err = sstB.ReadAt(sstB.index.data[j].Offset)
   584  		if err != nil {
   585  			return err
   586  		}
   587  		// write entry to batch
   588  		batch.WriteEntry(de)
   589  		j++
   590  	}
   591  
   592  	// return error free
   593  	return nil
   594  }