github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/internal/manifest/level_metadata.go (about)

     1  // Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package manifest
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  
    11  	"github.com/zuoyebang/bitalostable/internal/base"
    12  )
    13  
// LevelMetadata contains metadata for all of the files within
// a level of the LSM.
//
// level is the LSM level number the metadata was built for; it selects the
// B-Tree ordering at construction time (sequence-number order for level 0,
// smallest-key order otherwise) and decides whether Find may narrow its
// search by key bounds. tree is the B-Tree holding the level's files.
type LevelMetadata struct {
	level int
	tree  btree
}
    20  
    21  // clone makes a copy of the level metadata, implicitly increasing the ref
    22  // count of every file contained within lm.
    23  func (lm *LevelMetadata) clone() LevelMetadata {
    24  	return LevelMetadata{
    25  		level: lm.level,
    26  		tree:  lm.tree.clone(),
    27  	}
    28  }
    29  
    30  func (lm *LevelMetadata) release() (obsolete []*FileMetadata) {
    31  	return lm.tree.release()
    32  }
    33  
    34  func makeLevelMetadata(cmp Compare, level int, files []*FileMetadata) LevelMetadata {
    35  	bcmp := btreeCmpSeqNum
    36  	if level > 0 {
    37  		bcmp = btreeCmpSmallestKey(cmp)
    38  	}
    39  	var lm LevelMetadata
    40  	lm.level = level
    41  	lm.tree, _ = makeBTree(bcmp, files)
    42  	return lm
    43  }
    44  
    45  func makeBTree(cmp btreeCmp, files []*FileMetadata) (btree, LevelSlice) {
    46  	var t btree
    47  	t.cmp = cmp
    48  	for _, f := range files {
    49  		t.insert(f)
    50  	}
    51  	return t, LevelSlice{iter: t.iter(), length: t.length}
    52  }
    53  
    54  // Empty indicates whether there are any files in the level.
    55  func (lm *LevelMetadata) Empty() bool {
    56  	return lm.tree.length == 0
    57  }
    58  
    59  // Len returns the number of files within the level.
    60  func (lm *LevelMetadata) Len() int {
    61  	return lm.tree.length
    62  }
    63  
    64  // Iter constructs a LevelIterator over the entire level.
    65  func (lm *LevelMetadata) Iter() LevelIterator {
    66  	return LevelIterator{iter: lm.tree.iter()}
    67  }
    68  
    69  // Slice constructs a slice containing the entire level.
    70  func (lm *LevelMetadata) Slice() LevelSlice {
    71  	return LevelSlice{iter: lm.tree.iter(), length: lm.tree.length}
    72  }
    73  
    74  // Find finds the provided file in the level if it exists.
    75  func (lm *LevelMetadata) Find(cmp base.Compare, m *FileMetadata) *LevelFile {
    76  	iter := lm.Iter()
    77  	if lm.level != 0 {
    78  		// If lm holds files for levels >0, we can narrow our search by binary
    79  		// searching by bounds.
    80  		o := overlaps(iter, cmp, m.Smallest.UserKey,
    81  			m.Largest.UserKey, m.Largest.IsExclusiveSentinel())
    82  		iter = o.Iter()
    83  	}
    84  	for f := iter.First(); f != nil; f = iter.Next() {
    85  		if f == m {
    86  			lf := iter.Take()
    87  			return &lf
    88  		}
    89  	}
    90  	return nil
    91  }
    92  
    93  // Annotation lazily calculates and returns the annotation defined by
    94  // Annotator. The Annotator is used as the key for pre-calculated
    95  // values, so equal Annotators must be used to avoid duplicate computations
    96  // and cached annotations. Annotation must not be called concurrently, and in
    97  // practice this is achieved by requiring callers to hold DB.mu.
    98  func (lm *LevelMetadata) Annotation(annotator Annotator) interface{} {
    99  	if lm.Empty() {
   100  		return annotator.Zero(nil)
   101  	}
   102  	v, _ := lm.tree.root.annotation(annotator)
   103  	return v
   104  }
   105  
   106  // InvalidateAnnotation clears any cached annotations defined by Annotator. The
   107  // Annotator is used as the key for pre-calculated values, so equal Annotators
   108  // must be used to clear the appropriate cached annotation. InvalidateAnnotation
   109  // must not be called concurrently, and in practice this is achieved by
   110  // requiring callers to hold DB.mu.
   111  func (lm *LevelMetadata) InvalidateAnnotation(annotator Annotator) {
   112  	if lm.Empty() {
   113  		return
   114  	}
   115  	lm.tree.root.invalidateAnnotation(annotator)
   116  }
   117  
// LevelFile holds a file's metadata along with its position
// within a level of the LSM.
//
// The embedded *FileMetadata is the file itself; slice is a LevelSlice
// covering that file (LevelIterator.Take builds it with a length of one) and
// is what Slice returns.
type LevelFile struct {
	*FileMetadata
	slice LevelSlice
}
   124  
   125  // Slice constructs a LevelSlice containing only this file.
   126  func (lf LevelFile) Slice() LevelSlice {
   127  	return lf.slice
   128  }
   129  
   130  // NewLevelSliceSeqSorted constructs a LevelSlice over the provided files,
   131  // sorted by the L0 sequence number sort order.
   132  // TODO(jackson): Can we improve this interface or avoid needing to export
   133  // a slice constructor like this?
   134  func NewLevelSliceSeqSorted(files []*FileMetadata) LevelSlice {
   135  	tr, slice := makeBTree(btreeCmpSeqNum, files)
   136  	tr.release()
   137  	return slice
   138  }
   139  
   140  // NewLevelSliceKeySorted constructs a LevelSlice over the provided files,
   141  // sorted by the files smallest keys.
   142  // TODO(jackson): Can we improve this interface or avoid needing to export
   143  // a slice constructor like this?
   144  func NewLevelSliceKeySorted(cmp base.Compare, files []*FileMetadata) LevelSlice {
   145  	tr, slice := makeBTree(btreeCmpSmallestKey(cmp), files)
   146  	tr.release()
   147  	return slice
   148  }
   149  
   150  // NewLevelSliceSpecificOrder constructs a LevelSlice over the provided files,
   151  // ordering the files by their order in the provided slice. It's used in
   152  // tests.
   153  // TODO(jackson): Update tests to avoid requiring this and remove it.
   154  func NewLevelSliceSpecificOrder(files []*FileMetadata) LevelSlice {
   155  	tr, slice := makeBTree(btreeCmpSpecificOrder(files), files)
   156  	tr.release()
   157  	return slice
   158  }
   159  
// LevelSlice contains a slice of the files within a level of the LSM.
// A LevelSlice is immutable once created, but may be used to construct a
// mutable LevelIterator over the slice's files.
//
// iter is an iterator into the backing B-Tree; Iter hands out clones of it so
// the slice itself is never repositioned. length is the number of files in
// the slice, as returned by Len.
type LevelSlice struct {
	iter   iterator
	length int
	// start and end form the inclusive bounds of a slice of files within a
	// level of the LSM. They may be nil if the entire B-Tree backing iter is
	// accessible.
	start *iterator
	end   *iterator
}
   172  
   173  // Each invokes fn for each element in the slice.
   174  func (ls LevelSlice) Each(fn func(*FileMetadata)) {
   175  	iter := ls.Iter()
   176  	for f := iter.First(); f != nil; f = iter.Next() {
   177  		fn(f)
   178  	}
   179  }
   180  
   181  // String implements fmt.Stringer.
   182  func (ls LevelSlice) String() string {
   183  	var buf bytes.Buffer
   184  	ls.Each(func(f *FileMetadata) {
   185  		if buf.Len() > 0 {
   186  			fmt.Fprintf(&buf, " ")
   187  		}
   188  		fmt.Fprint(&buf, f)
   189  	})
   190  	return buf.String()
   191  }
   192  
   193  // Empty indicates whether the slice contains any files.
   194  func (ls *LevelSlice) Empty() bool {
   195  	return emptyWithBounds(ls.iter, ls.start, ls.end)
   196  }
   197  
   198  // Iter constructs a LevelIterator that iterates over the slice.
   199  func (ls *LevelSlice) Iter() LevelIterator {
   200  	return LevelIterator{
   201  		start: ls.start,
   202  		end:   ls.end,
   203  		iter:  ls.iter.clone(),
   204  	}
   205  }
   206  
   207  // Len returns the number of files in the slice. Its runtime is constant.
   208  func (ls *LevelSlice) Len() int {
   209  	return ls.length
   210  }
   211  
   212  // SizeSum sums the size of all files in the slice. Its runtime is linear in
   213  // the length of the slice.
   214  func (ls *LevelSlice) SizeSum() uint64 {
   215  	var sum uint64
   216  	iter := ls.Iter()
   217  	for f := iter.First(); f != nil; f = iter.Next() {
   218  		sum += f.Size
   219  	}
   220  	return sum
   221  }
   222  
   223  // Reslice constructs a new slice backed by the same underlying level, with
   224  // new start and end positions. Reslice invokes the provided function, passing
   225  // two LevelIterators: one positioned to i's inclusive start and one
   226  // positioned to i's inclusive end. The resliceFunc may move either iterator
   227  // forward or backwards, including beyond the callee's original bounds to
   228  // capture additional files from the underlying level. Reslice constructs and
   229  // returns a new LevelSlice with the final bounds of the iterators after
   230  // calling resliceFunc.
   231  func (ls LevelSlice) Reslice(resliceFunc func(start, end *LevelIterator)) LevelSlice {
   232  	if ls.iter.r == nil {
   233  		return ls
   234  	}
   235  	var start, end LevelIterator
   236  	if ls.start == nil {
   237  		start.iter = ls.iter.clone()
   238  		start.iter.first()
   239  	} else {
   240  		start.iter = ls.start.clone()
   241  	}
   242  	if ls.end == nil {
   243  		end.iter = ls.iter.clone()
   244  		end.iter.last()
   245  	} else {
   246  		end.iter = ls.end.clone()
   247  	}
   248  	resliceFunc(&start, &end)
   249  
   250  	s := LevelSlice{
   251  		iter:  start.iter.clone(),
   252  		start: &start.iter,
   253  		end:   &end.iter,
   254  	}
   255  	// Calculate the new slice's length.
   256  	iter := s.Iter()
   257  	for f := iter.First(); f != nil; f = iter.Next() {
   258  		s.length++
   259  	}
   260  	return s
   261  }
   262  
// KeyType is used to specify the type of keys we're looking for in
// LevelIterator positioning operations. Files not containing any keys of the
// desired type are skipped.
type KeyType int8

const (
	// KeyTypePointAndRange denotes a search among the entire keyspace, including
	// both point keys and range keys. No sstables are skipped. It is the zero
	// value of KeyType, so a LevelIterator whose filter was never set does
	// not skip any files.
	KeyTypePointAndRange KeyType = iota
	// KeyTypePoint denotes a search among the point keyspace. SSTables with no
	// point keys will be skipped. Note that the point keyspace includes rangedels.
	KeyTypePoint
	// KeyTypeRange denotes a search among the range keyspace. SSTables with no
	// range keys will be skipped.
	KeyTypeRange
)
   279  
// keyTypeAnnotator is an Annotator whose annotation value is a *KeyType. The
// value starts as KeyTypePoint and becomes KeyTypePointAndRange once a file
// with range keys has been accumulated or merged in.
type keyTypeAnnotator struct{}

// Compile-time assertion that keyTypeAnnotator satisfies Annotator.
var _ Annotator = keyTypeAnnotator{}
   283  
   284  func (k keyTypeAnnotator) Zero(dst interface{}) interface{} {
   285  	var val *KeyType
   286  	if dst != nil {
   287  		val = dst.(*KeyType)
   288  	} else {
   289  		val = new(KeyType)
   290  	}
   291  	*val = KeyTypePoint
   292  	return val
   293  }
   294  
   295  func (k keyTypeAnnotator) Accumulate(m *FileMetadata, dst interface{}) (interface{}, bool) {
   296  	v := dst.(*KeyType)
   297  	switch *v {
   298  	case KeyTypePoint:
   299  		if m.HasRangeKeys {
   300  			*v = KeyTypePointAndRange
   301  		}
   302  	case KeyTypePointAndRange:
   303  		// Do nothing.
   304  	default:
   305  		panic("unexpected key type")
   306  	}
   307  	return v, true
   308  }
   309  
   310  func (k keyTypeAnnotator) Merge(src interface{}, dst interface{}) interface{} {
   311  	v := dst.(*KeyType)
   312  	srcVal := src.(*KeyType)
   313  	switch *v {
   314  	case KeyTypePoint:
   315  		if *srcVal == KeyTypePointAndRange {
   316  			*v = KeyTypePointAndRange
   317  		}
   318  	case KeyTypePointAndRange:
   319  		// Do nothing.
   320  	default:
   321  		panic("unexpected key type")
   322  	}
   323  	return v
   324  }
   325  
// LevelIterator iterates over a set of files' metadata. Its zero value is an
// empty iterator.
//
// iter is the iterator's current position in the backing B-Tree. start and
// end, when non-nil, are inclusive bounds on the files the iterator may
// return; they are shared between clones and treated as immutable. filter
// restricts which files positioning operations return; its zero value,
// KeyTypePointAndRange, skips nothing.
type LevelIterator struct {
	iter   iterator
	start  *iterator
	end    *iterator
	filter KeyType
}
   334  
   335  func (i LevelIterator) String() string {
   336  	var buf bytes.Buffer
   337  	iter := i.iter.clone()
   338  	iter.first()
   339  	iter.prev()
   340  	if i.iter.pos == -1 {
   341  		fmt.Fprint(&buf, "(<start>)*")
   342  	}
   343  	iter.next()
   344  	for ; iter.valid(); iter.next() {
   345  		if buf.Len() > 0 {
   346  			fmt.Fprint(&buf, "   ")
   347  		}
   348  
   349  		if i.start != nil && cmpIter(iter, *i.start) == 0 {
   350  			fmt.Fprintf(&buf, " [ ")
   351  		}
   352  		isCurrentPos := cmpIter(iter, i.iter) == 0
   353  		if isCurrentPos {
   354  			fmt.Fprint(&buf, " ( ")
   355  		}
   356  		fmt.Fprint(&buf, iter.cur().String())
   357  		if isCurrentPos {
   358  			fmt.Fprint(&buf, " )*")
   359  		}
   360  		if i.end != nil && cmpIter(iter, *i.end) == 0 {
   361  			fmt.Fprintf(&buf, " ]")
   362  		}
   363  	}
   364  	if i.iter.n != nil && i.iter.pos >= i.iter.n.count {
   365  		if buf.Len() > 0 {
   366  			fmt.Fprint(&buf, "   ")
   367  		}
   368  		fmt.Fprint(&buf, "(<end>)*")
   369  	}
   370  	return buf.String()
   371  }
   372  
   373  // Clone copies the iterator, returning an independent iterator at the same
   374  // position.
   375  func (i *LevelIterator) Clone() LevelIterator {
   376  	if i.iter.r == nil {
   377  		return *i
   378  	}
   379  	// The start and end iterators are not cloned and are treated as
   380  	// immutable.
   381  	return LevelIterator{
   382  		iter:   i.iter.clone(),
   383  		start:  i.start,
   384  		end:    i.end,
   385  		filter: i.filter,
   386  	}
   387  }
   388  
   389  // Current returns the item at the current iterator position.
   390  func (i *LevelIterator) Current() *FileMetadata {
   391  	if !i.iter.valid() {
   392  		return nil
   393  	}
   394  	return i.iter.cur()
   395  }
   396  
   397  func (i *LevelIterator) empty() bool {
   398  	return emptyWithBounds(i.iter, i.start, i.end)
   399  }
   400  
   401  // Filter clones the iterator and sets the desired KeyType as the key to filter
   402  // files on.
   403  func (i *LevelIterator) Filter(keyType KeyType) LevelIterator {
   404  	l := i.Clone()
   405  	l.filter = keyType
   406  	return l
   407  }
   408  
   409  func emptyWithBounds(i iterator, start, end *iterator) bool {
   410  	// If i.r is nil, the iterator was constructed from an empty btree.
   411  	// If the end bound is before the start bound, the bounds represent an
   412  	// empty slice of the B-Tree.
   413  	return i.r == nil || (start != nil && end != nil && cmpIter(*end, *start) < 0)
   414  }
   415  
   416  // First seeks to the first file in the iterator and returns it.
   417  func (i *LevelIterator) First() *FileMetadata {
   418  	if i.empty() {
   419  		return nil
   420  	}
   421  	if i.start != nil {
   422  		i.iter = i.start.clone()
   423  	} else {
   424  		i.iter.first()
   425  	}
   426  	if !i.iter.valid() {
   427  		return nil
   428  	}
   429  	return i.filteredNextFile(i.iter.cur())
   430  }
   431  
   432  // Last seeks to the last file in the iterator and returns it.
   433  func (i *LevelIterator) Last() *FileMetadata {
   434  	if i.empty() {
   435  		return nil
   436  	}
   437  	if i.end != nil {
   438  		i.iter = i.end.clone()
   439  	} else {
   440  		i.iter.last()
   441  	}
   442  	if !i.iter.valid() {
   443  		return nil
   444  	}
   445  	return i.filteredPrevFile(i.iter.cur())
   446  }
   447  
   448  // Next advances the iterator to the next file and returns it.
   449  func (i *LevelIterator) Next() *FileMetadata {
   450  	i.iter.next()
   451  	if !i.iter.valid() {
   452  		return nil
   453  	}
   454  	if i.end != nil && cmpIter(i.iter, *i.end) > 0 {
   455  		return nil
   456  	}
   457  	return i.filteredNextFile(i.iter.cur())
   458  }
   459  
   460  // Prev moves the iterator the previous file and returns it.
   461  func (i *LevelIterator) Prev() *FileMetadata {
   462  	i.iter.prev()
   463  	if !i.iter.valid() {
   464  		return nil
   465  	}
   466  	if i.start != nil && cmpIter(i.iter, *i.start) < 0 {
   467  		return nil
   468  	}
   469  	return i.filteredPrevFile(i.iter.cur())
   470  }
   471  
   472  // SeekGE seeks to the first file in the iterator's file set with a largest
   473  // user key greater than or equal to the provided user key. The iterator must
   474  // have been constructed from L1+, because it requires the underlying files to
   475  // be sorted by user keys and non-overlapping.
   476  func (i *LevelIterator) SeekGE(cmp Compare, userKey []byte) *FileMetadata {
   477  	// TODO(jackson): Assert that i.iter.cmp == btreeCmpSmallestKey.
   478  	if i.empty() {
   479  		return nil
   480  	}
   481  	meta := i.seek(func(m *FileMetadata) bool {
   482  		return cmp(m.Largest.UserKey, userKey) >= 0
   483  	})
   484  	for meta != nil {
   485  		switch i.filter {
   486  		case KeyTypePointAndRange:
   487  			return meta
   488  		case KeyTypePoint:
   489  			if meta.HasPointKeys && cmp(meta.LargestPointKey.UserKey, userKey) >= 0 {
   490  				return meta
   491  			}
   492  		case KeyTypeRange:
   493  			if meta.HasRangeKeys && cmp(meta.LargestRangeKey.UserKey, userKey) >= 0 {
   494  				return meta
   495  			}
   496  		}
   497  		meta = i.Next()
   498  	}
   499  	return i.filteredNextFile(meta)
   500  }
   501  
   502  // SeekLT seeks to the last file in the iterator's file set with a smallest
   503  // user key less than the provided user key. The iterator must have been
   504  // constructed from L1+, because it requires the underlying files to be sorted
   505  // by user keys and non-overlapping.
   506  func (i *LevelIterator) SeekLT(cmp Compare, userKey []byte) *FileMetadata {
   507  	// TODO(jackson): Assert that i.iter.cmp == btreeCmpSmallestKey.
   508  	if i.empty() {
   509  		return nil
   510  	}
   511  	i.seek(func(m *FileMetadata) bool {
   512  		return cmp(m.Smallest.UserKey, userKey) >= 0
   513  	})
   514  	meta := i.Prev()
   515  	for meta != nil {
   516  		switch i.filter {
   517  		case KeyTypePointAndRange:
   518  			return meta
   519  		case KeyTypePoint:
   520  			if meta.HasPointKeys && cmp(meta.SmallestPointKey.UserKey, userKey) < 0 {
   521  				return meta
   522  			}
   523  		case KeyTypeRange:
   524  			if meta.HasRangeKeys && cmp(meta.SmallestRangeKey.UserKey, userKey) < 0 {
   525  				return meta
   526  			}
   527  		}
   528  		meta = i.Prev()
   529  	}
   530  	return i.filteredPrevFile(meta)
   531  }
   532  
   533  func (i *LevelIterator) filteredNextFile(meta *FileMetadata) *FileMetadata {
   534  	switch i.filter {
   535  	case KeyTypePoint:
   536  		for meta != nil && !meta.HasPointKeys {
   537  			meta = i.Next()
   538  		}
   539  		return meta
   540  	case KeyTypeRange:
   541  		// TODO(bilal): Range keys are expected to be rare and sparse. Add an
   542  		// optimization to annotate the tree and efficiently skip over files that
   543  		// do not contain range keys right at the seek step, to reduce iterations
   544  		// here.
   545  		for meta != nil && !meta.HasRangeKeys {
   546  			meta = i.Next()
   547  		}
   548  		return meta
   549  	default:
   550  		return meta
   551  	}
   552  }
   553  
   554  func (i *LevelIterator) filteredPrevFile(meta *FileMetadata) *FileMetadata {
   555  	switch i.filter {
   556  	case KeyTypePoint:
   557  		for meta != nil && !meta.HasPointKeys {
   558  			meta = i.Prev()
   559  		}
   560  		return meta
   561  	case KeyTypeRange:
   562  		// TODO(bilal): Range keys are expected to be rare and sparse. Add an
   563  		// optimization to annotate the tree and efficiently skip over files that
   564  		// do not contain range keys right at the seek step, to reduce iterations
   565  		// here.
   566  		for meta != nil && !meta.HasRangeKeys {
   567  			meta = i.Prev()
   568  		}
   569  		return meta
   570  	default:
   571  		return meta
   572  	}
   573  }
   574  
   575  func (i *LevelIterator) seek(fn func(*FileMetadata) bool) *FileMetadata {
   576  	i.iter.seek(fn)
   577  
   578  	// i.iter.seek seeked in the unbounded underlying B-Tree. If the iterator
   579  	// has start or end bounds, we may have exceeded them. Reset to the bounds
   580  	// if necessary.
   581  	//
   582  	// NB: The LevelIterator and LevelSlice semantics require that a bounded
   583  	// LevelIterator/LevelSlice containing files x0, x1, ..., xn behave
   584  	// identically to an unbounded LevelIterator/LevelSlice of a B-Tree
   585  	// containing x0, x1, ..., xn. In other words, any files outside the
   586  	// LevelIterator's bounds should not influence the iterator's behavior.
   587  	// When seeking, this means a SeekGE that seeks beyond the end bound,
   588  	// followed by a Prev should return the last element within bounds.
   589  	if i.end != nil && cmpIter(i.iter, *i.end) > 0 {
   590  		i.iter = i.end.clone()
   591  		// Since seek(fn) positioned beyond i.end, we know there is nothing to
   592  		// return within bounds.
   593  		i.iter.next()
   594  		return nil
   595  	} else if i.start != nil && cmpIter(i.iter, *i.start) < 0 {
   596  		i.iter = i.start.clone()
   597  		return i.iter.cur()
   598  	}
   599  	if !i.iter.valid() {
   600  		return nil
   601  	}
   602  	return i.iter.cur()
   603  }
   604  
   605  // Take constructs a LevelFile containing the file at the iterator's current
   606  // position. Take panics if the iterator is not currently positioned over a
   607  // file.
   608  func (i *LevelIterator) Take() LevelFile {
   609  	m := i.Current()
   610  	if m == nil {
   611  		panic("Take called on invalid LevelIterator")
   612  	}
   613  	// LevelSlice's start and end fields are immutable and are positioned to
   614  	// the same position for a LevelFile because they're inclusive, so we can
   615  	// share one iterator stack between the two bounds.
   616  	boundsIter := i.iter.clone()
   617  	return LevelFile{
   618  		FileMetadata: m,
   619  		slice: LevelSlice{
   620  			iter:   i.iter.clone(),
   621  			start:  &boundsIter,
   622  			end:    &boundsIter,
   623  			length: 1,
   624  		},
   625  	}
   626  }