github.com/zuoyebang/bitalostable@v1.0.1-0.20240229032404-e3b99a834294/level_checker.go

     1  // Copyright 2019 The LevelDB-Go and Pebble and Bitalostored Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package bitalostable
     6  
     7  import (
     8  	"fmt"
     9  	"io"
    10  	"sort"
    11  	"sync/atomic"
    12  
    13  	"github.com/cockroachdb/errors"
    14  	"github.com/zuoyebang/bitalostable/internal/base"
    15  	"github.com/zuoyebang/bitalostable/internal/keyspan"
    16  	"github.com/zuoyebang/bitalostable/internal/manifest"
    17  )
    18  
    19  // This file implements DB.CheckLevels() which checks that every entry in the
    20  // DB is consistent with respect to the level invariant: any point (or the
    21  // infinite number of points in a range tombstone) has a seqnum such that a
    22  // point with the same UserKey at a lower level has a lower seqnum. This is an
    23  // expensive check since it involves iterating over all the entries in the DB,
    24  // hence only intended for tests or tools.
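        // For example, if L2 contains foo#10,SET then any point with user key foo in
        // L4 must carry a seqnum smaller than 10.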
    25  //
    26  // If we ignore range tombstones, the consistency checking of points can be
    27  // done with a simplified version of mergingIter. simpleMergingIter is that
    28  // simplified version of mergingIter that only needs to step through points
    29  // (analogous to only doing Next()). It can also easily accommodate
    30  // consistency checking of points relative to range tombstones.
    31  // simpleMergingIter does not do any seek optimizations present in mergingIter
    32  // (it minimally needs to seek the range delete iterators to position them at
    33  // or past the current point) since it does not want to miss points for
    34  // purposes of consistency checking.
    35  //
    36  // Mutual consistency of range tombstones is non-trivial to check. One needs
    37  // to detect inversions of the form [a, c)#8 at a higher level and [b, c)#10 at
    38  // a lower level. The start key of the former is not contained in the latter
    39  // and we can't use the exclusive end key, c, for a containment check since it
    40  // is the sentinel key. We observe that if these tombstones were fragmented
    41  // wrt each other we would have [a, b)#8 and [b, c)#8 at the higher level and
    42  // [b, c)#10 at the lower level and then it is trivial to compare the two
    43  // [b, c) tombstones. Note that this fragmentation needs to take into account
    44  // that tombstones in a file may be untruncated and need to act within the
    45  // bounds of the file. This checking is performed by checkRangeTombstones()
    46  // and its helper functions.
    47  
    48  // The per-level structure used by simpleMergingIter.
    49  type simpleMergingIterLevel struct {
    50  	iter         internalIterator
    51  	rangeDelIter keyspan.FragmentIterator
    52  	levelIterBoundaryContext
    53  
    54  	iterKey   *InternalKey
    55  	iterValue []byte
    56  	tombstone *keyspan.Span
    57  }
    58  
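        // simpleMergingIter merges the points from all the levels, visiting equal user
        // keys in decreasing seqnum order, and performs the point-level consistency
        // checks described above as it steps.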
    59  type simpleMergingIter struct {
    60  	levels   []simpleMergingIterLevel
    61  	snapshot uint64
    62  	heap     mergingIterHeap
    63  	// The last point's key and level. For validation.
    64  	lastKey     InternalKey
    65  	lastLevel   int
    66  	lastIterMsg string
    67  	// A non-nil valueMerger means MERGE record processing is ongoing.
    68  	valueMerger base.ValueMerger
    69  	// The first error will cause step() to return false.
    70  	err       error
    71  	numPoints int64
    72  	merge     Merge
    73  	formatKey base.FormatKey
    74  }
    75  
    76  func (m *simpleMergingIter) init(
    77  	merge Merge,
    78  	cmp Compare,
    79  	snapshot uint64,
    80  	formatKey base.FormatKey,
    81  	levels ...simpleMergingIterLevel,
    82  ) {
    83  	m.levels = levels
    84  	m.formatKey = formatKey
    85  	m.merge = merge
    86  	m.snapshot = snapshot
    87  	m.lastLevel = -1
    88  	m.heap.cmp = cmp
    89  	m.heap.items = make([]mergingIterItem, 0, len(levels))
    90  	for i := range m.levels {
    91  		l := &m.levels[i]
    92  		l.iterKey, l.iterValue = l.iter.First()
    93  		if l.iterKey != nil {
    94  			item := mergingIterItem{
    95  				index: i,
    96  				value: l.iterValue,
    97  			}
    98  			item.key.Trailer = l.iterKey.Trailer
    99  			item.key.UserKey = append(item.key.UserKey[:0], l.iterKey.UserKey...)
   100  			m.heap.items = append(m.heap.items, item)
   101  		}
   102  	}
   103  	m.heap.init()
   104  
   105  	if m.heap.len() == 0 {
   106  		return
   107  	}
   108  	m.positionRangeDels()
   109  }
   110  
   111  // Positions all the rangedel iterators at or past the current top of the
   112  // heap, using SeekGE().
   113  func (m *simpleMergingIter) positionRangeDels() {
   114  	item := &m.heap.items[0]
   115  	for i := range m.levels {
   116  		l := &m.levels[i]
   117  		if l.rangeDelIter == nil {
   118  			continue
   119  		}
   120  		l.tombstone = keyspan.SeekGE(m.heap.cmp, l.rangeDelIter, item.key.UserKey)
   121  	}
   122  }
   123  
   124  // Returns true if not yet done.
   125  func (m *simpleMergingIter) step() bool {
   126  	if m.heap.len() == 0 || m.err != nil {
   127  		return false
   128  	}
   129  	item := &m.heap.items[0]
   130  	l := &m.levels[item.index]
   131  	// Sentinels are not relevant for this point checking.
   132  	if !item.key.IsExclusiveSentinel() && item.key.Visible(m.snapshot) {
   133  		m.numPoints++
   134  		keyChanged := m.heap.cmp(item.key.UserKey, m.lastKey.UserKey) != 0
   135  		if !keyChanged {
   136  			// At the same user key. We will see entries in decreasing seqnum
   137  			// order, so lastLevel must not be a lower (i.e. larger-indexed) level.
   138  			if m.lastLevel > item.index {
   139  				m.err = errors.Errorf("found InternalKey %s in %s and InternalKey %s in %s",
   140  					item.key.Pretty(m.formatKey), l.iter, m.lastKey.Pretty(m.formatKey),
   141  					m.lastIterMsg)
   142  				return false
   143  			}
   144  			m.lastLevel = item.index
   145  		} else {
   146  			// The user key has changed.
   147  			m.lastKey.Trailer = item.key.Trailer
   148  			m.lastKey.UserKey = append(m.lastKey.UserKey[:0], item.key.UserKey...)
   149  			m.lastLevel = item.index
   150  		}
   151  		// Ongoing series of MERGE records ends with a MERGE record.
   152  		if keyChanged && m.valueMerger != nil {
   153  			var closer io.Closer
   154  			_, closer, m.err = m.valueMerger.Finish(true /* includesBase */)
   155  			if m.err == nil && closer != nil {
   156  				m.err = closer.Close()
   157  			}
   158  			m.valueMerger = nil
   159  		}
   160  		if m.valueMerger != nil {
   161  			// Ongoing series of MERGE records.
   162  			switch item.key.Kind() {
   163  			case InternalKeyKindSingleDelete, InternalKeyKindDelete:
   164  				var closer io.Closer
   165  				_, closer, m.err = m.valueMerger.Finish(true /* includesBase */)
   166  				if m.err == nil && closer != nil {
   167  					m.err = closer.Close()
   168  				}
   169  				m.valueMerger = nil
   170  			case InternalKeyKindSet, InternalKeyKindSetWithDelete:
   171  				m.err = m.valueMerger.MergeOlder(item.value)
   172  				if m.err == nil {
   173  					var closer io.Closer
   174  					_, closer, m.err = m.valueMerger.Finish(true /* includesBase */)
   175  					if m.err == nil && closer != nil {
   176  						m.err = closer.Close()
   177  					}
   178  				}
   179  				m.valueMerger = nil
   180  			case InternalKeyKindMerge:
   181  				m.err = m.valueMerger.MergeOlder(item.value)
   182  			default:
   183  				m.err = errors.Errorf("bitalostable: invalid internal key kind %s in %s",
   184  					item.key.Pretty(m.formatKey),
   185  					l.iter)
   186  				return false
   187  			}
   188  		} else if item.key.Kind() == InternalKeyKindMerge && m.err == nil {
   189  			// New series of MERGE records.
   190  			m.valueMerger, m.err = m.merge(item.key.UserKey, item.value)
   191  		}
   192  		if m.err != nil {
   193  			m.err = errors.Wrapf(m.err, "merge processing error on key %s in %s",
   194  				item.key.Pretty(m.formatKey), l.iter)
   195  			return false
   196  		}
   197  		// Is this point covered by a tombstone at a lower level? Note that all these
   198  		// iterators must be positioned at a key > item.key. So the Largest key bound
   199  		// of the sstable containing the tombstone is >= item.key. So the upper limit of
   200  		// the tombstone cannot be file-bounds-constrained to < item.key. But it is
   201  		// possible that item.key < smallest key bound of the sstable, in which case
   202  		// this tombstone should be ignored.
   203  		for level := item.index + 1; level < len(m.levels); level++ {
   204  			lvl := &m.levels[level]
   205  			if lvl.rangeDelIter == nil || lvl.tombstone.Empty() {
   206  				continue
   207  			}
   208  			if (lvl.smallestUserKey == nil || m.heap.cmp(lvl.smallestUserKey, item.key.UserKey) <= 0) &&
   209  				lvl.tombstone.Contains(m.heap.cmp, item.key.UserKey) {
   210  				if lvl.tombstone.CoversAt(m.snapshot, item.key.SeqNum()) {
   211  					m.err = errors.Errorf("tombstone %s in %s deletes key %s in %s",
   212  						lvl.tombstone.Pretty(m.formatKey), lvl.iter, item.key.Pretty(m.formatKey),
   213  						l.iter)
   214  					return false
   215  				}
   216  			}
   217  		}
   218  	}
   219  
   220  	// The iterator for the current level may be closed in the following call to
   221  	// Next(). We save its debug string for potential use after it is closed -
   222  	// either in this current step() invocation or on the next invocation.
   223  	m.lastIterMsg = l.iter.String()
   224  
   225  	// Step to the next point.
   226  	if l.iterKey, l.iterValue = l.iter.Next(); l.iterKey != nil {
   227  		// Check that point keys in an sstable are ordered. Although not required, we
   228  		// check memtables as well. A subtle aspect of this check is that successive
   229  		// sstables in L1 and higher levels are also verified to be ordered: when
   230  		// levelIter moves to the next sstable in the level, item.key is the previous
   231  		// sstable's last point key.
   232  		if base.InternalCompare(m.heap.cmp, item.key, *l.iterKey) >= 0 {
   233  			m.err = errors.Errorf("out of order keys %s >= %s in %s",
   234  				item.key.Pretty(m.formatKey), l.iterKey.Pretty(m.formatKey), l.iter)
   235  			return false
   236  		}
   237  		item.key.Trailer = l.iterKey.Trailer
   238  		item.key.UserKey = append(item.key.UserKey[:0], l.iterKey.UserKey...)
   239  		item.value = l.iterValue
   240  		if m.heap.len() > 1 {
   241  			m.heap.fix(0)
   242  		}
   243  	} else {
   244  		m.err = l.iter.Close()
   245  		l.iter = nil
   246  		m.heap.pop()
   247  	}
   248  	if m.err != nil {
   249  		return false
   250  	}
   251  	if m.heap.len() == 0 {
   252  		// Last record was a MERGE record.
   253  		if m.valueMerger != nil {
   254  			var closer io.Closer
   255  			_, closer, m.err = m.valueMerger.Finish(true /* includesBase */)
   256  			if m.err == nil && closer != nil {
   257  				m.err = closer.Close()
   258  			}
   259  			if m.err != nil {
   260  				m.err = errors.Wrapf(m.err, "merge processing error on key %s in %s",
   261  					item.key.Pretty(m.formatKey), m.lastIterMsg)
   262  			}
   263  			m.valueMerger = nil
   264  		}
   265  		return false
   266  	}
   267  	m.positionRangeDels()
   268  	return true
   269  }
   270  
   271  // Checking that range tombstones are mutually consistent is performed by checkRangeTombstones().
   272  // See the overview comment at the top of the file.
   273  //
   274  // We do this check as follows:
   275  // - For each level that can have untruncated tombstones, compute the atomic compaction
   276  //   bounds (getAtomicUnitBounds()) and use them to truncate tombstones.
   277  // - Now that we have a set of truncated tombstones for each level, put them into one
   278  //   pool of tombstones along with their level information (addTombstonesFromIter()).
   279  // - Collect the start and end user keys from all these tombstones (collectAllUserKey()) and use
   280  //   them to fragment all the tombstones (fragmentUsingUserKey()).
   281  // - Sort tombstones by start key and decreasing seqnum (tombstonesByStartKeyAndSeqnum) -- all
   282  //   tombstones that have the same start key will have the same end key because they have been
   283  //   fragmented.
   284  // - Iterate and check (iterateAndCheckTombstones()).
    285  // Note that this simple approach requires holding all the tombstones across all levels in memory.
   286  // A more sophisticated incremental approach could be devised, if necessary.
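        //
        // Walking through the example from the top of the file: suppose a higher level
        // holds [a, c)#8 and a lower level holds [b, c)#10. The collected user keys are
        // {a, b, c}, so fragmentation rewrites the former into [a, b)#8 and [b, c)#8.
        // Sorting by start key and decreasing seqnum then places [b, c)#10 (lower
        // level) immediately before [b, c)#8 (higher level), and the inversion is
        // reported by iterateAndCheckTombstones().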
   287  
   288  // A tombstone and the corresponding level it was found in.
   289  type tombstoneWithLevel struct {
   290  	keyspan.Span
   291  	level int
    292  	// The level in the LSM. A value of -1 means the tombstone came from a memtable.
   293  	lsmLevel int
   294  	fileNum  FileNum
   295  }
   296  
    297  // tombstonesByStartKeyAndSeqnum sorts tombstoneWithLevels in increasing order
    298  // of start UserKey and, for equal start UserKeys, in decreasing order of seqnum.
   299  type tombstonesByStartKeyAndSeqnum struct {
   300  	cmp Compare
   301  	buf []tombstoneWithLevel
   302  }
   303  
   304  func (v *tombstonesByStartKeyAndSeqnum) Len() int { return len(v.buf) }
   305  func (v *tombstonesByStartKeyAndSeqnum) Less(i, j int) bool {
   306  	less := v.cmp(v.buf[i].Start, v.buf[j].Start)
   307  	if less == 0 {
   308  		return v.buf[i].LargestSeqNum() > v.buf[j].LargestSeqNum()
   309  	}
   310  	return less < 0
   311  }
   312  func (v *tombstonesByStartKeyAndSeqnum) Swap(i, j int) {
   313  	v.buf[i], v.buf[j] = v.buf[j], v.buf[i]
   314  }
   315  
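        // iterateAndCheckTombstones sorts the fragmented tombstones by start key and
        // decreasing seqnum and verifies that, within a run of equal start keys, the
        // level never decreases, i.e. a newer tombstone never appears below an older
        // copy of the same fragment.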
   316  func iterateAndCheckTombstones(
   317  	cmp Compare, formatKey base.FormatKey, tombstones []tombstoneWithLevel,
   318  ) error {
   319  	sortBuf := tombstonesByStartKeyAndSeqnum{
   320  		cmp: cmp,
   321  		buf: tombstones,
   322  	}
   323  	sort.Sort(&sortBuf)
   324  
   325  	// For a sequence of tombstones that share the same start UserKey, we will
   326  	// encounter them in non-increasing seqnum order and so should encounter them
   327  	// in non-decreasing level order.
   328  	lastTombstone := tombstoneWithLevel{}
   329  	for _, t := range tombstones {
   330  		if cmp(lastTombstone.Start, t.Start) == 0 && lastTombstone.level > t.level {
   331  			return errors.Errorf("encountered tombstone %s in %s"+
   332  				" that has a lower seqnum than the same tombstone in %s",
   333  				t.Span.Pretty(formatKey), levelOrMemtable(t.lsmLevel, t.fileNum),
   334  				levelOrMemtable(lastTombstone.lsmLevel, lastTombstone.fileNum))
   335  		}
   336  		lastTombstone = t
   337  	}
   338  	return nil
   339  }
   340  
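        // checkConfig holds the state shared by checkLevelsInternal and
        // checkRangeTombstones.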
   341  type checkConfig struct {
   342  	logger    Logger
   343  	cmp       Compare
   344  	readState *readState
   345  	newIters  tableNewIters
   346  	seqNum    uint64
   347  	stats     *CheckLevelsStats
   348  	merge     Merge
   349  	formatKey base.FormatKey
   350  }
   351  
   352  func checkRangeTombstones(c *checkConfig) error {
   353  	var level int
   354  	var tombstones []tombstoneWithLevel
   355  	var err error
   356  
   357  	memtables := c.readState.memtables
   358  	for i := len(memtables) - 1; i >= 0; i-- {
   359  		iter := memtables[i].newRangeDelIter(nil)
   360  		if iter == nil {
   361  			continue
   362  		}
   363  		if tombstones, err = addTombstonesFromIter(iter, level, -1, 0, tombstones,
   364  			c.seqNum, c.cmp, c.formatKey, nil); err != nil {
   365  			return err
   366  		}
   367  		level++
   368  	}
   369  
   370  	current := c.readState.current
   371  	addTombstonesFromLevel := func(files manifest.LevelIterator, lsmLevel int) error {
   372  		for f := files.First(); f != nil; f = files.Next() {
   373  			lf := files.Take()
   374  			atomicUnit, _ := expandToAtomicUnit(c.cmp, lf.Slice(), true /* disableIsCompacting */)
   375  			lower, upper := manifest.KeyRange(c.cmp, atomicUnit.Iter())
   376  			iterToClose, iter, err := c.newIters(lf.FileMetadata, nil, internalIterOpts{})
   377  			if err != nil {
   378  				return err
   379  			}
   380  			iterToClose.Close()
   381  			if iter == nil {
   382  				continue
   383  			}
   384  			truncate := func(t keyspan.Span) keyspan.Span {
   385  				// Same checks as in keyspan.Truncate.
   386  				if c.cmp(t.Start, lower.UserKey) < 0 {
   387  					t.Start = lower.UserKey
   388  				}
   389  				if c.cmp(t.End, upper.UserKey) > 0 {
   390  					t.End = upper.UserKey
   391  				}
   392  				if c.cmp(t.Start, t.End) >= 0 {
   393  					// Remove the keys.
   394  					t.Keys = t.Keys[:0]
   395  				}
   396  				return t
   397  			}
   398  			if tombstones, err = addTombstonesFromIter(iter, level, lsmLevel, f.FileNum,
   399  				tombstones, c.seqNum, c.cmp, c.formatKey, truncate); err != nil {
   400  				return err
   401  			}
   402  		}
   403  		return nil
   404  	}
    405  	// Now the levels with untruncated tombstones.
   406  	for i := len(current.L0SublevelFiles) - 1; i >= 0; i-- {
   407  		if current.L0SublevelFiles[i].Empty() {
   408  			continue
   409  		}
   410  		err := addTombstonesFromLevel(current.L0SublevelFiles[i].Iter(), 0)
   411  		if err != nil {
   412  			return err
   413  		}
   414  		level++
   415  	}
   416  	for i := 1; i < len(current.Levels); i++ {
   417  		if err := addTombstonesFromLevel(current.Levels[i].Iter(), i); err != nil {
   418  			return err
   419  		}
   420  		level++
   421  	}
   422  	if c.stats != nil {
   423  		c.stats.NumTombstones = len(tombstones)
   424  	}
   425  	// We now have truncated tombstones.
   426  	// Fragment them all.
   427  	userKeys := collectAllUserKeys(c.cmp, tombstones)
   428  	tombstones = fragmentUsingUserKeys(c.cmp, tombstones, userKeys)
   429  	return iterateAndCheckTombstones(c.cmp, c.formatKey, tombstones)
   430  }
   431  
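        // levelOrMemtable describes the source of a key or tombstone for error
        // messages: either an LSM level and file number, or a memtable.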
   432  func levelOrMemtable(lsmLevel int, fileNum FileNum) string {
   433  	if lsmLevel == -1 {
   434  		return "memtable"
   435  	}
   436  	return fmt.Sprintf("L%d: fileNum=%s", lsmLevel, fileNum)
   437  }
   438  
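        // addTombstonesFromIter appends to tombstones every span from iter that is
        // visible at seqNum, truncating each span via the optional truncate callback,
        // and verifies that the spans it reads are ordered and fragmented.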
   439  func addTombstonesFromIter(
   440  	iter keyspan.FragmentIterator,
   441  	level int,
   442  	lsmLevel int,
   443  	fileNum FileNum,
   444  	tombstones []tombstoneWithLevel,
   445  	seqNum uint64,
   446  	cmp Compare,
   447  	formatKey base.FormatKey,
   448  	truncate func(tombstone keyspan.Span) keyspan.Span,
   449  ) (_ []tombstoneWithLevel, err error) {
   450  	defer func() {
   451  		err = firstError(err, iter.Close())
   452  	}()
   453  
   454  	var prevTombstone keyspan.Span
   455  	for tomb := iter.First(); tomb != nil; tomb = iter.Next() {
   456  		t := tomb.Visible(seqNum)
   457  		if t.Empty() {
   458  			continue
   459  		}
   460  		t = t.DeepClone()
    461  		// This is mainly a test for rangeDelV2-formatted blocks, which are expected
    462  		// to be ordered and fragmented on disk. But we check memtables and rangeDelV1
    463  		// blocks as well.
   464  		if cmp(prevTombstone.End, t.Start) > 0 {
   465  			return nil, errors.Errorf("unordered or unfragmented range delete tombstones %s, %s in %s",
   466  				prevTombstone.Pretty(formatKey), t.Pretty(formatKey), levelOrMemtable(lsmLevel, fileNum))
   467  		}
   468  		prevTombstone = t
   469  
    470  		// Truncation of a tombstone must happen after checking its ordering and
    471  		// fragmentation wrt the previous tombstone, since after truncation the
    472  		// tombstone may become ordered and fragmented when it originally wasn't.
   473  		if truncate != nil {
   474  			t = truncate(t)
   475  		}
   476  		if !t.Empty() {
   477  			tombstones = append(tombstones, tombstoneWithLevel{
   478  				Span:     t,
   479  				level:    level,
   480  				lsmLevel: lsmLevel,
   481  				fileNum:  fileNum,
   482  			})
   483  		}
   484  	}
   485  	return tombstones, nil
   486  }
   487  
   488  type userKeysSort struct {
   489  	cmp Compare
   490  	buf [][]byte
   491  }
   492  
   493  func (v *userKeysSort) Len() int { return len(v.buf) }
   494  func (v *userKeysSort) Less(i, j int) bool {
   495  	return v.cmp(v.buf[i], v.buf[j]) < 0
   496  }
   497  func (v *userKeysSort) Swap(i, j int) {
   498  	v.buf[i], v.buf[j] = v.buf[j], v.buf[i]
   499  }
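        // collectAllUserKeys gathers the start and end key of every tombstone, then
        // sorts and de-duplicates them in place.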
   500  func collectAllUserKeys(cmp Compare, tombstones []tombstoneWithLevel) [][]byte {
   501  	keys := make([][]byte, 0, len(tombstones)*2)
   502  	for _, t := range tombstones {
   503  		keys = append(keys, t.Start)
   504  		keys = append(keys, t.End)
   505  	}
   506  	sorter := userKeysSort{
   507  		cmp: cmp,
   508  		buf: keys,
   509  	}
   510  	sort.Sort(&sorter)
   511  	var last, curr int
   512  	for last, curr = -1, 0; curr < len(keys); curr++ {
   513  		if last < 0 || cmp(keys[last], keys[curr]) != 0 {
   514  			last++
   515  			keys[last] = keys[curr]
   516  		}
   517  	}
   518  	keys = keys[:last+1]
   519  	return keys
   520  }
   521  
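        // fragmentUsingUserKeys splits every tombstone at each collected user key that
        // falls strictly inside its [Start, End) interval, so that any two tombstones
        // sharing a start key also share an end key. For example, fragmenting [a, d)
        // at the keys {b, c} yields [a, b), [b, c) and [c, d), each carrying the
        // original keys and level information.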
   522  func fragmentUsingUserKeys(
   523  	cmp Compare, tombstones []tombstoneWithLevel, userKeys [][]byte,
   524  ) []tombstoneWithLevel {
   525  	var buf []tombstoneWithLevel
   526  	for _, t := range tombstones {
    527  		// Find the first user key that is strictly greater than the tombstone's start key.
   528  		i := sort.Search(len(userKeys), func(i int) bool {
   529  			return cmp(t.Start, userKeys[i]) < 0
   530  		})
   531  		for ; i < len(userKeys); i++ {
   532  			if cmp(userKeys[i], t.End) >= 0 {
   533  				break
   534  			}
   535  			tPartial := t
   536  			tPartial.End = userKeys[i]
   537  			buf = append(buf, tPartial)
   538  			t.Start = userKeys[i]
   539  		}
   540  		buf = append(buf, t)
   541  	}
   542  	return buf
   543  }
   544  
   545  // CheckLevelsStats provides basic stats on points and tombstones encountered.
   546  type CheckLevelsStats struct {
   547  	NumPoints     int64
   548  	NumTombstones int
   549  }
   550  
   551  // CheckLevels checks:
   552  //   - Every entry in the DB is consistent with the level invariant. See the
   553  //     comment at the top of the file.
   554  //   - Point keys in sstables are ordered.
   555  //   - Range delete tombstones in sstables are ordered and fragmented.
   556  //   - Successful processing of all MERGE records.
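        //
        // A minimal usage sketch (assuming the usual Open/Options/Close entry points of
        // this package; the path and error handling are illustrative only):
        //
        //	db, err := Open("/tmp/check-levels-demo", &Options{})
        //	if err != nil {
        //		return err
        //	}
        //	defer db.Close()
        //	var stats CheckLevelsStats
        //	if err := db.CheckLevels(&stats); err != nil {
        //		return err // reports the offending keys or tombstones and their levels
        //	}
        //	fmt.Printf("checked %d points, %d tombstones\n", stats.NumPoints, stats.NumTombstones)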
   557  func (d *DB) CheckLevels(stats *CheckLevelsStats) error {
   558  	// Grab and reference the current readState.
   559  	readState := d.loadReadState()
   560  	defer readState.unref()
   561  
   562  	// Determine the seqnum to read at after grabbing the read state (current and
   563  	// memtables) above.
   564  	seqNum := atomic.LoadUint64(&d.mu.versions.atomic.visibleSeqNum)
   565  
   566  	checkConfig := &checkConfig{
   567  		logger:    d.opts.Logger,
   568  		cmp:       d.cmp,
   569  		readState: readState,
   570  		newIters:  d.newIters,
   571  		seqNum:    seqNum,
   572  		stats:     stats,
   573  		merge:     d.merge,
   574  		formatKey: d.opts.Comparer.FormatKey,
   575  	}
   576  	return checkLevelsInternal(checkConfig)
   577  }
   578  
   579  func checkLevelsInternal(c *checkConfig) (err error) {
   580  	// Phase 1: Use a simpleMergingIter to step through all the points and ensure
   581  	// that points with the same user key at different levels are not inverted
   582  	// wrt sequence numbers and the same holds for tombstones that cover points.
   583  	// To do this, one needs to construct a simpleMergingIter which is similar to
   584  	// how one constructs a mergingIter.
   585  
    586  	// Add memtables from newest to oldest.
   587  	var mlevels []simpleMergingIterLevel
   588  	defer func() {
   589  		for i := range mlevels {
   590  			l := &mlevels[i]
   591  			if l.iter != nil {
   592  				err = firstError(err, l.iter.Close())
   593  				l.iter = nil
   594  			}
   595  			if l.rangeDelIter != nil {
   596  				err = firstError(err, l.rangeDelIter.Close())
   597  				l.rangeDelIter = nil
   598  			}
   599  		}
   600  	}()
   601  
   602  	memtables := c.readState.memtables
   603  	for i := len(memtables) - 1; i >= 0; i-- {
   604  		mem := memtables[i]
   605  		mlevels = append(mlevels, simpleMergingIterLevel{
   606  			iter:         mem.newIter(nil),
   607  			rangeDelIter: mem.newRangeDelIter(nil),
   608  		})
   609  	}
   610  
   611  	current := c.readState.current
   612  	// Determine the final size for mlevels so that there are no more
   613  	// reallocations. levelIter will hold a pointer to elements in mlevels.
   614  	start := len(mlevels)
   615  	for sublevel := len(current.L0SublevelFiles) - 1; sublevel >= 0; sublevel-- {
   616  		if current.L0SublevelFiles[sublevel].Empty() {
   617  			continue
   618  		}
   619  		mlevels = append(mlevels, simpleMergingIterLevel{})
   620  	}
   621  	for level := 1; level < len(current.Levels); level++ {
   622  		if current.Levels[level].Empty() {
   623  			continue
   624  		}
   625  		mlevels = append(mlevels, simpleMergingIterLevel{})
   626  	}
   627  	mlevelAlloc := mlevels[start:]
   628  	// Add L0 files by sublevel.
   629  	for sublevel := len(current.L0SublevelFiles) - 1; sublevel >= 0; sublevel-- {
   630  		if current.L0SublevelFiles[sublevel].Empty() {
   631  			continue
   632  		}
   633  		manifestIter := current.L0SublevelFiles[sublevel].Iter()
   634  		iterOpts := IterOptions{logger: c.logger}
   635  		li := &levelIter{}
   636  		li.init(iterOpts, c.cmp, nil /* split */, c.newIters, manifestIter,
   637  			manifest.L0Sublevel(sublevel), internalIterOpts{})
   638  		li.initRangeDel(&mlevelAlloc[0].rangeDelIter)
   639  		li.initBoundaryContext(&mlevelAlloc[0].levelIterBoundaryContext)
   640  		mlevelAlloc[0].iter = li
   641  		mlevelAlloc = mlevelAlloc[1:]
   642  	}
   643  	for level := 1; level < len(current.Levels); level++ {
   644  		if current.Levels[level].Empty() {
   645  			continue
   646  		}
   647  
   648  		iterOpts := IterOptions{logger: c.logger}
   649  		li := &levelIter{}
   650  		li.init(iterOpts, c.cmp, nil /* split */, c.newIters,
   651  			current.Levels[level].Iter(), manifest.Level(level), internalIterOpts{})
   652  		li.initRangeDel(&mlevelAlloc[0].rangeDelIter)
   653  		li.initBoundaryContext(&mlevelAlloc[0].levelIterBoundaryContext)
   654  		mlevelAlloc[0].iter = li
   655  		mlevelAlloc = mlevelAlloc[1:]
   656  	}
   657  
   658  	mergingIter := &simpleMergingIter{}
   659  	mergingIter.init(c.merge, c.cmp, c.seqNum, c.formatKey, mlevels...)
   660  	for cont := mergingIter.step(); cont; cont = mergingIter.step() {
   661  	}
   662  	if err := mergingIter.err; err != nil {
   663  		return err
   664  	}
   665  	if c.stats != nil {
   666  		c.stats.NumPoints = mergingIter.numPoints
   667  	}
   668  
   669  	// Phase 2: Check that the tombstones are mutually consistent.
   670  	return checkRangeTombstones(c)
   671  }