github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/snapshot.go (about)

     1  // Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package pebble
     6  
     7  import (
     8  	"context"
     9  	"io"
    10  	"math"
    11  	"sync"
    12  	"sync/atomic"
    13  	"time"
    14  
    15  	"github.com/cockroachdb/errors"
    16  	"github.com/cockroachdb/pebble/internal/invariants"
    17  	"github.com/cockroachdb/pebble/rangekey"
    18  	"github.com/cockroachdb/pebble/sstable"
    19  )
    20  
// ErrSnapshotExcised is returned by EventuallyFileOnlySnapshot methods
// (WaitForFileOnlySnapshot, NewIterWithContext and ScanInternal) if an excise
// overlapping with one of the EventuallyFileOnlySnapshot's KeyRanges gets
// applied before the transition of that EFOS to a file-only snapshot.
var ErrSnapshotExcised = errors.New("pebble: snapshot excised before conversion to file-only snapshot")
    25  
// Snapshot provides a read-only point-in-time view of the DB state.
//
// A Snapshot is a node of the intrusive doubly-linked snapshotList it is
// registered in; the list, prev and next fields are maintained by
// snapshotList.pushBack and snapshotList.remove.
type Snapshot struct {
	// The db the snapshot was created from. Reset to nil by closeLocked, which
	// is how the read methods detect use of a closed snapshot.
	db     *DB
	// The sequence number handed to the iterators and reads created from this
	// snapshot.
	seqNum uint64

	// Set if part of an EventuallyFileOnlySnapshot.
	efos *EventuallyFileOnlySnapshot

	// The list the snapshot is linked into.
	list *snapshotList

	// The next/prev link for the snapshotList doubly-linked list of snapshots.
	prev, next *Snapshot
}
    41  
// Compile-time assertion that *Snapshot implements the Reader interface.
var _ Reader = (*Snapshot)(nil)
    43  
    44  // Get gets the value for the given key. It returns ErrNotFound if the Snapshot
    45  // does not contain the key.
    46  //
    47  // The caller should not modify the contents of the returned slice, but it is
    48  // safe to modify the contents of the argument after Get returns. The returned
    49  // slice will remain valid until the returned Closer is closed. On success, the
    50  // caller MUST call closer.Close() or a memory leak will occur.
    51  func (s *Snapshot) Get(key []byte) ([]byte, io.Closer, error) {
    52  	if s.db == nil {
    53  		panic(ErrClosed)
    54  	}
    55  	return s.db.getInternal(key, nil /* batch */, s)
    56  }
    57  
    58  // NewIter returns an iterator that is unpositioned (Iterator.Valid() will
    59  // return false). The iterator can be positioned via a call to SeekGE,
    60  // SeekLT, First or Last.
    61  func (s *Snapshot) NewIter(o *IterOptions) (*Iterator, error) {
    62  	return s.NewIterWithContext(context.Background(), o)
    63  }
    64  
    65  // NewIterWithContext is like NewIter, and additionally accepts a context for
    66  // tracing.
    67  func (s *Snapshot) NewIterWithContext(ctx context.Context, o *IterOptions) (*Iterator, error) {
    68  	if s.db == nil {
    69  		panic(ErrClosed)
    70  	}
    71  	return s.db.newIter(ctx, nil /* batch */, newIterOpts{
    72  		snapshot: snapshotIterOpts{seqNum: s.seqNum},
    73  	}, o), nil
    74  }
    75  
    76  // ScanInternal scans all internal keys within the specified bounds, truncating
    77  // any rangedels and rangekeys to those bounds. For use when an external user
    78  // needs to be aware of all internal keys that make up a key range.
    79  //
    80  // See comment on db.ScanInternal for the behaviour that can be expected of
    81  // point keys deleted by range dels and keys masked by range keys.
    82  func (s *Snapshot) ScanInternal(
    83  	ctx context.Context,
    84  	categoryAndQoS sstable.CategoryAndQoS,
    85  	lower, upper []byte,
    86  	visitPointKey func(key *InternalKey, value LazyValue, iterInfo IteratorLevel) error,
    87  	visitRangeDel func(start, end []byte, seqNum uint64) error,
    88  	visitRangeKey func(start, end []byte, keys []rangekey.Key) error,
    89  	visitSharedFile func(sst *SharedSSTMeta) error,
    90  ) error {
    91  	if s.db == nil {
    92  		panic(ErrClosed)
    93  	}
    94  	scanInternalOpts := &scanInternalOptions{
    95  		CategoryAndQoS:   categoryAndQoS,
    96  		visitPointKey:    visitPointKey,
    97  		visitRangeDel:    visitRangeDel,
    98  		visitRangeKey:    visitRangeKey,
    99  		visitSharedFile:  visitSharedFile,
   100  		skipSharedLevels: visitSharedFile != nil,
   101  		IterOptions: IterOptions{
   102  			KeyTypes:   IterKeyTypePointsAndRanges,
   103  			LowerBound: lower,
   104  			UpperBound: upper,
   105  		},
   106  	}
   107  
   108  	iter, err := s.db.newInternalIter(ctx, snapshotIterOpts{seqNum: s.seqNum}, scanInternalOpts)
   109  	if err != nil {
   110  		return err
   111  	}
   112  	defer iter.close()
   113  
   114  	return scanInternalImpl(ctx, lower, upper, iter, scanInternalOpts)
   115  }
   116  
   117  // closeLocked is similar to Close(), except it requires that db.mu be held
   118  // by the caller.
   119  func (s *Snapshot) closeLocked() error {
   120  	s.db.mu.snapshots.remove(s)
   121  
   122  	// If s was the previous earliest snapshot, we might be able to reclaim
   123  	// disk space by dropping obsolete records that were pinned by s.
   124  	if e := s.db.mu.snapshots.earliest(); e > s.seqNum {
   125  		s.db.maybeScheduleCompactionPicker(pickElisionOnly)
   126  	}
   127  	s.db = nil
   128  	return nil
   129  }
   130  
   131  // Close closes the snapshot, releasing its resources. Close must be called.
   132  // Failure to do so will result in a tiny memory leak and a large leak of
   133  // resources on disk due to the entries the snapshot is preventing from being
   134  // deleted.
   135  //
   136  // d.mu must NOT be held by the caller.
   137  func (s *Snapshot) Close() error {
   138  	db := s.db
   139  	if db == nil {
   140  		panic(ErrClosed)
   141  	}
   142  	db.mu.Lock()
   143  	defer db.mu.Unlock()
   144  	return s.closeLocked()
   145  }
   146  
// snapshotList is an intrusive, circular doubly-linked list of Snapshots.
// pushBack appends at the tail, so root.next is the oldest entry still in the
// list (see earliest).
type snapshotList struct {
	// root is a sentinel node: it is never a real snapshot, and an empty list
	// has root.next and root.prev pointing back at root (see init and empty).
	root Snapshot
}
   150  
   151  func (l *snapshotList) init() {
   152  	l.root.next = &l.root
   153  	l.root.prev = &l.root
   154  }
   155  
   156  func (l *snapshotList) empty() bool {
   157  	return l.root.next == &l.root
   158  }
   159  
   160  func (l *snapshotList) count() int {
   161  	if l.empty() {
   162  		return 0
   163  	}
   164  	var count int
   165  	for i := l.root.next; i != &l.root; i = i.next {
   166  		count++
   167  	}
   168  	return count
   169  }
   170  
   171  func (l *snapshotList) earliest() uint64 {
   172  	v := uint64(math.MaxUint64)
   173  	if !l.empty() {
   174  		v = l.root.next.seqNum
   175  	}
   176  	return v
   177  }
   178  
   179  func (l *snapshotList) toSlice() []uint64 {
   180  	if l.empty() {
   181  		return nil
   182  	}
   183  	var results []uint64
   184  	for i := l.root.next; i != &l.root; i = i.next {
   185  		results = append(results, i.seqNum)
   186  	}
   187  	return results
   188  }
   189  
   190  func (l *snapshotList) pushBack(s *Snapshot) {
   191  	if s.list != nil || s.prev != nil || s.next != nil {
   192  		panic("pebble: snapshot list is inconsistent")
   193  	}
   194  	s.prev = l.root.prev
   195  	s.prev.next = s
   196  	s.next = &l.root
   197  	s.next.prev = s
   198  	s.list = l
   199  }
   200  
   201  func (l *snapshotList) remove(s *Snapshot) {
   202  	if s == &l.root {
   203  		panic("pebble: cannot remove snapshot list root node")
   204  	}
   205  	if s.list != l {
   206  		panic("pebble: snapshot list is inconsistent")
   207  	}
   208  	s.prev.next = s.next
   209  	s.next.prev = s.prev
   210  	s.next = nil // avoid memory leaks
   211  	s.prev = nil // avoid memory leaks
   212  	s.list = nil // avoid memory leaks
   213  }
   214  
// EventuallyFileOnlySnapshot (aka EFOS) provides a read-only point-in-time view
// of the database state, similar to Snapshot. An EventuallyFileOnlySnapshot
// induces less write amplification than Snapshot, at the cost of increased space
// amplification. While a Snapshot may increase write amplification across all
// flushes and compactions for the duration of its lifetime, an
// EventuallyFileOnlySnapshot only incurs that cost for flushes/compactions if
// memtables at the time of EFOS instantiation contained keys that the EFOS is
// interested in (i.e. its protectedRanges). In that case, the EFOS prevents
// elision of keys visible to it, similar to a Snapshot, until those memtables
// are flushed, and once that happens, the "EventuallyFileOnlySnapshot"
// transitions to a file-only snapshot state in which it pins zombie sstables
// like an open Iterator would, without pinning any memtables. Callers that can
// tolerate the increased space amplification of pinning zombie sstables until
// the snapshot is closed may prefer EventuallyFileOnlySnapshots for their
// reduced write amplification. Callers that desire the benefits of the file-only
// state that requires no pinning of memtables should call
// `WaitForFileOnlySnapshot()` (and possibly re-mint an EFOS if it returns
// ErrSnapshotExcised) before relying on the EFOS to keep producing iterators
// with zero write-amp and zero pinning of memtables in memory.
//
// EventuallyFileOnlySnapshots interact with the IngestAndExcise operation in
// subtle ways. No new iterators can be created once
// EventuallyFileOnlySnapshot.excised is set to true, unless the EFOS has
// already transitioned to a file-only snapshot or alwaysCreateIters is set.
type EventuallyFileOnlySnapshot struct {
	mu struct {
		// NB: If both this mutex and db.mu are being grabbed, db.mu should be
		// grabbed _before_ grabbing this one.
		sync.Mutex

		// Either the snap field is set below, or the version is set at any given
		// point of time. If a snapshot is referenced, this is not a file-only
		// snapshot yet, and if a version is set (and ref'd) this is a file-only
		// snapshot.

		// The wrapped regular snapshot, if not a file-only snapshot yet.
		snap *Snapshot
		// The wrapped version reference, if a file-only snapshot.
		vers *version

		// The readState corresponding to when this EFOS was created. Only set
		// if alwaysCreateIters is true.
		rs *readState
	}

	// Key ranges to watch for an excise on.
	protectedRanges []KeyRange
	// excised, if true, signals that the above ranges were excised during the
	// lifetime of this snapshot.
	excised atomic.Bool

	// The db the snapshot was created from.
	db     *DB
	// The sequence number at which iterators created from this EFOS read.
	seqNum uint64

	// If true, this EventuallyFileOnlySnapshot will always generate iterators that
	// retain snapshot semantics, by holding onto the readState if a conflicting
	// excise were to happen. Only used in some tests to enforce deterministic
	// behaviour around excises.
	alwaysCreateIters bool

	// closed is closed by Close. Other methods poll it with a non-blocking
	// select to detect that the EFOS has been closed.
	closed chan struct{}
}
   277  
   278  func (d *DB) makeEventuallyFileOnlySnapshot(
   279  	keyRanges []KeyRange, internalKeyRanges []internalKeyRange,
   280  ) *EventuallyFileOnlySnapshot {
   281  	isFileOnly := true
   282  
   283  	d.mu.Lock()
   284  	defer d.mu.Unlock()
   285  	seqNum := d.mu.versions.visibleSeqNum.Load()
   286  	// Check if any of the keyRanges overlap with a memtable.
   287  	for i := range d.mu.mem.queue {
   288  		mem := d.mu.mem.queue[i]
   289  		if ingestMemtableOverlaps(d.cmp, mem, internalKeyRanges) {
   290  			isFileOnly = false
   291  			break
   292  		}
   293  	}
   294  	es := &EventuallyFileOnlySnapshot{
   295  		db:                d,
   296  		seqNum:            seqNum,
   297  		protectedRanges:   keyRanges,
   298  		closed:            make(chan struct{}),
   299  		alwaysCreateIters: d.opts.private.efosAlwaysCreatesIterators,
   300  	}
   301  	if es.alwaysCreateIters {
   302  		es.mu.rs = d.loadReadState()
   303  	}
   304  	if isFileOnly {
   305  		es.mu.vers = d.mu.versions.currentVersion()
   306  		es.mu.vers.Ref()
   307  	} else {
   308  		s := &Snapshot{
   309  			db:     d,
   310  			seqNum: seqNum,
   311  		}
   312  		s.efos = es
   313  		es.mu.snap = s
   314  		d.mu.snapshots.pushBack(s)
   315  	}
   316  	return es
   317  }
   318  
// Transitions this EventuallyFileOnlySnapshot to a file-only snapshot. Requires
// vers to correspond to a Version from the current or a past acquisition of
// db.mu. vers must have been Ref()'d before that mutex was released, if it was
// released.
//
// On success the EFOS takes over the caller's reference on vers and the
// wrapped regular Snapshot is closed. If the EFOS has already been closed, the
// caller's reference on vers is released and ErrClosed is returned. The method
// panics if the EFOS no longer wraps a Snapshot, i.e. on a second transition.
//
// NB: The caller is expected to check for es.excised before making this
// call.
//
// d.mu must be held when calling this method.
func (es *EventuallyFileOnlySnapshot) transitionToFileOnlySnapshot(vers *version) error {
	es.mu.Lock()
	// Non-blocking check for a concurrent/earlier Close.
	select {
	case <-es.closed:
		// The EFOS will never store vers, so drop the reference the caller
		// acquired on its behalf.
		vers.UnrefLocked()
		es.mu.Unlock()
		return ErrClosed
	default:
	}
	if es.mu.snap == nil {
		es.mu.Unlock()
		panic("pebble: tried to transition an eventually-file-only-snapshot twice")
	}
	// The caller has already called Ref() on vers.
	es.mu.vers = vers
	// NB: The callers should have already done a check of es.excised.
	oldSnap := es.mu.snap
	es.mu.snap = nil
	es.mu.Unlock()
	// es.mu is released before closing the old snapshot; closeLocked relies on
	// db.mu, which the caller holds.
	return oldSnap.closeLocked()
}
   349  
   350  // hasTransitioned returns true if this EFOS has transitioned to a file-only
   351  // snapshot.
   352  func (es *EventuallyFileOnlySnapshot) hasTransitioned() bool {
   353  	es.mu.Lock()
   354  	defer es.mu.Unlock()
   355  	return es.mu.vers != nil
   356  }
   357  
// waitForFlush waits for a flush on any memtables that need to be flushed
// before this EFOS can transition to a file-only snapshot. If this EFOS is
// waiting on a flush of the mutable memtable, it forces a rotation within
// `dur` duration. For immutable memtables, it schedules a flush and waits for
// it to finish.
//
// The method acquires es.db.mu itself, so the caller must not hold it. It
// returns ErrClosed if the EFOS is observed closed, ctx.Err() if ctx is
// observed done, ErrSnapshotExcised if es.excised is set once the wait is
// over, and nil otherwise. Closure and cancellation are only polled at the top
// of each loop iteration; they do not interrupt the condition-variable wait.
func (es *EventuallyFileOnlySnapshot) waitForFlush(ctx context.Context, dur time.Duration) error {
	es.db.mu.Lock()
	defer es.db.mu.Unlock()

	// Loop until every memtable with sequence numbers below es.seqNum is gone
	// from the unflushed set.
	earliestUnflushedSeqNum := es.db.getEarliestUnflushedSeqNumLocked()
	for earliestUnflushedSeqNum < es.seqNum {
		// Non-blocking check for closure or cancellation.
		select {
		case <-es.closed:
			return ErrClosed
		case <-ctx.Done():
			return ctx.Err()
		default:
		}
		// Check if the current mutable memtable contains keys less than seqNum.
		// If so, rotate it.
		if es.db.mu.mem.mutable.logSeqNum < es.seqNum && dur.Nanoseconds() > 0 {
			es.db.maybeScheduleDelayedFlush(es.db.mu.mem.mutable, dur)
		} else {
			// Find the last memtable that contains seqNums less than es.seqNum,
			// and force a flush on it.
			var mem *flushableEntry
			for i := range es.db.mu.mem.queue {
				if es.db.mu.mem.queue[i].logSeqNum < es.seqNum {
					mem = es.db.mu.mem.queue[i]
				}
			}
			// NOTE(review): mem is dereferenced without a nil check; this
			// presumably relies on the loop condition guaranteeing that some
			// queue entry has logSeqNum < es.seqNum — confirm against
			// getEarliestUnflushedSeqNumLocked.
			mem.flushForced = true
			es.db.maybeScheduleFlush()
		}
		// Block on the compaction condition variable (presumably signalled when
		// flush/compaction state changes — verify), then re-evaluate.
		es.db.mu.compact.cond.Wait()

		earliestUnflushedSeqNum = es.db.getEarliestUnflushedSeqNumLocked()
	}
	if es.excised.Load() {
		return ErrSnapshotExcised
	}
	return nil
}
   401  
   402  // WaitForFileOnlySnapshot blocks the calling goroutine until this snapshot
   403  // has been converted into a file-only snapshot (i.e. all memtables containing
   404  // keys < seqNum are flushed). A duration can be passed in, and if nonzero,
   405  // a delayed flush will be scheduled at that duration if necessary.
   406  //
   407  // Idempotent; can be called multiple times with no side effects.
   408  func (es *EventuallyFileOnlySnapshot) WaitForFileOnlySnapshot(
   409  	ctx context.Context, dur time.Duration,
   410  ) error {
   411  	if es.hasTransitioned() {
   412  		return nil
   413  	}
   414  
   415  	if err := es.waitForFlush(ctx, dur); err != nil {
   416  		return err
   417  	}
   418  
   419  	if invariants.Enabled {
   420  		// Since we aren't returning an error, we _must_ have transitioned to a
   421  		// file-only snapshot by now.
   422  		if !es.hasTransitioned() {
   423  			panic("expected EFOS to have transitioned to file-only snapshot after flush")
   424  		}
   425  	}
   426  	return nil
   427  }
   428  
   429  // Close closes the file-only snapshot and releases all referenced resources.
   430  // Not idempotent.
   431  func (es *EventuallyFileOnlySnapshot) Close() error {
   432  	close(es.closed)
   433  	es.db.mu.Lock()
   434  	defer es.db.mu.Unlock()
   435  	es.mu.Lock()
   436  	defer es.mu.Unlock()
   437  
   438  	if es.mu.snap != nil {
   439  		if err := es.mu.snap.closeLocked(); err != nil {
   440  			return err
   441  		}
   442  	}
   443  	if es.mu.vers != nil {
   444  		es.mu.vers.UnrefLocked()
   445  	}
   446  	if es.mu.rs != nil {
   447  		es.mu.rs.unrefLocked()
   448  	}
   449  	return nil
   450  }
   451  
   452  // Get implements the Reader interface.
   453  func (es *EventuallyFileOnlySnapshot) Get(key []byte) (value []byte, closer io.Closer, err error) {
   454  	// TODO(jackson): Use getInternal.
   455  	iter, err := es.NewIter(nil)
   456  	if err != nil {
   457  		return nil, nil, err
   458  	}
   459  	var valid bool
   460  	if es.db.opts.Comparer.Split != nil {
   461  		valid = iter.SeekPrefixGE(key)
   462  	} else {
   463  		valid = iter.SeekGE(key)
   464  	}
   465  	if !valid {
   466  		if err = firstError(iter.Error(), iter.Close()); err != nil {
   467  			return nil, nil, err
   468  		}
   469  		return nil, nil, ErrNotFound
   470  	}
   471  	if !es.db.equal(iter.Key(), key) {
   472  		return nil, nil, firstError(iter.Close(), ErrNotFound)
   473  	}
   474  	return iter.Value(), iter, nil
   475  }
   476  
   477  // NewIter returns an iterator that is unpositioned (Iterator.Valid() will
   478  // return false). The iterator can be positioned via a call to SeekGE,
   479  // SeekLT, First or Last.
   480  func (es *EventuallyFileOnlySnapshot) NewIter(o *IterOptions) (*Iterator, error) {
   481  	return es.NewIterWithContext(context.Background(), o)
   482  }
   483  
   484  func (es *EventuallyFileOnlySnapshot) newAlwaysCreateIterWithContext(
   485  	ctx context.Context, o *IterOptions,
   486  ) (*Iterator, error) {
   487  	// Grab the db mutex. This avoids races down below, where we could get
   488  	// excised between the es.excised.Load() call, and the newIter call.
   489  	es.db.mu.Lock()
   490  	defer es.db.mu.Unlock()
   491  	es.mu.Lock()
   492  	defer es.mu.Unlock()
   493  	if es.mu.vers != nil {
   494  		sOpts := snapshotIterOpts{seqNum: es.seqNum, vers: es.mu.vers}
   495  		return es.db.newIter(ctx, nil /* batch */, newIterOpts{snapshot: sOpts}, o), nil
   496  	}
   497  
   498  	sOpts := snapshotIterOpts{seqNum: es.seqNum}
   499  	if es.excised.Load() {
   500  		if es.mu.rs == nil {
   501  			return nil, errors.AssertionFailedf("unexpected nil readState in EFOS' alwaysCreateIters mode")
   502  		}
   503  		sOpts.readState = es.mu.rs
   504  	}
   505  	iter := es.db.newIter(ctx, nil /* batch */, newIterOpts{snapshot: sOpts}, o)
   506  	return iter, nil
   507  }
   508  
   509  // NewIterWithContext is like NewIter, and additionally accepts a context for
   510  // tracing.
   511  func (es *EventuallyFileOnlySnapshot) NewIterWithContext(
   512  	ctx context.Context, o *IterOptions,
   513  ) (*Iterator, error) {
   514  	select {
   515  	case <-es.closed:
   516  		panic(ErrClosed)
   517  	default:
   518  	}
   519  
   520  	if es.alwaysCreateIters {
   521  		return es.newAlwaysCreateIterWithContext(ctx, o)
   522  	}
   523  	es.mu.Lock()
   524  	defer es.mu.Unlock()
   525  	if es.mu.vers != nil {
   526  		sOpts := snapshotIterOpts{seqNum: es.seqNum, vers: es.mu.vers}
   527  		return es.db.newIter(ctx, nil /* batch */, newIterOpts{snapshot: sOpts}, o), nil
   528  	}
   529  
   530  	sOpts := snapshotIterOpts{seqNum: es.seqNum}
   531  	if es.excised.Load() {
   532  		return nil, ErrSnapshotExcised
   533  	}
   534  	iter := es.db.newIter(ctx, nil /* batch */, newIterOpts{snapshot: sOpts}, o)
   535  
   536  	// If excised is true, then keys relevant to the snapshot might not be
   537  	// present in the readState being used by the iterator.
   538  	if es.excised.Load() {
   539  		iter.Close()
   540  		return nil, ErrSnapshotExcised
   541  	}
   542  	return iter, nil
   543  }
   544  
   545  // ScanInternal scans all internal keys within the specified bounds, truncating
   546  // any rangedels and rangekeys to those bounds. For use when an external user
   547  // needs to be aware of all internal keys that make up a key range.
   548  //
   549  // See comment on db.ScanInternal for the behaviour that can be expected of
   550  // point keys deleted by range dels and keys masked by range keys.
   551  func (es *EventuallyFileOnlySnapshot) ScanInternal(
   552  	ctx context.Context,
   553  	categoryAndQoS sstable.CategoryAndQoS,
   554  	lower, upper []byte,
   555  	visitPointKey func(key *InternalKey, value LazyValue, iterInfo IteratorLevel) error,
   556  	visitRangeDel func(start, end []byte, seqNum uint64) error,
   557  	visitRangeKey func(start, end []byte, keys []rangekey.Key) error,
   558  	visitSharedFile func(sst *SharedSSTMeta) error,
   559  ) error {
   560  	if es.db == nil {
   561  		panic(ErrClosed)
   562  	}
   563  	if es.excised.Load() && !es.alwaysCreateIters {
   564  		return ErrSnapshotExcised
   565  	}
   566  	var sOpts snapshotIterOpts
   567  	opts := &scanInternalOptions{
   568  		CategoryAndQoS: categoryAndQoS,
   569  		IterOptions: IterOptions{
   570  			KeyTypes:   IterKeyTypePointsAndRanges,
   571  			LowerBound: lower,
   572  			UpperBound: upper,
   573  		},
   574  		visitPointKey:    visitPointKey,
   575  		visitRangeDel:    visitRangeDel,
   576  		visitRangeKey:    visitRangeKey,
   577  		visitSharedFile:  visitSharedFile,
   578  		skipSharedLevels: visitSharedFile != nil,
   579  	}
   580  	if es.alwaysCreateIters {
   581  		// Grab the db mutex. This avoids races down below as it prevents excises
   582  		// from taking effect until the iterator is instantiated.
   583  		es.db.mu.Lock()
   584  	}
   585  	es.mu.Lock()
   586  	if es.mu.vers != nil {
   587  		sOpts = snapshotIterOpts{
   588  			seqNum: es.seqNum,
   589  			vers:   es.mu.vers,
   590  		}
   591  	} else {
   592  		if es.excised.Load() && es.alwaysCreateIters {
   593  			sOpts = snapshotIterOpts{
   594  				readState: es.mu.rs,
   595  				seqNum:    es.seqNum,
   596  			}
   597  		} else {
   598  			sOpts = snapshotIterOpts{
   599  				seqNum: es.seqNum,
   600  			}
   601  		}
   602  	}
   603  	es.mu.Unlock()
   604  	iter, err := es.db.newInternalIter(ctx, sOpts, opts)
   605  	if err != nil {
   606  		return err
   607  	}
   608  	defer iter.close()
   609  	if es.alwaysCreateIters {
   610  		// See the similar conditional above where we grab this mutex.
   611  		es.db.mu.Unlock()
   612  	}
   613  
   614  	// If excised is true, then keys relevant to the snapshot might not be
   615  	// present in the readState being used by the iterator. Error out.
   616  	if es.excised.Load() && !es.alwaysCreateIters {
   617  		return ErrSnapshotExcised
   618  	}
   619  
   620  	return scanInternalImpl(ctx, lower, upper, iter, opts)
   621  }