github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/metamorphic/key_manager.go

github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/metamorphic/key_manager.go (about)

     1  package metamorphic
     2  
     3  import (
     4  	"bytes"
     5  	"cmp"
     6  	"fmt"
     7  	"slices"
     8  	"strings"
     9  
    10  	"github.com/cockroachdb/errors"
    11  	"github.com/cockroachdb/pebble"
    12  	"github.com/cockroachdb/pebble/internal/base"
    13  	"github.com/cockroachdb/pebble/internal/testkeys"
    14  	"github.com/stretchr/testify/require"
    15  )
    16  
// objKey is a tuple of (objID, key). This struct is used primarily as a map
// key for keyManager; since it contains a slice it cannot be a Go map key
// itself, so its String form is used instead. Only writer objTags can occur
// here, i.e., dbTag and batchTag, since this is used for tracking the keys in
// a writer. Use makeObjKey to construct one so that the tag is validated.
type objKey struct {
	id  objID
	key []byte
}
    24  
    25  // makeObjKey returns a new objKey given and id and key.
    26  func makeObjKey(id objID, key []byte) objKey {
    27  	if id.tag() != dbTag && id.tag() != batchTag {
    28  		panic(fmt.Sprintf("unexpected non-writer tag %v", id.tag()))
    29  	}
    30  	return objKey{id, key}
    31  }
    32  
    33  // String implements fmt.Stringer, returning a stable string representation of
    34  // the objKey. This string is used as map key.
    35  func (o objKey) String() string {
    36  	return fmt.Sprintf("%s:%s", o.id, o.key)
    37  }
    38  
// keyMeta is metadata associated with an (objID, key) pair, where objID is
// a writer containing the key.
type keyMeta struct {
	// objKey is embedded, so its id and key fields and its String method are
	// available directly on a keyMeta.
	objKey
	// history provides the history of writer operations applied against this
	// key on this object. history is always ordered by non-decreasing
	// metaTimestamp.
	history keyHistory
}
    48  
    49  func (m *keyMeta) clear() {
    50  	m.history = m.history[:0]
    51  }
    52  
    53  // mergeInto merges this metadata into the metadata for other, appending all of
    54  // its individual operations to dst at the provided timestamp.
    55  func (m *keyMeta) mergeInto(dst *keyMeta, ts int) {
    56  	for _, op := range m.history {
    57  		// If the key is being merged into a database object and the operation
    58  		// is a delete, we can clear the destination history. Database objects
    59  		// are end points in the merging of keys and won't be the source of a
    60  		// future merge. Deletions cause all other operations to behave as
    61  		// though the key was never written to the database at all, so we don't
    62  		// need to consider it for maintaining single delete invariants.
    63  		//
    64  		// NB: There's a subtlety here in that isDelete() will return true if
    65  		// opType is a writerSingleDelete, but single deletes are capable of
    66  		// leaking information about the history of writes. However, that's
    67  		// okay, because as long as we're properly generating single deletes
    68  		// according to the W1 invariant described in keyManager's comment, a
    69  		// single delete is equivalent to delete for the current history.
    70  		if dst.objKey.id.tag() == dbTag && op.opType.isDelete() {
    71  			dst.clear()
    72  			continue
    73  		}
    74  		dst.history = append(dst.history, keyHistoryItem{
    75  			opType:        op.opType,
    76  			metaTimestamp: ts,
    77  		})
    78  	}
    79  }
    80  
    81  type bounds struct {
    82  	smallest    []byte
    83  	largest     []byte
    84  	largestExcl bool // is largest exclusive?
    85  }
    86  
    87  func (b *bounds) String() string {
    88  	if b.largestExcl {
    89  		return fmt.Sprintf("[%q,%q)", b.smallest, b.largest)
    90  	}
    91  	return fmt.Sprintf("[%q,%q]", b.smallest, b.largest)
    92  }
    93  
    94  // overlaps returns true iff the bounds intersect.
    95  func (b *bounds) overlaps(cmp base.Compare, other *bounds) bool {
    96  	// Is b strictly before other?
    97  	if v := cmp(b.largest, other.smallest); v < 0 || (v == 0 && b.largestExcl) {
    98  		return false
    99  	}
   100  	// Is b strictly after other?
   101  	if v := cmp(b.smallest, other.largest); v > 0 || (v == 0 && other.largestExcl) {
   102  		return false
   103  	}
   104  	return true
   105  }
   106  
   107  // mergeInto merges the receiver bounds into other, mutating other.
   108  func (b bounds) mergeInto(cmp base.Compare, other *bounds) {
   109  	if cmp(other.smallest, b.smallest) > 0 {
   110  		other.smallest = b.smallest
   111  	}
   112  	if v := cmp(other.largest, b.largest); v < 0 || (v == 0 && other.largestExcl) {
   113  		other.largest = b.largest
   114  		other.largestExcl = b.largestExcl
   115  	}
   116  }
   117  
   118  // keyManager tracks the write operations performed on keys in the generation
   119  // phase of the metamorphic test. It maintains histories of operations performed
   120  // against every unique user key on every writer object. These histories inform
   121  // operation generation in order to maintain invariants that Pebble requires of
   122  // end users, mostly around single deletions.
   123  //
   124  // A single deletion has a subtle requirement of the writer:
   125  //
   126  //	W1: The writer may only single delete a key `k` if `k` has been Set once
   127  //	    (and never MergeD) since the last delete.
   128  //
   129  // When a SINGLEDEL key deletes a SET key within a compaction, both the SET and
   130  // the SINGLEDEL keys are elided. If multiple SETs of the key exist within the
   131  // LSM, the SINGLEDEL reveals the lower SET. This behavior is dependent on the
   132  // internal LSM state and nondeterministic. To ensure determinism, the end user
   133  // must satisfy W1 and use single delete only when they can guarantee that the
   134  // key has been set at most once since the last delete, preventing this rollback
   135  // to a previous value.
   136  //
   137  // This W1 invariant requires a delicate dance during operation generation,
   138  // because independent batches may be independently built and committed. With
   139  // multi-instance variants of the metamorphic tests, keys in batches may
   140  // ultimately be committed to any of several DB instances. To satisfy these
   141  // requirements, the key manager tracks the history of every key on every
   142  // writable object. When generating a new single deletion operation, the
   143  // generator asks the key manager for a set of keys for which a single delete
   144  // maintains the W1 invariant within the object itself. This object-local W1
   145  // invariant (OLW1) is equivalent to W1 if one only ever performs write
   146  // operations directly against individual DB objects.
   147  //
   148  // However with the existence of batches that receive writes independent of DB
   149  // objects, W1 may be violated by appending the histories of two objects that
   150  // independently satisfy OLW1. Consider a sequence such as:
   151  //
   152  //  1. db1.Set("foo")
   153  //  2. batch1.Set("foo")
   154  //  3. batch1.SingleDelete("foo")
   155  //  4. db1.Apply(batch1)
   156  //
   157  // Both db1 and batch1 satisfy the object-local invariant OLW1. However the
   158  // composition of the histories created by appending batch1's operations to db1
   159  // creates a history that now violates W1 on db1. To detect this violation,
   160  // batch applications/commits and ingestions examine the tail of the destination
   161  // object's history and the head of the source batch's history. When a violation
   162  // is detected, these operations insert additional Delete operations to clear
   163  // the conflicting keys before proceeding with the conflicting operation. These
   164  // deletes reset the key history.
   165  //
   166  // Note that this generation-time key tracking requires that operations be
   167  // infallible, because a runtime failure would cause the key manager's state to
   168  // diverge from the runtime object state. Ingestion operations pose an obstacle,
   169  // because the generator may generate ingestions that fail due to overlapping
   170  // sstables. Today, this complication is sidestepped by avoiding ingestion of
   171  // multiple batches containing deletes or single deletes since loss of those
   172  // specific operations on a key are what we cannot tolerate (doing SingleDelete
   173  // on a key that has not been written to because the Set was lost is harmless).
   174  //
   175  // TODO(jackson): Instead, compute smallest and largest bounds of batches so
   176  // that we know at generation-time whether or not an ingestion operation will
   177  // fail and can avoid updating key state.
type keyManager struct {
	// comparer supplies the key ordering (Compare) and the prefix/suffix
	// split (Split) used throughout the key manager.
	comparer *base.Comparer

	// metaTimestamp is used to provide an ordering over certain operations
	// like iter creation, updates to keys. Keeping track of the timestamp
	// allows us to make determinations such as whether a key will be visible
	// to an iterator. It is advanced by nextMetaTimestamp.
	metaTimestamp int

	// byObjKey tracks the state for each (writer, key) pair, keyed by
	// objKey.String(). It refers to the same *keyMeta as in the byObj slices.
	// Using a map allows for fast state lookups when changing the state based
	// on a writer operation on the key.
	byObjKey map[string]*keyMeta
	// List of keys per writer, and what has happened to it in that writer.
	// Will be transferred when needed (see mergeKeysInto).
	byObj map[objID][]*keyMeta
	// boundsByObj holds user key bounds encompassing all the keys set within an
	// object. It's updated within `update` when a new op is generated. It's
	// used when determining whether an ingestion should succeed or not.
	boundsByObj map[objID]*bounds

	// globalKeys represents all the keys that have been generated so far. Not
	// all these keys have been written to. globalKeys is sorted.
	globalKeys [][]byte
	// globalKeysMap contains the same keys as globalKeys but in a map (keyed
	// by the key's bytes converted to a string). It ensures no duplication.
	globalKeysMap map[string]bool
	// globalKeyPrefixes contains all the key prefixes (as defined by the
	// comparer's Split) generated so far. globalKeyPrefixes is sorted.
	globalKeyPrefixes [][]byte
	// globalKeyPrefixesMap contains the same keys as globalKeyPrefixes. It
	// ensures no duplication.
	globalKeyPrefixesMap map[string]struct{}
}
   212  
   213  func (k *keyManager) nextMetaTimestamp() int {
   214  	ret := k.metaTimestamp
   215  	k.metaTimestamp++
   216  	return ret
   217  }
   218  
   219  // newKeyManager returns a pointer to a new keyManager. Callers should
   220  // interact with this using addNewKey, knownKeys, update methods only.
   221  func newKeyManager(numInstances int) *keyManager {
   222  	m := &keyManager{
   223  		comparer:             testkeys.Comparer,
   224  		byObjKey:             make(map[string]*keyMeta),
   225  		byObj:                make(map[objID][]*keyMeta),
   226  		boundsByObj:          make(map[objID]*bounds),
   227  		globalKeysMap:        make(map[string]bool),
   228  		globalKeyPrefixesMap: make(map[string]struct{}),
   229  	}
   230  	for i := 1; i <= max(numInstances, 1); i++ {
   231  		m.byObj[makeObjID(dbTag, uint32(i))] = []*keyMeta{}
   232  	}
   233  	return m
   234  }
   235  
   236  // addNewKey adds the given key to the key manager for global key tracking.
   237  // Returns false iff this is not a new key.
   238  func (k *keyManager) addNewKey(key []byte) bool {
   239  	if k.globalKeysMap[string(key)] {
   240  		return false
   241  	}
   242  	insertSorted(k.comparer.Compare, &k.globalKeys, key)
   243  	k.globalKeysMap[string(key)] = true
   244  
   245  	prefixLen := k.comparer.Split(key)
   246  	if _, ok := k.globalKeyPrefixesMap[string(key[:prefixLen])]; !ok {
   247  		insertSorted(k.comparer.Compare, &k.globalKeyPrefixes, key[:prefixLen])
   248  		k.globalKeyPrefixesMap[string(key[:prefixLen])] = struct{}{}
   249  	}
   250  	return true
   251  }
   252  
   253  // getOrInit returns the keyMeta for the (objID, key) pair, if it exists, else
   254  // allocates, initializes and returns a new value.
   255  func (k *keyManager) getOrInit(id objID, key []byte) *keyMeta {
   256  	o := makeObjKey(id, key)
   257  	m, ok := k.byObjKey[o.String()]
   258  	if ok {
   259  		return m
   260  	}
   261  	m = &keyMeta{objKey: makeObjKey(id, key)}
   262  	// Initialize the key-to-meta index.
   263  	k.byObjKey[o.String()] = m
   264  	// Add to the id-to-metas slide.
   265  	k.byObj[o.id] = append(k.byObj[o.id], m)
   266  
   267  	// Expand the object's bounds to contain this key if they don't already.
   268  	k.expandBounds(id, bounds{
   269  		smallest: key,
   270  		largest:  key,
   271  	})
   272  	return m
   273  }
   274  
   275  // mergeKeysInto merges all metadata for all keys associated with the "from" ID
   276  // with the metadata for keys associated with the "to" ID.
   277  func (k *keyManager) mergeKeysInto(from, to objID, mergeFunc func(src, dst *keyMeta, ts int)) {
   278  	msFrom, ok := k.byObj[from]
   279  	if !ok {
   280  		msFrom = []*keyMeta{}
   281  		k.byObj[from] = msFrom
   282  	}
   283  	msTo, ok := k.byObj[to]
   284  	if !ok {
   285  		msTo = []*keyMeta{}
   286  		k.byObj[to] = msTo
   287  	}
   288  
   289  	// Sort to facilitate a merge.
   290  	slices.SortFunc(msFrom, func(a, b *keyMeta) int {
   291  		return bytes.Compare(a.key, b.key)
   292  	})
   293  	slices.SortFunc(msTo, func(a, b *keyMeta) int {
   294  		return bytes.Compare(a.key, b.key)
   295  	})
   296  
   297  	ts := k.nextMetaTimestamp()
   298  	var msNew []*keyMeta
   299  	var iTo int
   300  	for _, m := range msFrom {
   301  		// Move cursor on mTo forward.
   302  		for iTo < len(msTo) && bytes.Compare(msTo[iTo].key, m.key) < 0 {
   303  			msNew = append(msNew, msTo[iTo])
   304  			iTo++
   305  		}
   306  
   307  		var mTo *keyMeta
   308  		if iTo < len(msTo) && bytes.Equal(msTo[iTo].key, m.key) {
   309  			mTo = msTo[iTo]
   310  			iTo++
   311  		} else {
   312  			mTo = &keyMeta{objKey: makeObjKey(to, m.key)}
   313  			k.byObjKey[mTo.String()] = mTo
   314  		}
   315  
   316  		mergeFunc(m, mTo, ts)
   317  		msNew = append(msNew, mTo)
   318  
   319  		delete(k.byObjKey, m.String()) // Unlink "from".
   320  	}
   321  
   322  	// Add any remaining items from the "to" set.
   323  	for iTo < len(msTo) {
   324  		msNew = append(msNew, msTo[iTo])
   325  		iTo++
   326  	}
   327  
   328  	// All the keys in `from` have been merged into `to`. Expand `to`'s bounds
   329  	// to be at least as wide as `from`'s.
   330  	if fromBounds := k.boundsByObj[from]; fromBounds != nil {
   331  		k.expandBounds(to, *fromBounds)
   332  	}
   333  	k.byObj[to] = msNew         // Update "to" obj.
   334  	delete(k.byObj, from)       // Unlink "from" obj.
   335  	delete(k.boundsByObj, from) // Unlink "from" bounds.
   336  }
   337  
   338  // expandBounds expands the incrementally maintained bounds of o to be at least
   339  // as wide as `b`.
   340  func (k *keyManager) expandBounds(o objID, b bounds) {
   341  	existing, ok := k.boundsByObj[o]
   342  	if !ok {
   343  		existing = new(bounds)
   344  		*existing = b
   345  		k.boundsByObj[o] = existing
   346  		return
   347  	}
   348  	b.mergeInto(k.comparer.Compare, existing)
   349  }
   350  
   351  // doObjectBoundsOverlap returns true iff any of the named objects have key
   352  // bounds that overlap any other named object.
   353  func (k *keyManager) doObjectBoundsOverlap(objIDs []objID) bool {
   354  	for i := range objIDs {
   355  		ib, iok := k.boundsByObj[objIDs[i]]
   356  		if !iok {
   357  			continue
   358  		}
   359  		for j := i + 1; j < len(objIDs); j++ {
   360  			jb, jok := k.boundsByObj[objIDs[j]]
   361  			if !jok {
   362  				continue
   363  			}
   364  			if ib.overlaps(k.comparer.Compare, jb) {
   365  				return true
   366  			}
   367  		}
   368  	}
   369  	return false
   370  }
   371  
   372  // checkForSingleDelConflicts examines all the keys written to srcObj, and
   373  // determines whether any of the contained single deletes would be
   374  // nondeterministic if applied to dstObj in dstObj's current state. It returns a
   375  // slice of all the keys that are found to conflict. In order to preserve
   376  // determinism, the caller must delete the key from the destination before
   377  // writing src's mutations to dst in order to ensure determinism.
   378  //
   379  // It takes a `srcCollapsed` parameter that determines whether the source
   380  // history should be "collapsed" (see keyHistory.collapsed) before determining
   381  // whether the applied state will conflict. This is required to facilitate
   382  // ingestOps which are NOT equivalent to committing the batch, because they can
   383  // only commit 1 internal point key at each unique user key.
   384  func (k *keyManager) checkForSingleDelConflicts(srcObj, dstObj objID, srcCollapsed bool) [][]byte {
   385  	var conflicts [][]byte
   386  	for _, src := range k.byObj[srcObj] {
   387  		// Single delete generation logic already ensures that both srcObj and
   388  		// dstObj's single deletes are deterministic within the context of their
   389  		// existing writes. However, applying srcObj on top of dstObj may
   390  		// violate the invariants. Consider:
   391  		//
   392  		//    src: a.SET; a.SINGLEDEL;
   393  		//    dst: a.SET;
   394  		//
   395  		// The merged view is:
   396  		//
   397  		//    a.SET; a.SET; a.SINGLEDEL;
   398  		//
   399  		// This is invalid, because there is more than 1 value mutation of the
   400  		// key before the single delete.
   401  		//
   402  		// We walk the source object's history in chronological order, looking
   403  		// for a single delete that was written before a DEL/RANGEDEL. (NB: We
   404  		// don't need to look beyond a DEL/RANGEDEL, because these deletes bound
   405  		// any subsequently-written single deletes to applying to the keys
   406  		// within src's history between the two tombstones. We already know from
   407  		// per-object history invariants that any such single delete must be
   408  		// deterministic with respect to src's keys.)
   409  		var srcHasUnboundedSingleDelete bool
   410  		var srcValuesBeforeSingleDelete int
   411  
   412  		// When the srcObj is being ingested (srcCollapsed=t), the semantics
   413  		// change. We must first "collapse" the key's history to represent the
   414  		// ingestion semantics.
   415  		srcHistory := src.history
   416  		if srcCollapsed {
   417  			srcHistory = src.history.collapsed()
   418  		}
   419  
   420  	srcloop:
   421  		for _, item := range srcHistory {
   422  			switch item.opType {
   423  			case writerDelete, writerDeleteRange:
   424  				// We found a DEL or RANGEDEL before any single delete. If src
   425  				// contains additional single deletes, their effects are limited
   426  				// to applying to later keys. Combining the two object histories
   427  				// doesn't pose any determinism risk.
   428  				break srcloop
   429  			case writerSingleDelete:
   430  				// We found a single delete. Since we found this single delete
   431  				// before a DEL or RANGEDEL, this delete has the potential to
   432  				// affect the visibility of keys in `dstObj`. We'll need to look
   433  				// for potential conflicts down below.
   434  				srcHasUnboundedSingleDelete = true
   435  				if srcValuesBeforeSingleDelete > 1 {
   436  					panic(errors.AssertionFailedf("unexpectedly found %d sets/merges within %s before single del",
   437  						srcValuesBeforeSingleDelete, srcObj))
   438  				}
   439  				break srcloop
   440  			case writerSet, writerMerge:
   441  				// We found a SET or MERGE operation for this key. If there's a
   442  				// subsequent single delete, we'll need to make sure there's not
   443  				// a SET or MERGE in the dst too.
   444  				srcValuesBeforeSingleDelete++
   445  			default:
   446  				panic(errors.AssertionFailedf("unexpected optype %d", item.opType))
   447  			}
   448  		}
   449  		if !srcHasUnboundedSingleDelete {
   450  			continue
   451  		}
   452  
   453  		dst, ok := k.byObjKey[makeObjKey(dstObj, src.key).String()]
   454  		// If the destination writer has no record of the key, the combined key
   455  		// history is simply the src object's key history which is valid due to
   456  		// per-object single deletion invariants.
   457  		if !ok {
   458  			continue
   459  		}
   460  
   461  		// We need to examine the trailing key history on dst.
   462  		consecutiveValues := srcValuesBeforeSingleDelete
   463  	dstloop:
   464  		for i := len(dst.history) - 1; i >= 0; i-- {
   465  			switch dst.history[i].opType {
   466  			case writerSet, writerMerge:
   467  				// A SET/MERGE may conflict if there's more than 1 consecutive
   468  				// SET/MERGEs.
   469  				consecutiveValues++
   470  				if consecutiveValues > 1 {
   471  					conflicts = append(conflicts, src.key)
   472  					break dstloop
   473  				}
   474  			case writerDelete, writerSingleDelete, writerDeleteRange:
   475  				// Dels clear the history, enabling use of single delete.
   476  				break dstloop
   477  			default:
   478  				panic(errors.AssertionFailedf("unexpected optype %d", dst.history[i].opType))
   479  			}
   480  		}
   481  	}
   482  	return conflicts
   483  }
   484  
   485  // update updates the internal state of the keyManager according to the given
   486  // op.
   487  func (k *keyManager) update(o op) {
   488  	switch s := o.(type) {
   489  	case *setOp:
   490  		meta := k.getOrInit(s.writerID, s.key)
   491  		meta.history = append(meta.history, keyHistoryItem{
   492  			opType:        writerSet,
   493  			metaTimestamp: k.nextMetaTimestamp(),
   494  		})
   495  	case *mergeOp:
   496  		meta := k.getOrInit(s.writerID, s.key)
   497  		meta.history = append(meta.history, keyHistoryItem{
   498  			opType:        writerMerge,
   499  			metaTimestamp: k.nextMetaTimestamp(),
   500  		})
   501  	case *deleteOp:
   502  		meta := k.getOrInit(s.writerID, s.key)
   503  		if meta.objKey.id.tag() == dbTag {
   504  			meta.clear()
   505  		} else {
   506  			meta.history = append(meta.history, keyHistoryItem{
   507  				opType:        writerDelete,
   508  				metaTimestamp: k.nextMetaTimestamp(),
   509  			})
   510  		}
   511  	case *deleteRangeOp:
   512  		// We track the history of discrete point keys, but a range deletion
   513  		// applies over a continuous key span of infinite keys. However, the key
   514  		// manager knows all keys that have been used in all operations, so we
   515  		// can discretize the range tombstone by adding it to every known key
   516  		// within the range.
   517  		ts := k.nextMetaTimestamp()
   518  		keyRange := pebble.KeyRange{Start: s.start, End: s.end}
   519  		for _, key := range k.knownKeysInRange(keyRange) {
   520  			meta := k.getOrInit(s.writerID, key)
   521  			if meta.objKey.id.tag() == dbTag {
   522  				meta.clear()
   523  			} else {
   524  				meta.history = append(meta.history, keyHistoryItem{
   525  					opType:        writerDeleteRange,
   526  					metaTimestamp: ts,
   527  				})
   528  			}
   529  		}
   530  		k.expandBounds(s.writerID, bounds{
   531  			smallest:    s.start,
   532  			largest:     s.end,
   533  			largestExcl: true,
   534  		})
   535  	case *singleDeleteOp:
   536  		meta := k.getOrInit(s.writerID, s.key)
   537  		meta.history = append(meta.history, keyHistoryItem{
   538  			opType:        writerSingleDelete,
   539  			metaTimestamp: k.nextMetaTimestamp(),
   540  		})
   541  
   542  	case *ingestOp:
   543  		// Some ingestion operations may attempt to ingest overlapping sstables
   544  		// which is prohibited. We know at generation time whether these
   545  		// ingestions will be successful. If they won't be successful, we should
   546  		// not update the key state because both the batch(es) and target DB
   547  		// will be left unmodified.
   548  		if k.doObjectBoundsOverlap(s.batchIDs) {
   549  			// This ingestion will fail.
   550  			return
   551  		}
   552  
   553  		// For each batch, merge the keys into the DB. We can't call
   554  		// keyMeta.mergeInto directly to merge, because ingest operations first
   555  		// "flatten" the batch (because you can't set the same key twice at a
   556  		// single sequence number). Instead we compute the collapsed history and
   557  		// merge that.
   558  		for _, batchID := range s.batchIDs {
   559  			k.mergeKeysInto(batchID, s.dbID, func(src, dst *keyMeta, ts int) {
   560  				collapsedSrc := keyMeta{
   561  					objKey:  src.objKey,
   562  					history: src.history.collapsed(),
   563  				}
   564  				collapsedSrc.mergeInto(dst, ts)
   565  			})
   566  		}
   567  		// TODO(bilal): Handle ingestAndExciseOp and replicateOp here.
   568  	case *applyOp:
   569  		// Merge the keys from this writer into the parent writer.
   570  		k.mergeKeysInto(s.batchID, s.writerID, (*keyMeta).mergeInto)
   571  	case *batchCommitOp:
   572  		// Merge the keys from the batch with the keys from the DB.
   573  		k.mergeKeysInto(s.batchID, s.dbID, (*keyMeta).mergeInto)
   574  	}
   575  }
   576  
   577  func (k *keyManager) knownKeys() (keys [][]byte) {
   578  	return k.globalKeys
   579  }
   580  
   581  // knownKeysInRange returns all eligible read keys within the range
   582  // [start,end). The returned slice is owned by the keyManager and must not be
   583  // retained.
   584  func (k *keyManager) knownKeysInRange(kr pebble.KeyRange) (keys [][]byte) {
   585  	s, _ := slices.BinarySearchFunc(k.globalKeys, kr.Start, k.comparer.Compare)
   586  	e, _ := slices.BinarySearchFunc(k.globalKeys, kr.End, k.comparer.Compare)
   587  	if s >= e {
   588  		return nil
   589  	}
   590  	return k.globalKeys[s:e]
   591  }
   592  
   593  func (k *keyManager) prefixes() (prefixes [][]byte) {
   594  	return k.globalKeyPrefixes
   595  }
   596  
   597  // prefixExists returns true if a key has been generated with the provided
   598  // prefix before.
   599  func (k *keyManager) prefixExists(prefix []byte) bool {
   600  	_, exists := k.globalKeyPrefixesMap[string(prefix)]
   601  	return exists
   602  }
   603  
   604  // eligibleSingleDeleteKeys returns a slice of keys that can be safely single
   605  // deleted, given the writer id. Restricting single delete keys through this
   606  // method is used to ensure the OLW1 guarantee (see the keyManager comment) for
   607  // the provided object ID.
   608  func (k *keyManager) eligibleSingleDeleteKeys(o objID) (keys [][]byte) {
   609  	// Creating a slice of keys is wasteful given that the caller will pick one,
   610  	// but makes it simpler for unit testing.
   611  	for _, key := range k.globalKeys {
   612  		objKey := makeObjKey(o, key)
   613  		meta, ok := k.byObjKey[objKey.String()]
   614  		if !ok {
   615  			keys = append(keys, key)
   616  			continue
   617  		}
   618  		// Examine the history within this object.
   619  		if meta.history.canSingleDelete() {
   620  			keys = append(keys, key)
   621  		}
   622  	}
   623  	return keys
   624  }
   625  
// A keyHistoryItem describes an individual operation performed on a key: the
// kind of write (opType) and the key manager meta timestamp recorded for it
// (metaTimestamp). Several items may share one timestamp, e.g. all items
// copied over by a single keyMeta.mergeInto call.
type keyHistoryItem struct {
	// opType may be writerSet, writerDelete, writerSingleDelete,
	// writerDeleteRange or writerMerge only. No other opTypes may appear here.
	opType        opType
	metaTimestamp int
}
   633  
// keyHistory captures the history of mutations to a key in chronological order
// (oldest first); its methods inspect the tail of the slice to reason about
// the key's most recent state.
type keyHistory []keyHistoryItem
   636  
   637  // before returns the subslice of the key history that happened strictly before
   638  // the provided meta timestamp.
   639  func (h keyHistory) before(metaTimestamp int) keyHistory {
   640  	i, _ := slices.BinarySearchFunc(h, metaTimestamp, func(a keyHistoryItem, ts int) int {
   641  		return cmp.Compare(a.metaTimestamp, ts)
   642  	})
   643  	return h[:i]
   644  }
   645  
   646  // canSingleDelete examines the tail of the history and returns true if a single
   647  // delete appended to this history would satisfy the single delete invariants.
   648  func (h keyHistory) canSingleDelete() bool {
   649  	if len(h) == 0 {
   650  		return true
   651  	}
   652  	switch o := h[len(h)-1].opType; o {
   653  	case writerDelete, writerDeleteRange, writerSingleDelete:
   654  		return true
   655  	case writerSet, writerMerge:
   656  		if len(h) == 1 {
   657  			return true
   658  		}
   659  		return h[len(h)-2].opType.isDelete()
   660  	default:
   661  		panic(errors.AssertionFailedf("unexpected writer op %v", o))
   662  	}
   663  }
   664  
   665  func (h keyHistory) String() string {
   666  	var sb strings.Builder
   667  	for i, it := range h {
   668  		if i > 0 {
   669  			fmt.Fprint(&sb, ", ")
   670  		}
   671  		switch it.opType {
   672  		case writerDelete:
   673  			fmt.Fprint(&sb, "del")
   674  		case writerDeleteRange:
   675  			fmt.Fprint(&sb, "delrange")
   676  		case writerSingleDelete:
   677  			fmt.Fprint(&sb, "singledel")
   678  		case writerSet:
   679  			fmt.Fprint(&sb, "set")
   680  		case writerMerge:
   681  			fmt.Fprint(&sb, "merge")
   682  		default:
   683  			fmt.Fprintf(&sb, "optype[v=%d]", it.opType)
   684  		}
   685  		fmt.Fprintf(&sb, "(%d)", it.metaTimestamp)
   686  	}
   687  	return sb.String()
   688  }
   689  
   690  // hasVisibleKey examines the tail of the history and returns true if the
   691  // history should end in a visible value for this key.
   692  func (h keyHistory) hasVisibleValue() bool {
   693  	if len(h) == 0 {
   694  		return false
   695  	}
   696  	return !h[len(h)-1].opType.isDelete()
   697  }
   698  
   699  // collapsed returns a new key history that's equivalent to the history created
   700  // by an ingestOp that "collapses" a batch's keys. See ingestOp.build.
   701  func (h keyHistory) collapsed() keyHistory {
   702  	var ret keyHistory
   703  	// When collapsing a batch, any range deletes are semantically applied
   704  	// first. Look for any range deletes and apply them.
   705  	for _, op := range h {
   706  		if op.opType == writerDeleteRange {
   707  			ret = append(ret, op)
   708  			break
   709  		}
   710  	}
   711  	// Among point keys, the most recently written key wins.
   712  	for i := len(h) - 1; i >= 0; i-- {
   713  		if h[i].opType != writerDeleteRange {
   714  			ret = append(ret, h[i])
   715  			break
   716  		}
   717  	}
   718  	return ret
   719  }
   720  
   721  func opWrittenKeys(untypedOp op) [][]byte {
   722  	switch t := untypedOp.(type) {
   723  	case *applyOp:
   724  	case *batchCommitOp:
   725  	case *checkpointOp:
   726  	case *closeOp:
   727  	case *compactOp:
   728  	case *dbRestartOp:
   729  	case *deleteOp:
   730  		return [][]byte{t.key}
   731  	case *deleteRangeOp:
   732  		return [][]byte{t.start, t.end}
   733  	case *flushOp:
   734  	case *getOp:
   735  	case *ingestOp:
   736  	case *initOp:
   737  	case *iterFirstOp:
   738  	case *iterLastOp:
   739  	case *iterNextOp:
   740  	case *iterNextPrefixOp:
   741  	case *iterCanSingleDelOp:
   742  	case *iterPrevOp:
   743  	case *iterSeekGEOp:
   744  	case *iterSeekLTOp:
   745  	case *iterSeekPrefixGEOp:
   746  	case *iterSetBoundsOp:
   747  	case *iterSetOptionsOp:
   748  	case *mergeOp:
   749  		return [][]byte{t.key}
   750  	case *newBatchOp:
   751  	case *newIndexedBatchOp:
   752  	case *newIterOp:
   753  	case *newIterUsingCloneOp:
   754  	case *newSnapshotOp:
   755  	case *rangeKeyDeleteOp:
   756  	case *rangeKeySetOp:
   757  	case *rangeKeyUnsetOp:
   758  	case *setOp:
   759  		return [][]byte{t.key}
   760  	case *singleDeleteOp:
   761  		return [][]byte{t.key}
   762  	case *replicateOp:
   763  		return [][]byte{t.start, t.end}
   764  	}
   765  	return nil
   766  }
   767  
   768  func loadPrecedingKeys(t TestingT, ops []op, cfg *config, m *keyManager) {
   769  	for _, op := range ops {
   770  		// Pretend we're generating all the operation's keys as potential new
   771  		// key, so that we update the key manager's keys and prefix sets.
   772  		for _, k := range opWrittenKeys(op) {
   773  			m.addNewKey(k)
   774  
   775  			// If the key has a suffix, ratchet up the suffix distribution if
   776  			// necessary.
   777  			if s := m.comparer.Split(k); s < len(k) {
   778  				suffix, err := testkeys.ParseSuffix(k[s:])
   779  				require.NoError(t, err)
   780  				if uint64(suffix) > cfg.writeSuffixDist.Max() {
   781  					diff := int(uint64(suffix) - cfg.writeSuffixDist.Max())
   782  					cfg.writeSuffixDist.IncMax(diff)
   783  				}
   784  			}
   785  		}
   786  
   787  		// Update key tracking state.
   788  		m.update(op)
   789  	}
   790  }
   791  
   792  func insertSorted(cmp base.Compare, dst *[][]byte, k []byte) {
   793  	s := *dst
   794  	i, _ := slices.BinarySearchFunc(s, k, cmp)
   795  	*dst = slices.Insert(s, i, k)
   796  }