github.com/cockroachdb/pebble@v0.0.0-20231214172447-ab4952c5f87b/metamorphic/ops.go

     1  // Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
     2  // of this source code is governed by a BSD-style license that can be found in
     3  // the LICENSE file.
     4  
     5  package metamorphic
     6  
     7  import (
     8  	"bytes"
     9  	"context"
    10  	"crypto/rand"
    11  	"encoding/binary"
    12  	"fmt"
    13  	"io"
    14  	"path"
    15  	"path/filepath"
    16  	"strings"
    17  
    18  	"github.com/cockroachdb/errors"
    19  	"github.com/cockroachdb/pebble"
    20  	"github.com/cockroachdb/pebble/internal/base"
    21  	"github.com/cockroachdb/pebble/internal/keyspan"
    22  	"github.com/cockroachdb/pebble/internal/private"
    23  	"github.com/cockroachdb/pebble/internal/rangekey"
    24  	"github.com/cockroachdb/pebble/internal/testkeys"
    25  	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
    26  	"github.com/cockroachdb/pebble/sstable"
    27  	"github.com/cockroachdb/pebble/vfs/errorfs"
    28  )
    29  
    30  // op defines the interface for a single operation, such as creating a batch
    31  // or advancing an iterator.
    32  type op interface {
    33  	String() string
    34  	run(t *test, h historyRecorder)
    35  
    36  	// receiver returns the object ID of the object the operation is performed
    37  	// on. Every operation has a receiver (e.g., batch0.Set(...) has `batch0` as
    38  	// its receiver). Receivers are used for synchronization when running with
    39  	// concurrency.
    40  	receiver() objID
    41  
    42  	// syncObjs returns an additional set of object IDs—excluding the
    43  	// receiver—that the operation must synchronize with. At execution time,
    44  	// the operation will run serially with respect to all other operations
    45  	// that return these objects from their own syncObjs or receiver methods.
    46  	syncObjs() objIDSlice
    47  }
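
        // For illustration only (a hedged sketch, not part of the original file): a
        // runner could serialize an op by holding a per-object lock for its receiver
        // and for every ID returned by syncObjs, along the lines of:
        //
        //	func runSerialized(o op, locks map[objID]*sync.Mutex, t *test, h historyRecorder) {
        //		ids := append(objIDSlice{o.receiver()}, o.syncObjs()...)
        //		// A real implementation would sort ids to avoid lock-order deadlocks.
        //		for _, id := range ids {
        //			locks[id].Lock()
        //			defer locks[id].Unlock()
        //		}
        //		o.run(t, h)
        //	}
        //
        // The actual runner's scheduling is more sophisticated; this only shows how
        // receiver and syncObjs together define an op's synchronization domain.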
    48  
    49  // initOp performs test initialization.
    50  type initOp struct {
    51  	dbSlots       uint32
    52  	batchSlots    uint32
    53  	iterSlots     uint32
    54  	snapshotSlots uint32
    55  }
    56  
    57  func (o *initOp) run(t *test, h historyRecorder) {
    58  	t.batches = make([]*pebble.Batch, o.batchSlots)
    59  	t.iters = make([]*retryableIter, o.iterSlots)
    60  	t.snapshots = make([]readerCloser, o.snapshotSlots)
    61  	h.Recordf("%s", o)
    62  }
    63  
    64  func (o *initOp) String() string {
    65  	return fmt.Sprintf("Init(%d /* dbs */, %d /* batches */, %d /* iters */, %d /* snapshots */)",
    66  		o.dbSlots, o.batchSlots, o.iterSlots, o.snapshotSlots)
    67  }
    68  
    69  func (o *initOp) receiver() objID { return makeObjID(dbTag, 1) }
    70  func (o *initOp) syncObjs() objIDSlice {
    71  	syncObjs := make([]objID, 0)
    72  	// Add any additional DBs to syncObjs.
    73  	for i := uint32(2); i < o.dbSlots+1; i++ {
    74  		syncObjs = append(syncObjs, makeObjID(dbTag, i))
    75  	}
    76  	return syncObjs
    77  }
    78  
    79  // applyOp models a Writer.Apply operation.
    80  type applyOp struct {
    81  	writerID objID
    82  	batchID  objID
    83  }
    84  
    85  func (o *applyOp) run(t *test, h historyRecorder) {
    86  	b := t.getBatch(o.batchID)
    87  	w := t.getWriter(o.writerID)
    88  	var err error
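        	// ApplyNoSyncWait initiates the commit without waiting for the WAL sync,
        	// and SyncWait then blocks until the sync completes. Together they are
        	// equivalent to a synchronous Apply, but exercise the asynchronous
        	// commit path.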
    89  	if o.writerID.tag() == dbTag && t.testOpts.asyncApplyToDB && t.writeOpts.Sync {
    90  		err = w.(*pebble.DB).ApplyNoSyncWait(b, t.writeOpts)
    91  		if err == nil {
    92  			err = b.SyncWait()
    93  		}
    94  	} else {
    95  		err = w.Apply(b, t.writeOpts)
    96  	}
    97  	h.Recordf("%s // %v", o, err)
    98  	// batch will be closed by a closeOp which is guaranteed to be generated
    99  }
   100  
   101  func (o *applyOp) String() string  { return fmt.Sprintf("%s.Apply(%s)", o.writerID, o.batchID) }
   102  func (o *applyOp) receiver() objID { return o.writerID }
   103  func (o *applyOp) syncObjs() objIDSlice {
   104  	// Apply should not be concurrent with operations that are mutating the
   105  	// batch.
   106  	return []objID{o.batchID}
   107  }
   108  
   109  // checkpointOp models a DB.Checkpoint operation.
   110  type checkpointOp struct {
   111  	dbID objID
   112  	// If non-empty, the checkpoint is restricted to these spans.
   113  	spans []pebble.CheckpointSpan
   114  }
   115  
   116  func (o *checkpointOp) run(t *test, h historyRecorder) {
   117  	// TODO(josh): db.Checkpoint does not work with shared storage yet.
   118  	// It would be better to filter out ahead of calling run on the op,
   119  	// by setting the weight that generator.go uses to zero, or similar.
   120  	// But as far as I understand, the ops are shared across ALL the metamorphic
   121  	// test runs, so it's not clear how to do that easily:
   122  	// https://github.com/cockroachdb/pebble/blob/master/metamorphic/meta.go#L177
   123  	if t.testOpts.sharedStorageEnabled {
   124  		h.Recordf("%s // %v", o, nil)
   125  		return
   126  	}
   127  	var opts []pebble.CheckpointOption
   128  	if len(o.spans) > 0 {
   129  		opts = append(opts, pebble.WithRestrictToSpans(o.spans))
   130  	}
   131  	db := t.getDB(o.dbID)
   132  	err := withRetries(func() error {
   133  		return db.Checkpoint(o.dir(t.dir, h.op), opts...)
   134  	})
   135  	h.Recordf("%s // %v", o, err)
   136  }
   137  
   138  func (o *checkpointOp) dir(dataDir string, idx int) string {
   139  	return filepath.Join(dataDir, "checkpoints", fmt.Sprintf("op-%06d", idx))
   140  }
   141  
   142  func (o *checkpointOp) String() string {
   143  	var spanStr bytes.Buffer
   144  	for i, span := range o.spans {
   145  		if i > 0 {
   146  			spanStr.WriteString(",")
   147  		}
   148  		fmt.Fprintf(&spanStr, "%q,%q", span.Start, span.End)
   149  	}
   150  	return fmt.Sprintf("%s.Checkpoint(%s)", o.dbID, spanStr.String())
   151  }
   152  
   153  func (o *checkpointOp) receiver() objID      { return o.dbID }
   154  func (o *checkpointOp) syncObjs() objIDSlice { return nil }
   155  
   156  // closeOp models a {Batch,Iterator,Snapshot}.Close operation.
   157  type closeOp struct {
   158  	objID       objID
   159  	derivedDBID objID
   160  }
   161  
   162  func (o *closeOp) run(t *test, h historyRecorder) {
   163  	c := t.getCloser(o.objID)
   164  	if o.objID.tag() == dbTag && t.opts.DisableWAL {
   165  		// Special case: If WAL is disabled, do a flush right before DB Close. This
   166  		// allows us to reuse this run's data directory as initial state for
   167  		// future runs without losing any mutations.
   168  		_ = t.getDB(o.objID).Flush()
   169  	}
   170  	t.clearObj(o.objID)
   171  	err := c.Close()
   172  	h.Recordf("%s // %v", o, err)
   173  }
   174  
   175  func (o *closeOp) String() string  { return fmt.Sprintf("%s.Close()", o.objID) }
   176  func (o *closeOp) receiver() objID { return o.objID }
   177  func (o *closeOp) syncObjs() objIDSlice {
   178  	// Synchronize on the database so that we don't close the database before
   179  	// all its iterators, snapshots and batches are closed.
   180  	// TODO(jackson): It would be nice to relax this so that Close calls can
   181  	// execute in parallel.
   182  	if o.objID.tag() == dbTag {
   183  		return nil
   184  	}
   185  	if o.derivedDBID != 0 {
   186  		return []objID{o.derivedDBID}
   187  	}
   188  	return nil
   189  }
   190  
   191  // compactOp models a DB.Compact operation.
   192  type compactOp struct {
   193  	dbID        objID
   194  	start       []byte
   195  	end         []byte
   196  	parallelize bool
   197  }
   198  
   199  func (o *compactOp) run(t *test, h historyRecorder) {
   200  	err := withRetries(func() error {
   201  		return t.getDB(o.dbID).Compact(o.start, o.end, o.parallelize)
   202  	})
   203  	h.Recordf("%s // %v", o, err)
   204  }
   205  
   206  func (o *compactOp) String() string {
   207  	return fmt.Sprintf("%s.Compact(%q, %q, %t /* parallelize */)", o.dbID, o.start, o.end, o.parallelize)
   208  }
   209  
   210  func (o *compactOp) receiver() objID      { return o.dbID }
   211  func (o *compactOp) syncObjs() objIDSlice { return nil }
   212  
   213  // deleteOp models a Writer.Delete operation.
   214  type deleteOp struct {
   215  	writerID objID
   216  	key      []byte
   217  
   218  	derivedDBID objID
   219  }
   220  
   221  func (o *deleteOp) run(t *test, h historyRecorder) {
   222  	w := t.getWriter(o.writerID)
   223  	var err error
   224  	if t.testOpts.deleteSized && t.isFMV(o.derivedDBID, pebble.FormatDeleteSizedAndObsolete) {
   225  		// Call DeleteSized with a deterministic size derived from the index.
   226  		// The size does not need to be accurate for correctness.
   227  		err = w.DeleteSized(o.key, hashSize(t.idx), t.writeOpts)
   228  	} else {
   229  		err = w.Delete(o.key, t.writeOpts)
   230  	}
   231  	h.Recordf("%s // %v", o, err)
   232  }
   233  
   234  func hashSize(index int) uint32 {
   235  	// Fibonacci hash https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
   236  	return uint32((11400714819323198485 * uint64(index)) % maxValueSize)
   237  }
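
        // For illustration (not original commentary): hashSize depends only on the
        // operation index, so replaying the same operation stream passes identical
        // sizes to DeleteSized on every run. The multiplier is the 64-bit golden
        // ratio constant (~2^64/phi), which scatters consecutive indices roughly
        // uniformly over [0, maxValueSize):
        //
        //	hashSize(1) // always the same value, run after run
        //	hashSize(2) // a different, but equally deterministic, value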
   238  
   239  func (o *deleteOp) String() string {
   240  	return fmt.Sprintf("%s.Delete(%q)", o.writerID, o.key)
   241  }
   242  func (o *deleteOp) receiver() objID      { return o.writerID }
   243  func (o *deleteOp) syncObjs() objIDSlice { return nil }
   244  
   245  // singleDeleteOp models a Writer.SingleDelete operation.
   246  type singleDeleteOp struct {
   247  	writerID           objID
   248  	key                []byte
   249  	maybeReplaceDelete bool
   250  }
   251  
   252  func (o *singleDeleteOp) run(t *test, h historyRecorder) {
   253  	w := t.getWriter(o.writerID)
   254  	var err error
   255  	if t.testOpts.replaceSingleDelete && o.maybeReplaceDelete {
   256  		err = w.Delete(o.key, t.writeOpts)
   257  	} else {
   258  		err = w.SingleDelete(o.key, t.writeOpts)
   259  	}
   260  	// NOTE: even if the SINGLEDEL was replaced with a DELETE, we must still
   261  	// write the former to the history log. The log line will indicate whether
   262  	// or not the delete *could* have been replaced. The OPTIONS file should
   263  	// also be consulted to determine what happened at runtime (i.e. by taking
   264  	// the logical AND).
   265  	h.Recordf("%s // %v", o, err)
   266  }
   267  
   268  func (o *singleDeleteOp) String() string {
   269  	return fmt.Sprintf("%s.SingleDelete(%q, %v /* maybeReplaceDelete */)", o.writerID, o.key, o.maybeReplaceDelete)
   270  }
   271  
   272  func (o *singleDeleteOp) receiver() objID      { return o.writerID }
   273  func (o *singleDeleteOp) syncObjs() objIDSlice { return nil }
   274  
   275  // deleteRangeOp models a Writer.DeleteRange operation.
   276  type deleteRangeOp struct {
   277  	writerID objID
   278  	start    []byte
   279  	end      []byte
   280  }
   281  
   282  func (o *deleteRangeOp) run(t *test, h historyRecorder) {
   283  	w := t.getWriter(o.writerID)
   284  	err := w.DeleteRange(o.start, o.end, t.writeOpts)
   285  	h.Recordf("%s // %v", o, err)
   286  }
   287  
   288  func (o *deleteRangeOp) String() string {
   289  	return fmt.Sprintf("%s.DeleteRange(%q, %q)", o.writerID, o.start, o.end)
   290  }
   291  
   292  func (o *deleteRangeOp) receiver() objID      { return o.writerID }
   293  func (o *deleteRangeOp) syncObjs() objIDSlice { return nil }
   294  
   295  // flushOp models a DB.Flush operation.
   296  type flushOp struct {
   297  	db objID
   298  }
   299  
   300  func (o *flushOp) run(t *test, h historyRecorder) {
   301  	db := t.getDB(o.db)
   302  	err := db.Flush()
   303  	h.Recordf("%s // %v", o, err)
   304  }
   305  
   306  func (o *flushOp) String() string       { return fmt.Sprintf("%s.Flush()", o.db) }
   307  func (o *flushOp) receiver() objID      { return o.db }
   308  func (o *flushOp) syncObjs() objIDSlice { return nil }
   309  
   310  // mergeOp models a Writer.Merge operation.
   311  type mergeOp struct {
   312  	writerID objID
   313  	key      []byte
   314  	value    []byte
   315  }
   316  
   317  func (o *mergeOp) run(t *test, h historyRecorder) {
   318  	w := t.getWriter(o.writerID)
   319  	err := w.Merge(o.key, o.value, t.writeOpts)
   320  	h.Recordf("%s // %v", o, err)
   321  }
   322  
   323  func (o *mergeOp) String() string       { return fmt.Sprintf("%s.Merge(%q, %q)", o.writerID, o.key, o.value) }
   324  func (o *mergeOp) receiver() objID      { return o.writerID }
   325  func (o *mergeOp) syncObjs() objIDSlice { return nil }
   326  
   327  // setOp models a Writer.Set operation.
   328  type setOp struct {
   329  	writerID objID
   330  	key      []byte
   331  	value    []byte
   332  }
   333  
   334  func (o *setOp) run(t *test, h historyRecorder) {
   335  	w := t.getWriter(o.writerID)
   336  	err := w.Set(o.key, o.value, t.writeOpts)
   337  	h.Recordf("%s // %v", o, err)
   338  }
   339  
   340  func (o *setOp) String() string       { return fmt.Sprintf("%s.Set(%q, %q)", o.writerID, o.key, o.value) }
   341  func (o *setOp) receiver() objID      { return o.writerID }
   342  func (o *setOp) syncObjs() objIDSlice { return nil }
   343  
   344  // rangeKeyDeleteOp models a Writer.RangeKeyDelete operation.
   345  type rangeKeyDeleteOp struct {
   346  	writerID objID
   347  	start    []byte
   348  	end      []byte
   349  }
   350  
   351  func (o *rangeKeyDeleteOp) run(t *test, h historyRecorder) {
   352  	w := t.getWriter(o.writerID)
   353  	err := w.RangeKeyDelete(o.start, o.end, t.writeOpts)
   354  	h.Recordf("%s // %v", o, err)
   355  }
   356  
   357  func (o *rangeKeyDeleteOp) String() string {
   358  	return fmt.Sprintf("%s.RangeKeyDelete(%q, %q)", o.writerID, o.start, o.end)
   359  }
   360  
   361  func (o *rangeKeyDeleteOp) receiver() objID      { return o.writerID }
   362  func (o *rangeKeyDeleteOp) syncObjs() objIDSlice { return nil }
   363  
   364  // rangeKeySetOp models a Writer.RangeKeySet operation.
   365  type rangeKeySetOp struct {
   366  	writerID objID
   367  	start    []byte
   368  	end      []byte
   369  	suffix   []byte
   370  	value    []byte
   371  }
   372  
   373  func (o *rangeKeySetOp) run(t *test, h historyRecorder) {
   374  	w := t.getWriter(o.writerID)
   375  	err := w.RangeKeySet(o.start, o.end, o.suffix, o.value, t.writeOpts)
   376  	h.Recordf("%s // %v", o, err)
   377  }
   378  
   379  func (o *rangeKeySetOp) String() string {
   380  	return fmt.Sprintf("%s.RangeKeySet(%q, %q, %q, %q)",
   381  		o.writerID, o.start, o.end, o.suffix, o.value)
   382  }
   383  
   384  func (o *rangeKeySetOp) receiver() objID      { return o.writerID }
   385  func (o *rangeKeySetOp) syncObjs() objIDSlice { return nil }
   386  
   387  // rangeKeyUnsetOp models a Writer.RangeKeyUnset operation.
   388  type rangeKeyUnsetOp struct {
   389  	writerID objID
   390  	start    []byte
   391  	end      []byte
   392  	suffix   []byte
   393  }
   394  
   395  func (o *rangeKeyUnsetOp) run(t *test, h historyRecorder) {
   396  	w := t.getWriter(o.writerID)
   397  	err := w.RangeKeyUnset(o.start, o.end, o.suffix, t.writeOpts)
   398  	h.Recordf("%s // %v", o, err)
   399  }
   400  
   401  func (o *rangeKeyUnsetOp) String() string {
   402  	return fmt.Sprintf("%s.RangeKeyUnset(%q, %q, %q)",
   403  		o.writerID, o.start, o.end, o.suffix)
   404  }
   405  
   406  func (o *rangeKeyUnsetOp) receiver() objID      { return o.writerID }
   407  func (o *rangeKeyUnsetOp) syncObjs() objIDSlice { return nil }
   408  
   409  // newBatchOp models a DB.NewBatch operation.
   410  type newBatchOp struct {
   411  	dbID    objID
   412  	batchID objID
   413  }
   414  
   415  func (o *newBatchOp) run(t *test, h historyRecorder) {
   416  	b := t.getDB(o.dbID).NewBatch()
   417  	t.setBatch(o.batchID, b)
   418  	h.Recordf("%s", o)
   419  }
   420  
   421  func (o *newBatchOp) String() string  { return fmt.Sprintf("%s = %s.NewBatch()", o.batchID, o.dbID) }
   422  func (o *newBatchOp) receiver() objID { return o.dbID }
   423  func (o *newBatchOp) syncObjs() objIDSlice {
   424  	// NewBatch should not be concurrent with operations that interact with that
   425  	// same batch.
   426  	return []objID{o.batchID}
   427  }
   428  
   429  // newIndexedBatchOp models a DB.NewIndexedBatch operation.
   430  type newIndexedBatchOp struct {
   431  	dbID    objID
   432  	batchID objID
   433  }
   434  
   435  func (o *newIndexedBatchOp) run(t *test, h historyRecorder) {
   436  	b := t.getDB(o.dbID).NewIndexedBatch()
   437  	t.setBatch(o.batchID, b)
   438  	h.Recordf("%s", o)
   439  }
   440  
   441  func (o *newIndexedBatchOp) String() string {
   442  	return fmt.Sprintf("%s = %s.NewIndexedBatch()", o.batchID, o.dbID)
   443  }
   444  func (o *newIndexedBatchOp) receiver() objID { return o.dbID }
   445  func (o *newIndexedBatchOp) syncObjs() objIDSlice {
   446  	// NewIndexedBatch should not be concurrent with operations that interact
   447  	// with that same batch.
   448  	return []objID{o.batchID}
   449  }
   450  
   451  // batchCommitOp models a Batch.Commit operation.
   452  type batchCommitOp struct {
   453  	dbID    objID
   454  	batchID objID
   455  }
   456  
   457  func (o *batchCommitOp) run(t *test, h historyRecorder) {
   458  	b := t.getBatch(o.batchID)
   459  	err := b.Commit(t.writeOpts)
   460  	h.Recordf("%s // %v", o, err)
   461  }
   462  
   463  func (o *batchCommitOp) String() string  { return fmt.Sprintf("%s.Commit()", o.batchID) }
   464  func (o *batchCommitOp) receiver() objID { return o.batchID }
   465  func (o *batchCommitOp) syncObjs() objIDSlice {
   466  	// Synchronize on the database so that NewIters wait for the commit.
   467  	return []objID{o.dbID}
   468  }
   469  
   470  // ingestOp models a DB.Ingest operation.
   471  type ingestOp struct {
   472  	dbID     objID
   473  	batchIDs []objID
   474  
   475  	derivedDBIDs []objID
   476  }
   477  
   478  func (o *ingestOp) run(t *test, h historyRecorder) {
   479  	// We can only use apply as an alternative for ingestion if we are ingesting
   480  	// a single batch. If we are ingesting multiple batches, the batches may
   481  	// overlap which would cause ingestion to fail but apply would succeed.
   482  	if t.testOpts.ingestUsingApply && len(o.batchIDs) == 1 && o.derivedDBIDs[0] == o.dbID {
   483  		id := o.batchIDs[0]
   484  		b := t.getBatch(id)
   485  		iter, rangeDelIter, rangeKeyIter := private.BatchSort(b)
   486  		db := t.getDB(o.dbID)
   487  		c, err := o.collapseBatch(t, db, iter, rangeDelIter, rangeKeyIter, b)
   488  		if err == nil {
   489  			err = db.Apply(c, t.writeOpts)
   490  		}
   491  		_ = b.Close()
   492  		_ = c.Close()
   493  		t.clearObj(id)
   494  		h.Recordf("%s // %v", o, err)
   495  		return
   496  	}
   497  
   498  	var paths []string
   499  	var err error
   500  	for i, id := range o.batchIDs {
   501  		b := t.getBatch(id)
   502  		t.clearObj(id)
   503  		path, err2 := o.build(t, h, b, i)
   504  		if err2 != nil {
   505  			h.Recordf("Build(%s) // %v", id, err2)
   506  		}
   507  		err = firstError(err, err2)
   508  		if err2 == nil {
   509  			paths = append(paths, path)
   510  		}
   511  		err = firstError(err, b.Close())
   512  	}
   513  
   514  	err = firstError(err, withRetries(func() error {
   515  		return t.getDB(o.dbID).Ingest(paths)
   516  	}))
   517  
   518  	h.Recordf("%s // %v", o, err)
   519  }
   520  
   521  func buildForIngest(
   522  	t *test, dbID objID, h historyRecorder, b *pebble.Batch, i int,
   523  ) (string, *sstable.WriterMetadata, error) {
   524  	path := t.opts.FS.PathJoin(t.tmpDir, fmt.Sprintf("ext%d-%d", dbID.slot(), i))
   525  	f, err := t.opts.FS.Create(path)
   526  	if err != nil {
   527  		return "", nil, err
   528  	}
   529  	db := t.getDB(dbID)
   530  
   531  	iter, rangeDelIter, rangeKeyIter := private.BatchSort(b)
   532  	defer closeIters(iter, rangeDelIter, rangeKeyIter)
   533  
   534  	equal := t.opts.Comparer.Equal
   535  	tableFormat := db.FormatMajorVersion().MaxTableFormat()
   536  	w := sstable.NewWriter(
   537  		objstorageprovider.NewFileWritable(f),
   538  		t.opts.MakeWriterOptions(0, tableFormat),
   539  	)
   540  
   541  	var lastUserKey []byte
   542  	for key, value := iter.First(); key != nil; key, value = iter.Next() {
   543  		// Ignore duplicate keys.
   544  		if equal(lastUserKey, key.UserKey) {
   545  			continue
   546  		}
   547  		// NB: We don't have to copy the key or value since we're reading from a
   548  		// batch which doesn't do prefix compression.
   549  		lastUserKey = key.UserKey
   550  
   551  		key.SetSeqNum(base.SeqNumZero)
   552  		// It's possible that we wrote the key in a batch from a DB that supported
   553  		// DeleteSized, but are now ingesting into a DB that does not. Detect
   554  		// this case and translate the key to an InternalKeyKindDelete.
   555  		if key.Kind() == pebble.InternalKeyKindDeleteSized && !t.isFMV(dbID, pebble.FormatDeleteSizedAndObsolete) {
   556  			value = pebble.LazyValue{}
   557  			key.SetKind(pebble.InternalKeyKindDelete)
   558  		}
   559  		if err := w.Add(*key, value.InPlaceValue()); err != nil {
   560  			return "", nil, err
   561  		}
   562  	}
   563  	if err := iter.Close(); err != nil {
   564  		return "", nil, err
   565  	}
   566  	iter = nil
   567  
   568  	if rangeDelIter != nil {
   569  		// NB: The range tombstones have already been fragmented by the Batch.
   570  		for t := rangeDelIter.First(); t != nil; t = rangeDelIter.Next() {
   571  			// NB: We don't have to copy the key or value since we're reading from a
   572  			// batch which doesn't do prefix compression.
   573  			if err := w.DeleteRange(t.Start, t.End); err != nil {
   574  				return "", nil, err
   575  			}
   576  		}
   577  		if err := rangeDelIter.Close(); err != nil {
   578  			return "", nil, err
   579  		}
   580  		rangeDelIter = nil
   581  	}
   582  
   583  	if rangeKeyIter != nil {
   584  		for span := rangeKeyIter.First(); span != nil; span = rangeKeyIter.Next() {
   585  			// Coalesce the keys of this span and then zero the sequence
   586  			// numbers. This is necessary in order to make the range keys within
   587  			// the ingested sstable internally consistent at the sequence number
   588  			// it's ingested at. The individual keys within a batch are
   589  			// committed at unique sequence numbers, whereas all the keys of an
   590  			// ingested sstable are given the same sequence number. A span
   591  			// containing keys that both set and unset the same suffix at the
   592  			// same sequence number is nonsensical, so we "coalesce" or collapse
   593  			// the keys.
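        			//
        			// For example (an illustrative scenario, not original commentary):
        			// a batch holding RangeKeySet([a,c), @5, v) and a later
        			// RangeKeyUnset([a,c), @5) would, after zeroing sequence numbers
        			// but without coalescing, claim to both set and unset suffix @5
        			// at the same seqnum. Coalescing resolves the conflict in favor
        			// of the newer key before the span is encoded.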
   594  			collapsed := keyspan.Span{
   595  				Start: span.Start,
   596  				End:   span.End,
   597  				Keys:  make([]keyspan.Key, 0, len(span.Keys)),
   598  			}
   599  			err = rangekey.Coalesce(t.opts.Comparer.Compare, equal, span.Keys, &collapsed.Keys)
   600  			if err != nil {
   601  				return "", nil, err
   602  			}
   603  			for i := range collapsed.Keys {
   604  				collapsed.Keys[i].Trailer = base.MakeTrailer(0, collapsed.Keys[i].Kind())
   605  			}
   606  			keyspan.SortKeysByTrailer(&collapsed.Keys)
   607  			if err := rangekey.Encode(&collapsed, w.AddRangeKey); err != nil {
   608  				return "", nil, err
   609  			}
   610  		}
   611  		if err := rangeKeyIter.Error(); err != nil {
   612  			return "", nil, err
   613  		}
   614  		if err := rangeKeyIter.Close(); err != nil {
   615  			return "", nil, err
   616  		}
   617  		rangeKeyIter = nil
   618  	}
   619  
   620  	if err := w.Close(); err != nil {
   621  		return "", nil, err
   622  	}
   623  	meta, err := w.Metadata()
   624  	return path, meta, err
   625  }
   626  
   627  func (o *ingestOp) build(t *test, h historyRecorder, b *pebble.Batch, i int) (string, error) {
   628  	path, _, err := buildForIngest(t, o.dbID, h, b, i)
   629  	return path, err
   630  }
   631  
   632  func (o *ingestOp) receiver() objID { return o.dbID }
   633  func (o *ingestOp) syncObjs() objIDSlice {
   634  	// Ingest should not be concurrent with mutating the batches that will be
   635  	// ingested as sstables.
   636  	objs := make([]objID, 0, len(o.batchIDs)+1)
   637  	objs = append(objs, o.batchIDs...)
   638  	addedDBs := make(map[objID]struct{})
   639  	for i := range o.derivedDBIDs {
   640  		_, ok := addedDBs[o.derivedDBIDs[i]]
   641  		if !ok && o.derivedDBIDs[i] != o.dbID {
   642  			objs = append(objs, o.derivedDBIDs[i])
   643  			addedDBs[o.derivedDBIDs[i]] = struct{}{}
   644  		}
   645  	}
   646  	return objs
   647  }
   648  
   649  func closeIters(
   650  	pointIter base.InternalIterator,
   651  	rangeDelIter keyspan.FragmentIterator,
   652  	rangeKeyIter keyspan.FragmentIterator,
   653  ) {
   654  	if pointIter != nil {
   655  		pointIter.Close()
   656  	}
   657  	if rangeDelIter != nil {
   658  		rangeDelIter.Close()
   659  	}
   660  	if rangeKeyIter != nil {
   661  		rangeKeyIter.Close()
   662  	}
   663  }
   664  
   665  // collapseBatch collapses the mutations in a batch to be equivalent to an
   666  // sstable ingesting those mutations. Duplicate updates to a key are collapsed
   667  // so that only the latest update is performed. All range deletions are
   668  // performed first in the batch to match the semantics of ingestion where a
   669  // range deletion does not delete a point record contained in the sstable.
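        //
        // For example (illustrative, following the semantics described above): a
        // batch containing Set(a, v1), Set(a, v2), DeleteRange(a, c) collapses so
        // that DeleteRange(a, c) is applied first, followed by Set(a, v2) only; key
        // a ends up at v2, just as it would if the batch were ingested as an sstable
        // whose range deletion does not delete its own point keys.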
   670  func (o *ingestOp) collapseBatch(
   671  	t *test,
   672  	db *pebble.DB,
   673  	pointIter base.InternalIterator,
   674  	rangeDelIter, rangeKeyIter keyspan.FragmentIterator,
   675  	b *pebble.Batch,
   676  ) (*pebble.Batch, error) {
   677  	defer closeIters(pointIter, rangeDelIter, rangeKeyIter)
   678  	equal := t.opts.Comparer.Equal
   679  	collapsed := db.NewBatch()
   680  
   681  	if rangeDelIter != nil {
   682  		// NB: The range tombstones have already been fragmented by the Batch.
   683  		for t := rangeDelIter.First(); t != nil; t = rangeDelIter.Next() {
   684  			// NB: We don't have to copy the key or value since we're reading from a
   685  			// batch which doesn't do prefix compression.
   686  			if err := collapsed.DeleteRange(t.Start, t.End, nil); err != nil {
   687  				return nil, err
   688  			}
   689  		}
   690  		if err := rangeDelIter.Close(); err != nil {
   691  			return nil, err
   692  		}
   693  		rangeDelIter = nil
   694  	}
   695  
   696  	if pointIter != nil {
   697  		var lastUserKey []byte
   698  		for key, value := pointIter.First(); key != nil; key, value = pointIter.Next() {
   699  			// Ignore duplicate keys.
   700  			//
   701  			// Note: this is necessary due to MERGE keys, otherwise it would be
   702  			// fine to include all the keys in the batch and let the normal
   703  			// sequence number precedence determine which of the keys "wins".
   704  			// But the code to build the ingested sstable will only keep the
   705  			// most recent internal key and will not merge across internal keys.
   706  			if equal(lastUserKey, key.UserKey) {
   707  				continue
   708  			}
   709  			// NB: We don't have to copy the key or value since we're reading from a
   710  			// batch which doesn't do prefix compression.
   711  			lastUserKey = key.UserKey
   712  
   713  			var err error
   714  			switch key.Kind() {
   715  			case pebble.InternalKeyKindDelete:
   716  				err = collapsed.Delete(key.UserKey, nil)
   717  			case pebble.InternalKeyKindDeleteSized:
   718  				v, _ := binary.Uvarint(value.InPlaceValue())
   719  				// Batch.DeleteSized takes just the length of the value being
   720  				// deleted and adds the key's length to derive the overall entry
   721  				// size of the value being deleted. This has already been done
   722  				// to the key we're reading from the batch, so we must subtract
   723  				// the key length from the encoded value before calling
   724  				// collapsed.DeleteSized, which will again add the key length
   725  				// before encoding.
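        				//
        				// For example (illustrative numbers, not from the original):
        				// a key "foo" (3 bytes) originally deleted with
        				// DeleteSized("foo", 10) is encoded in the batch as 13; we
        				// decode v = 13 and pass 13-3 = 10, so DeleteSized's own
        				// accounting re-adds the key length exactly once.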
   726  				err = collapsed.DeleteSized(key.UserKey, uint32(v-uint64(len(key.UserKey))), nil)
   727  			case pebble.InternalKeyKindSingleDelete:
   728  				err = collapsed.SingleDelete(key.UserKey, nil)
   729  			case pebble.InternalKeyKindSet:
   730  				err = collapsed.Set(key.UserKey, value.InPlaceValue(), nil)
   731  			case pebble.InternalKeyKindMerge:
   732  				err = collapsed.Merge(key.UserKey, value.InPlaceValue(), nil)
   733  			case pebble.InternalKeyKindLogData:
   734  				err = collapsed.LogData(key.UserKey, nil)
   735  			default:
   736  				err = errors.Errorf("unknown batch record kind: %d", key.Kind())
   737  			}
   738  			if err != nil {
   739  				return nil, err
   740  			}
   741  		}
   742  		if err := pointIter.Close(); err != nil {
   743  			return nil, err
   744  		}
   745  		pointIter = nil
   746  	}
   747  
   748  	// There's no equivalent of a MERGE operator for range keys, so there's no
   749  	// need to collapse the range keys here. Rather than reading the range keys
   750  	// from `rangeKeyIter`, which will already be fragmented, read the range
   751  	// keys from the batch and copy them verbatim. This marginally improves our
   752  	// test coverage over the alternative approach of pre-fragmenting and
   753  	// pre-coalescing before writing to the batch.
   754  	//
   755  	// The `rangeKeyIter` is used only to determine if there are any range keys
   756  	// in the batch at all, and only because we already have it handy from
   757  	// private.BatchSort.
   758  	if rangeKeyIter != nil {
   759  		for r := b.Reader(); ; {
   760  			kind, key, value, ok, err := r.Next()
   761  			if !ok {
   762  				if err != nil {
   763  					return nil, err
   764  				}
   765  				break
   766  			} else if !rangekey.IsRangeKey(kind) {
   767  				continue
   768  			}
   769  			ik := base.MakeInternalKey(key, 0, kind)
   770  			if err := collapsed.AddInternalKey(&ik, value, nil); err != nil {
   771  				return nil, err
   772  			}
   773  		}
   774  		if err := rangeKeyIter.Close(); err != nil {
   775  			return nil, err
   776  		}
   777  		rangeKeyIter = nil
   778  	}
   779  
   780  	return collapsed, nil
   781  }
   782  
   783  func (o *ingestOp) String() string {
   784  	var buf strings.Builder
   785  	buf.WriteString(o.dbID.String())
   786  	buf.WriteString(".Ingest(")
   787  	for i, id := range o.batchIDs {
   788  		if i > 0 {
   789  			buf.WriteString(", ")
   790  		}
   791  		buf.WriteString(id.String())
   792  	}
   793  	buf.WriteString(")")
   794  	return buf.String()
   795  }
   796  
   797  type ingestAndExciseOp struct {
   798  	dbID                   objID
   799  	batchID                objID
   800  	derivedDBID            objID
   801  	exciseStart, exciseEnd []byte
   802  }
   803  
   804  func (o *ingestAndExciseOp) run(t *test, h historyRecorder) {
   805  	var err error
   806  	b := t.getBatch(o.batchID)
   807  	t.clearObj(o.batchID)
   808  	if t.testOpts.Opts.Comparer.Compare(o.exciseEnd, o.exciseStart) <= 0 {
   809  		panic("non-well-formed excise span")
   810  	}
   811  	if b.Empty() {
   812  		// No-op.
   813  		h.Recordf("%s // %v", o, err)
   814  		return
   815  	}
   816  	path, writerMeta, err2 := o.build(t, h, b, 0 /* i */)
   817  	if err2 != nil {
   818  		h.Recordf("Build(%s) // %v", o.batchID, err2)
   819  		return
   820  	}
   821  	err = firstError(err, err2)
   822  	err = firstError(err, b.Close())
   823  
   824  	if writerMeta.Properties.NumEntries == 0 && writerMeta.Properties.NumRangeKeys() == 0 {
   825  		// No-op.
   826  		h.Recordf("%s // %v", o, err)
   827  		return
   828  	}
   829  	db := t.getDB(o.dbID)
   830  	if !t.testOpts.useExcise {
   831  		// Do a rangedel and rangekeydel before the ingestion. This mimics the
   832  		// behaviour of an excise.
   833  		err = firstError(err, db.DeleteRange(o.exciseStart, o.exciseEnd, t.writeOpts))
   834  		err = firstError(err, db.RangeKeyDelete(o.exciseStart, o.exciseEnd, t.writeOpts))
   835  	}
   836  
   837  	if t.testOpts.useExcise {
   838  		err = firstError(err, withRetries(func() error {
   839  			_, err := t.getDB(o.dbID).IngestAndExcise([]string{path}, nil /* sharedSSTs */, pebble.KeyRange{
   840  				Start: o.exciseStart,
   841  				End:   o.exciseEnd,
   842  			})
   843  			return err
   844  		}))
   845  	} else {
   846  		err = firstError(err, withRetries(func() error {
   847  			return t.getDB(o.dbID).Ingest([]string{path})
   848  		}))
   849  	}
   850  
   851  	h.Recordf("%s // %v", o, err)
   852  }
   853  
   854  func (o *ingestAndExciseOp) build(
   855  	t *test, h historyRecorder, b *pebble.Batch, i int,
   856  ) (string, *sstable.WriterMetadata, error) {
   857  	return buildForIngest(t, o.dbID, h, b, i)
   858  }
   859  
   860  func (o *ingestAndExciseOp) receiver() objID { return o.dbID }
   861  func (o *ingestAndExciseOp) syncObjs() objIDSlice {
   862  	// Ingest should not be concurrent with mutating the batches that will be
   863  	// ingested as sstables.
   864  	objs := []objID{o.batchID}
   865  	if o.derivedDBID != o.dbID {
   866  		objs = append(objs, o.derivedDBID)
   867  	}
   868  	return objs
   869  }
   870  
   871  func (o *ingestAndExciseOp) String() string {
   872  	return fmt.Sprintf("%s.IngestAndExcise(%s, %q, %q)", o.dbID, o.batchID, o.exciseStart, o.exciseEnd)
   873  }
   874  
   875  // getOp models a Reader.Get operation.
   876  type getOp struct {
   877  	readerID    objID
   878  	key         []byte
   879  	derivedDBID objID
   880  }
   881  
   882  func (o *getOp) run(t *test, h historyRecorder) {
   883  	r := t.getReader(o.readerID)
   884  	var val []byte
   885  	var closer io.Closer
   886  	err := withRetries(func() (err error) {
   887  		val, closer, err = r.Get(o.key)
   888  		return err
   889  	})
   890  	h.Recordf("%s // [%q] %v", o, val, err)
   891  	if closer != nil {
   892  		closer.Close()
   893  	}
   894  }
   895  
   896  func (o *getOp) String() string  { return fmt.Sprintf("%s.Get(%q)", o.readerID, o.key) }
   897  func (o *getOp) receiver() objID { return o.readerID }
   898  func (o *getOp) syncObjs() objIDSlice {
   899  	if o.readerID.tag() == dbTag {
   900  		return nil
   901  	}
   902  	// batch.Get reads through to the current database state.
   903  	if o.derivedDBID != 0 {
   904  		return []objID{o.derivedDBID}
   905  	}
   906  	return nil
   907  }
   908  
   909  // newIterOp models a Reader.NewIter operation.
   910  type newIterOp struct {
   911  	readerID objID
   912  	iterID   objID
   913  	iterOpts
   914  	derivedDBID objID
   915  }
   916  
   917  func (o *newIterOp) run(t *test, h historyRecorder) {
   918  	r := t.getReader(o.readerID)
   919  	opts := iterOptions(o.iterOpts)
   920  
   921  	var i *pebble.Iterator
   922  	for {
   923  		i, _ = r.NewIter(opts)
   924  		if err := i.Error(); !errors.Is(err, errorfs.ErrInjected) {
   925  			break
   926  		}
   927  		// Close this iter and retry NewIter.
   928  		_ = i.Close()
   929  	}
   930  	t.setIter(o.iterID, i)
   931  
   932  	// Trash the bounds to ensure that Pebble doesn't rely on the stability of
   933  	// the user-provided bounds.
   934  	if opts != nil {
   935  		rand.Read(opts.LowerBound[:])
   936  		rand.Read(opts.UpperBound[:])
   937  	}
   938  	h.Recordf("%s // %v", o, i.Error())
   939  }
   940  
   941  func (o *newIterOp) String() string {
   942  	return fmt.Sprintf("%s = %s.NewIter(%q, %q, %d /* key types */, %d, %d, %t /* use L6 filters */, %q /* masking suffix */)",
   943  		o.iterID, o.readerID, o.lower, o.upper, o.keyTypes, o.filterMin, o.filterMax, o.useL6Filters, o.maskSuffix)
   944  }
   945  
   946  func (o *newIterOp) receiver() objID { return o.readerID }
   947  func (o *newIterOp) syncObjs() objIDSlice {
   948  	// Prevent o.iterID ops from running before it exists.
   949  	objs := []objID{o.iterID}
   950  	// If reading through a batch or snapshot, the new iterator will also observe database
   951  	// state, and we must synchronize on the database state for a consistent
   952  	// view.
   953  	if o.readerID.tag() == batchTag || o.readerID.tag() == snapTag {
   954  		objs = append(objs, o.derivedDBID)
   955  	}
   956  	return objs
   957  }
   958  
   959  // newIterUsingCloneOp models an Iterator.Clone operation.
   960  type newIterUsingCloneOp struct {
   961  	existingIterID objID
   962  	iterID         objID
   963  	refreshBatch   bool
   964  	iterOpts
   965  
   966  	// derivedReaderID is the ID of the underlying reader that backs both the
   967  	// existing iterator and the new iterator. The derivedReaderID is NOT
   968  	// serialized by String and is derived from other operations during parse.
   969  	derivedReaderID objID
   970  }
   971  
   972  func (o *newIterUsingCloneOp) run(t *test, h historyRecorder) {
   973  	iter := t.getIter(o.existingIterID)
   974  	cloneOpts := pebble.CloneOptions{
   975  		IterOptions:      iterOptions(o.iterOpts),
   976  		RefreshBatchView: o.refreshBatch,
   977  	}
   978  	i, err := iter.iter.Clone(cloneOpts)
   979  	if err != nil {
   980  		panic(err)
   981  	}
   982  	t.setIter(o.iterID, i)
   983  	h.Recordf("%s // %v", o, i.Error())
   984  }
   985  
   986  func (o *newIterUsingCloneOp) String() string {
   987  	return fmt.Sprintf("%s = %s.Clone(%t, %q, %q, %d /* key types */, %d, %d, %t /* use L6 filters */, %q /* masking suffix */)",
   988  		o.iterID, o.existingIterID, o.refreshBatch, o.lower, o.upper,
   989  		o.keyTypes, o.filterMin, o.filterMax, o.useL6Filters, o.maskSuffix)
   990  }
   991  
   992  func (o *newIterUsingCloneOp) receiver() objID { return o.existingIterID }
   993  
   994  func (o *newIterUsingCloneOp) syncObjs() objIDSlice {
   995  	objIDs := []objID{o.iterID}
   996  	// If the underlying reader is a batch, we must synchronize with the batch.
   997  	// If refreshBatch=true, synchronizing is necessary to observe all the
   998  	// mutations up until this op and no more. Even when refreshBatch=false,
   999  	// we must synchronize because iterator construction may access state cached
  1000  	// on the indexed batch to avoid refragmenting range tombstones or range
  1001  	// keys.
  1002  	if o.derivedReaderID.tag() == batchTag {
  1003  		objIDs = append(objIDs, o.derivedReaderID)
  1004  	}
  1005  	return objIDs
  1006  }
  1007  
  1008  // iterSetBoundsOp models an Iterator.SetBounds operation.
  1009  type iterSetBoundsOp struct {
  1010  	iterID objID
  1011  	lower  []byte
  1012  	upper  []byte
  1013  }
  1014  
  1015  func (o *iterSetBoundsOp) run(t *test, h historyRecorder) {
  1016  	i := t.getIter(o.iterID)
  1017  	var lower, upper []byte
  1018  	if o.lower != nil {
  1019  		lower = append(lower, o.lower...)
  1020  	}
  1021  	if o.upper != nil {
  1022  		upper = append(upper, o.upper...)
  1023  	}
  1024  	i.SetBounds(lower, upper)
  1025  
  1026  	// Trash the bounds to ensure that Pebble doesn't rely on the stability of
  1027  	// the user-provided bounds.
  1028  	rand.Read(lower[:])
  1029  	rand.Read(upper[:])
  1030  
  1031  	h.Recordf("%s // %v", o, i.Error())
  1032  }
  1033  
  1034  func (o *iterSetBoundsOp) String() string {
  1035  	return fmt.Sprintf("%s.SetBounds(%q, %q)", o.iterID, o.lower, o.upper)
  1036  }
  1037  
  1038  func (o *iterSetBoundsOp) receiver() objID      { return o.iterID }
  1039  func (o *iterSetBoundsOp) syncObjs() objIDSlice { return nil }
  1040  
  1041  // iterSetOptionsOp models an Iterator.SetOptions operation.
  1042  type iterSetOptionsOp struct {
  1043  	iterID objID
  1044  	iterOpts
  1045  
  1046  	// derivedReaderID is the ID of the underlying reader that backs the
  1047  	// iterator. The derivedReaderID is NOT serialized by String and is derived
  1048  	// from other operations during parse.
  1049  	derivedReaderID objID
  1050  }
  1051  
  1052  func (o *iterSetOptionsOp) run(t *test, h historyRecorder) {
  1053  	i := t.getIter(o.iterID)
  1054  
  1055  	opts := iterOptions(o.iterOpts)
  1056  	if opts == nil {
  1057  		opts = &pebble.IterOptions{}
  1058  	}
  1059  	i.SetOptions(opts)
  1060  
  1061  	// Trash the bounds to ensure that Pebble doesn't rely on the stability of
  1062  	// the user-provided bounds.
  1063  	rand.Read(opts.LowerBound[:])
  1064  	rand.Read(opts.UpperBound[:])
  1065  
  1066  	h.Recordf("%s // %v", o, i.Error())
  1067  }
  1068  
  1069  func (o *iterSetOptionsOp) String() string {
  1070  	return fmt.Sprintf("%s.SetOptions(%q, %q, %d /* key types */, %d, %d, %t /* use L6 filters */, %q /* masking suffix */)",
  1071  		o.iterID, o.lower, o.upper, o.keyTypes, o.filterMin, o.filterMax, o.useL6Filters, o.maskSuffix)
  1072  }
  1073  
  1074  func iterOptions(o iterOpts) *pebble.IterOptions {
  1075  	if o.IsZero() {
  1076  		return nil
  1077  	}
  1078  	var lower, upper []byte
  1079  	if o.lower != nil {
  1080  		lower = append(lower, o.lower...)
  1081  	}
  1082  	if o.upper != nil {
  1083  		upper = append(upper, o.upper...)
  1084  	}
  1085  	opts := &pebble.IterOptions{
  1086  		LowerBound: lower,
  1087  		UpperBound: upper,
  1088  		KeyTypes:   pebble.IterKeyType(o.keyTypes),
  1089  		RangeKeyMasking: pebble.RangeKeyMasking{
  1090  			Suffix: o.maskSuffix,
  1091  		},
  1092  		UseL6Filters: o.useL6Filters,
  1093  	}
  1094  	if opts.RangeKeyMasking.Suffix != nil {
  1095  		opts.RangeKeyMasking.Filter = func() pebble.BlockPropertyFilterMask {
  1096  			return sstable.NewTestKeysMaskingFilter()
  1097  		}
  1098  	}
  1099  	if o.filterMax > 0 {
  1100  		opts.PointKeyFilters = []pebble.BlockPropertyFilter{
  1101  			sstable.NewTestKeysBlockPropertyFilter(o.filterMin, o.filterMax),
  1102  		}
  1103  		// Enforce the timestamp bounds in SkipPoint, so that the iterator never
  1104  		// returns a key outside the filterMin, filterMax bounds. This provides
  1105  		// deterministic iteration.
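        		//
        		// For example (illustrative keys, not original commentary): with
        		// filterMin=3 and filterMax=7, "a@2" is skipped (2 < 3), "a@5" is
        		// returned, "a@7" is skipped (7 >= 7), and "a" has no suffix, so it
        		// is never skipped.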
  1106  		opts.SkipPoint = func(k []byte) (skip bool) {
  1107  			n := testkeys.Comparer.Split(k)
  1108  			if n == len(k) {
  1109  				// No suffix, don't skip it.
  1110  				return false
  1111  			}
  1112  			v, err := testkeys.ParseSuffix(k[n:])
  1113  			if err != nil {
  1114  				panic(err)
  1115  			}
  1116  			ts := uint64(v)
  1117  			return ts < o.filterMin || ts >= o.filterMax
  1118  		}
  1119  	}
  1120  	return opts
  1121  }
  1122  
  1123  func (o *iterSetOptionsOp) receiver() objID { return o.iterID }
  1124  
  1125  func (o *iterSetOptionsOp) syncObjs() objIDSlice {
  1126  	if o.derivedReaderID.tag() == batchTag {
  1127  		// If the underlying reader is a batch, we must synchronize with the
  1128  		// batch so that we observe all the mutations up until this operation
  1129  		// and no more.
  1130  		return []objID{o.derivedReaderID}
  1131  	}
  1132  	return nil
  1133  }
  1134  
  1135  // iterSeekGEOp models an Iterator.SeekGE[WithLimit] operation.
  1136  type iterSeekGEOp struct {
  1137  	iterID objID
  1138  	key    []byte
  1139  	limit  []byte
  1140  
  1141  	derivedReaderID objID
  1142  }
  1143  
  1144  func iteratorPos(i *retryableIter) string {
  1145  	var buf bytes.Buffer
  1146  	fmt.Fprintf(&buf, "%q", i.Key())
  1147  	hasPoint, hasRange := i.HasPointAndRange()
  1148  	if hasPoint {
  1149  		fmt.Fprintf(&buf, ",%q", i.Value())
  1150  	} else {
  1151  		fmt.Fprint(&buf, ",<no point>")
  1152  	}
  1153  	if hasRange {
  1154  		start, end := i.RangeBounds()
  1155  		fmt.Fprintf(&buf, ",[%q,%q)=>{", start, end)
  1156  		for i, rk := range i.RangeKeys() {
  1157  			if i > 0 {
  1158  				fmt.Fprint(&buf, ",")
  1159  			}
  1160  			fmt.Fprintf(&buf, "%q=%q", rk.Suffix, rk.Value)
  1161  		}
  1162  		fmt.Fprint(&buf, "}")
  1163  	} else {
  1164  		fmt.Fprint(&buf, ",<no range>")
  1165  	}
  1166  	if i.RangeKeyChanged() {
  1167  		fmt.Fprint(&buf, "*")
  1168  	}
  1169  	return buf.String()
  1170  }
  1171  
  1172  func validBoolToStr(valid bool) string {
  1173  	return fmt.Sprintf("%t", valid)
  1174  }
  1175  
  1176  func validityStateToStr(validity pebble.IterValidityState) (bool, string) {
  1177  	// We can't distinguish between IterExhausted and IterAtLimit in a
  1178  	// deterministic manner.
  1179  	switch validity {
  1180  	case pebble.IterExhausted, pebble.IterAtLimit:
  1181  		return false, "invalid"
  1182  	case pebble.IterValid:
  1183  		return true, "valid"
  1184  	default:
  1185  		panic("unknown validity")
  1186  	}
  1187  }
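
        // For illustration (an assumption about intent, not original commentary): a
        // seek that stops at its limit and one that exhausts the iterator record the
        // same history line, e.g.:
        //
        //	iter3.SeekGE("c", "e") // [invalid] <nil>
        //
        // keeping the history deterministic even though IterAtLimit versus
        // IterExhausted can differ from run to run.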
  1188  
  1189  func (o *iterSeekGEOp) run(t *test, h historyRecorder) {
  1190  	i := t.getIter(o.iterID)
  1191  	var valid bool
  1192  	var validStr string
  1193  	if o.limit == nil {
  1194  		valid = i.SeekGE(o.key)
  1195  		validStr = validBoolToStr(valid)
  1196  	} else {
  1197  		valid, validStr = validityStateToStr(i.SeekGEWithLimit(o.key, o.limit))
  1198  	}
  1199  	if valid {
  1200  		h.Recordf("%s // [%s,%s] %v", o, validStr, iteratorPos(i), i.Error())
  1201  	} else {
  1202  		h.Recordf("%s // [%s] %v", o, validStr, i.Error())
  1203  	}
  1204  }
  1205  
  1206  func (o *iterSeekGEOp) String() string {
  1207  	return fmt.Sprintf("%s.SeekGE(%q, %q)", o.iterID, o.key, o.limit)
  1208  }
  1209  func (o *iterSeekGEOp) receiver() objID      { return o.iterID }
  1210  func (o *iterSeekGEOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
  1211  
  1212  func onlyBatchIDs(ids ...objID) objIDSlice {
  1213  	var ret objIDSlice
  1214  	for _, id := range ids {
  1215  		if id.tag() == batchTag {
  1216  			ret = append(ret, id)
  1217  		}
  1218  	}
  1219  	return ret
  1220  }
  1221  
  1222  // iterSeekPrefixGEOp models an Iterator.SeekPrefixGE operation.
  1223  type iterSeekPrefixGEOp struct {
  1224  	iterID objID
  1225  	key    []byte
  1226  
  1227  	derivedReaderID objID
  1228  }
  1229  
  1230  func (o *iterSeekPrefixGEOp) run(t *test, h historyRecorder) {
  1231  	i := t.getIter(o.iterID)
  1232  	valid := i.SeekPrefixGE(o.key)
  1233  	if valid {
  1234  		h.Recordf("%s // [%t,%s] %v", o, valid, iteratorPos(i), i.Error())
  1235  	} else {
  1236  		h.Recordf("%s // [%t] %v", o, valid, i.Error())
  1237  	}
  1238  }
  1239  
  1240  func (o *iterSeekPrefixGEOp) String() string {
  1241  	return fmt.Sprintf("%s.SeekPrefixGE(%q)", o.iterID, o.key)
  1242  }
  1243  func (o *iterSeekPrefixGEOp) receiver() objID      { return o.iterID }
  1244  func (o *iterSeekPrefixGEOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
  1245  
  1246  // iterSeekLTOp models an Iterator.SeekLT[WithLimit] operation.
  1247  type iterSeekLTOp struct {
  1248  	iterID objID
  1249  	key    []byte
  1250  	limit  []byte
  1251  
  1252  	derivedReaderID objID
  1253  }
  1254  
  1255  func (o *iterSeekLTOp) run(t *test, h historyRecorder) {
  1256  	i := t.getIter(o.iterID)
  1257  	var valid bool
  1258  	var validStr string
  1259  	if o.limit == nil {
  1260  		valid = i.SeekLT(o.key)
  1261  		validStr = validBoolToStr(valid)
  1262  	} else {
  1263  		valid, validStr = validityStateToStr(i.SeekLTWithLimit(o.key, o.limit))
  1264  	}
  1265  	if valid {
  1266  		h.Recordf("%s // [%s,%s] %v", o, validStr, iteratorPos(i), i.Error())
  1267  	} else {
  1268  		h.Recordf("%s // [%s] %v", o, validStr, i.Error())
  1269  	}
  1270  }
  1271  
  1272  func (o *iterSeekLTOp) String() string {
  1273  	return fmt.Sprintf("%s.SeekLT(%q, %q)", o.iterID, o.key, o.limit)
  1274  }
  1275  
  1276  func (o *iterSeekLTOp) receiver() objID      { return o.iterID }
  1277  func (o *iterSeekLTOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
  1278  
  1279  // iterFirstOp models an Iterator.First operation.
  1280  type iterFirstOp struct {
  1281  	iterID objID
  1282  
  1283  	derivedReaderID objID
  1284  }
  1285  
  1286  func (o *iterFirstOp) run(t *test, h historyRecorder) {
  1287  	i := t.getIter(o.iterID)
  1288  	valid := i.First()
  1289  	if valid {
  1290  		h.Recordf("%s // [%t,%s] %v", o, valid, iteratorPos(i), i.Error())
  1291  	} else {
  1292  		h.Recordf("%s // [%t] %v", o, valid, i.Error())
  1293  	}
  1294  }
  1295  
  1296  func (o *iterFirstOp) String() string       { return fmt.Sprintf("%s.First()", o.iterID) }
  1297  func (o *iterFirstOp) receiver() objID      { return o.iterID }
  1298  func (o *iterFirstOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
  1299  
  1300  // iterLastOp models an Iterator.Last operation.
  1301  type iterLastOp struct {
  1302  	iterID objID
  1303  
  1304  	derivedReaderID objID
  1305  }
  1306  
  1307  func (o *iterLastOp) run(t *test, h historyRecorder) {
  1308  	i := t.getIter(o.iterID)
  1309  	valid := i.Last()
  1310  	if valid {
  1311  		h.Recordf("%s // [%t,%s] %v", o, valid, iteratorPos(i), i.Error())
  1312  	} else {
  1313  		h.Recordf("%s // [%t] %v", o, valid, i.Error())
  1314  	}
  1315  }
  1316  
  1317  func (o *iterLastOp) String() string       { return fmt.Sprintf("%s.Last()", o.iterID) }
  1318  func (o *iterLastOp) receiver() objID      { return o.iterID }
  1319  func (o *iterLastOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
  1320  
  1321  // iterNextOp models an Iterator.Next[WithLimit] operation.
  1322  type iterNextOp struct {
  1323  	iterID objID
  1324  	limit  []byte
  1325  
  1326  	derivedReaderID objID
  1327  }
  1328  
  1329  func (o *iterNextOp) run(t *test, h historyRecorder) {
  1330  	i := t.getIter(o.iterID)
  1331  	var valid bool
  1332  	var validStr string
  1333  	if o.limit == nil {
  1334  		valid = i.Next()
  1335  		validStr = validBoolToStr(valid)
  1336  	} else {
  1337  		valid, validStr = validityStateToStr(i.NextWithLimit(o.limit))
  1338  	}
  1339  	if valid {
  1340  		h.Recordf("%s // [%s,%s] %v", o, validStr, iteratorPos(i), i.Error())
  1341  	} else {
  1342  		h.Recordf("%s // [%s] %v", o, validStr, i.Error())
  1343  	}
  1344  }
  1345  
  1346  func (o *iterNextOp) String() string       { return fmt.Sprintf("%s.Next(%q)", o.iterID, o.limit) }
  1347  func (o *iterNextOp) receiver() objID      { return o.iterID }
  1348  func (o *iterNextOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
  1349  
  1350  // iterNextPrefixOp models an Iterator.NextPrefix operation.
  1351  type iterNextPrefixOp struct {
  1352  	iterID objID
  1353  
  1354  	derivedReaderID objID
  1355  }
  1356  
  1357  func (o *iterNextPrefixOp) run(t *test, h historyRecorder) {
  1358  	i := t.getIter(o.iterID)
  1359  	valid := i.NextPrefix()
  1360  	validStr := validBoolToStr(valid)
  1361  	if valid {
  1362  		h.Recordf("%s // [%s,%s] %v", o, validStr, iteratorPos(i), i.Error())
  1363  	} else {
  1364  		h.Recordf("%s // [%s] %v", o, validStr, i.Error())
  1365  	}
  1366  }
  1367  
  1368  func (o *iterNextPrefixOp) String() string       { return fmt.Sprintf("%s.NextPrefix()", o.iterID) }
  1369  func (o *iterNextPrefixOp) receiver() objID      { return o.iterID }
  1370  func (o *iterNextPrefixOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
  1371  
  1372  // iterCanSingleDelOp models a call to CanDeterministicallySingleDelete with an
  1373  // Iterator.
  1374  type iterCanSingleDelOp struct {
  1375  	iterID objID
  1376  
  1377  	derivedReaderID objID
  1378  }
  1379  
  1380  func (o *iterCanSingleDelOp) run(t *test, h historyRecorder) {
  1381  	// TODO(jackson): When we perform error injection, we'll need to rethink
  1382  	// this.
  1383  	_, err := pebble.CanDeterministicallySingleDelete(t.getIter(o.iterID).iter)
  1384  	// The return value of CanDeterministicallySingleDelete is dependent on
  1385  	// internal LSM state and non-deterministic, so we don't record it.
  1386  	// Including the operation within the metamorphic test at all helps ensure
  1387  	// that it does not change the result of any other Iterator operation that
  1388  	// should be deterministic, regardless of its own outcome.
  1389  	//
  1390  	// We still record the value of the error because it's deterministic, at
  1391  	// least for now. The possible error cases are:
  1392  	//  - The iterator was already in an error state when the operation ran.
  1393  	//  - The operation is deterministically invalid (like using an InternalNext
  1394  	//    to change directions.)
  1395  	h.Recordf("%s // %v", o, err)
  1396  }
  1397  
  1398  func (o *iterCanSingleDelOp) String() string       { return fmt.Sprintf("%s.InternalNext()", o.iterID) }
  1399  func (o *iterCanSingleDelOp) receiver() objID      { return o.iterID }
  1400  func (o *iterCanSingleDelOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
  1401  
  1402  // iterPrevOp models an Iterator.Prev[WithLimit] operation.
  1403  type iterPrevOp struct {
  1404  	iterID objID
  1405  	limit  []byte
  1406  
  1407  	derivedReaderID objID
  1408  }
  1409  
  1410  func (o *iterPrevOp) run(t *test, h historyRecorder) {
  1411  	i := t.getIter(o.iterID)
  1412  	var valid bool
  1413  	var validStr string
  1414  	if o.limit == nil {
  1415  		valid = i.Prev()
  1416  		validStr = validBoolToStr(valid)
  1417  	} else {
  1418  		valid, validStr = validityStateToStr(i.PrevWithLimit(o.limit))
  1419  	}
  1420  	if valid {
  1421  		h.Recordf("%s // [%s,%s] %v", o, validStr, iteratorPos(i), i.Error())
  1422  	} else {
  1423  		h.Recordf("%s // [%s] %v", o, validStr, i.Error())
  1424  	}
  1425  }
  1426  
  1427  func (o *iterPrevOp) String() string       { return fmt.Sprintf("%s.Prev(%q)", o.iterID, o.limit) }
  1428  func (o *iterPrevOp) receiver() objID      { return o.iterID }
  1429  func (o *iterPrevOp) syncObjs() objIDSlice { return onlyBatchIDs(o.derivedReaderID) }
  1430  
  1431  // newSnapshotOp models a DB.NewSnapshot operation.
  1432  type newSnapshotOp struct {
  1433  	dbID   objID
  1434  	snapID objID
  1435  	// If non-empty, this snapshot must not be used to read any keys outside of
  1436  	// the provided bounds. This allows some implementations to use 'eventually
  1437  	// file-only snapshots', which require bounds.
  1438  	bounds []pebble.KeyRange
  1439  }
  1440  
  1441  func (o *newSnapshotOp) run(t *test, h historyRecorder) {
  1442  	bounds := o.bounds
  1443  	if len(bounds) == 0 {
  1444  		panic("bounds unexpectedly unset for newSnapshotOp")
  1445  	}
  1446  	// Fibonacci hash https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
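        	// Multiplying by the golden-ratio constant mixes t.idx and the seed into
        	// the high bits, and shifting right by 63 keeps only the top bit: a
        	// deterministic coin flip per (seed, op index) that decides whether to
        	// create an eventually file-only snapshot.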
  1447  	createEfos := ((11400714819323198485 * uint64(t.idx) * t.testOpts.seedEFOS) >> 63) == 1
  1448  	// If either of these options is true, an EFOS _must_ be created, regardless
  1449  	// of what the fibonacci hash returned.
  1450  	excisePossible := t.testOpts.useSharedReplicate || t.testOpts.useExcise
  1451  	if createEfos || excisePossible {
  1452  		s := t.getDB(o.dbID).NewEventuallyFileOnlySnapshot(bounds)
  1453  		t.setSnapshot(o.snapID, s)
  1454  		// If the EFOS isn't guaranteed to always create iterators, we must force
  1455  		// a flush on this DB so it transitions this EFOS into a file-only snapshot.
  1456  		if excisePossible && !t.testOpts.efosAlwaysCreatesIters {
  1457  			err := t.getDB(o.dbID).Flush()
  1458  			if err != nil {
  1459  				h.Recordf("%s // %v", o, err)
  1460  				panic(errors.Wrap(err, "newSnapshotOp"))
  1461  			}
  1462  		}
  1463  	} else {
  1464  		s := t.getDB(o.dbID).NewSnapshot()
  1465  		t.setSnapshot(o.snapID, s)
  1466  	}
  1467  	h.Recordf("%s", o)
  1468  }
  1469  
  1470  func (o *newSnapshotOp) String() string {
  1471  	var buf bytes.Buffer
  1472  	fmt.Fprintf(&buf, "%s = %s.NewSnapshot(", o.snapID, o.dbID)
  1473  	for i := range o.bounds {
  1474  		if i > 0 {
  1475  			fmt.Fprint(&buf, ", ")
  1476  		}
  1477  		fmt.Fprintf(&buf, "%q, %q", o.bounds[i].Start, o.bounds[i].End)
  1478  	}
  1479  	fmt.Fprint(&buf, ")")
  1480  	return buf.String()
  1481  }
  1482  func (o *newSnapshotOp) receiver() objID      { return o.dbID }
  1483  func (o *newSnapshotOp) syncObjs() objIDSlice { return []objID{o.snapID} }
  1484  
  1485  type dbRatchetFormatMajorVersionOp struct {
  1486  	dbID objID
  1487  	vers pebble.FormatMajorVersion
  1488  }
  1489  
  1490  func (o *dbRatchetFormatMajorVersionOp) run(t *test, h historyRecorder) {
  1491  	var err error
  1492  	// NB: We no-op the operation if we're already at or above the provided
  1493  	// format major version. Different runs start at different format major
  1494  	// versions, making the presence of an error and the error message itself
  1495  	// non-deterministic if we attempt to upgrade to an older version.
  1496  	//
  1497  	// Regardless, subsequent operations should behave identically, which is what
  1498  	// we're really aiming to test by including this format major version ratchet
  1499  	// operation.
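        	// For example (hypothetical versions): a run that started at format
        	// major version 15 skips a ratchet to 13, while a run that started at
        	// 12 performs it; both record a nil error, keeping histories aligned.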
  1500  	if t.getDB(o.dbID).FormatMajorVersion() < o.vers {
  1501  		err = t.getDB(o.dbID).RatchetFormatMajorVersion(o.vers)
  1502  	}
  1503  	h.Recordf("%s // %v", o, err)
  1504  }
  1505  
  1506  func (o *dbRatchetFormatMajorVersionOp) String() string {
  1507  	return fmt.Sprintf("%s.RatchetFormatMajorVersion(%s)", o.dbID, o.vers)
  1508  }
  1509  func (o *dbRatchetFormatMajorVersionOp) receiver() objID      { return o.dbID }
  1510  func (o *dbRatchetFormatMajorVersionOp) syncObjs() objIDSlice { return nil }
  1511  
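        // dbRestartOp models a restart of the DB: the database is closed and
        // reopened, exercising state recovery across restarts.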
  1512  type dbRestartOp struct {
  1513  	dbID objID
  1514  }
  1515  
  1516  func (o *dbRestartOp) run(t *test, h historyRecorder) {
  1517  	if err := t.restartDB(o.dbID); err != nil {
  1518  		h.Recordf("%s // %v", o, err)
  1519  		h.history.err.Store(errors.Wrap(err, "dbRestartOp"))
  1520  	} else {
  1521  		h.Recordf("%s", o)
  1522  	}
  1523  }
  1524  
  1525  func (o *dbRestartOp) String() string       { return fmt.Sprintf("%s.Restart()", o.dbID) }
  1526  func (o *dbRestartOp) receiver() objID      { return o.dbID }
  1527  func (o *dbRestartOp) syncObjs() objIDSlice { return nil }
  1528  
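        // formatOps returns the string representation of the given ops, one per
        // line.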
  1529  func formatOps(ops []op) string {
  1530  	var buf strings.Builder
  1531  	for _, op := range ops {
  1532  		fmt.Fprintf(&buf, "%s\n", op)
  1533  	}
  1534  	return buf.String()
  1535  }
  1536  
  1537  // replicateOp models an operation that copies keys from one DB to another
  1538  // through either an IngestAndExcise or an Ingest.
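        // For illustration (hypothetical object IDs), a replicateOp from db1 to
        // db2 over ["a", "z") is rendered by String as:
        //
        //	db1.Replicate(db2, "a", "z")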
  1539  type replicateOp struct {
  1540  	source, dest objID
  1541  	start, end   []byte
  1542  }
  1543  
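        // runSharedReplicate replicates using a shared-storage scan: it scans the
        // source DB's internal state over [r.start, r.end), writing local point,
        // range-del, and range keys into the provided sstable writer and
        // collecting shared sstable metadata, then ingests both into dest via
        // IngestAndExcise.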
  1544  func (r *replicateOp) runSharedReplicate(
  1545  	t *test, h historyRecorder, source, dest *pebble.DB, w *sstable.Writer, sstPath string,
  1546  ) {
  1547  	var sharedSSTs []pebble.SharedSSTMeta
  1548  	var err error
  1549  	err = source.ScanInternal(context.TODO(), sstable.CategoryAndQoS{}, r.start, r.end,
  1550  		func(key *pebble.InternalKey, value pebble.LazyValue, _ pebble.IteratorLevel) error {
  1551  			val, _, err := value.Value(nil)
  1552  			if err != nil {
  1553  				panic(err)
  1554  			}
  1555  			return w.Add(base.MakeInternalKey(key.UserKey, 0, key.Kind()), val)
  1556  		},
  1557  		func(start, end []byte, seqNum uint64) error {
  1558  			return w.DeleteRange(start, end)
  1559  		},
  1560  		func(start, end []byte, keys []keyspan.Key) error {
  1561  			s := keyspan.Span{
  1562  				Start: start,
  1563  				End:   end,
  1564  				Keys:  keys,
  1565  			}
  1566  			return rangekey.Encode(&s, w.AddRangeKey)
  1567  		},
  1568  		func(sst *pebble.SharedSSTMeta) error {
  1569  			sharedSSTs = append(sharedSSTs, *sst)
  1570  			return nil
  1571  		},
  1572  	)
  1573  	if err != nil {
  1574  		h.Recordf("%s // %v", r, err)
  1575  		return
  1576  	}
  1577  
  1578  	err = w.Close()
  1579  	if err != nil {
  1580  		h.Recordf("%s // %v", r, err)
  1581  		return
  1582  	}
  1583  	meta, err := w.Metadata()
  1584  	if err != nil {
  1585  		h.Recordf("%s // %v", r, err)
  1586  		return
  1587  	}
  1588  	if len(sharedSSTs) == 0 && meta.Properties.NumEntries == 0 && meta.Properties.NumRangeKeys() == 0 {
  1589  		// IngestAndExcise below will be a no-op. We should do a
  1590  		// RangeKeyDelete+DeleteRange to mimic the behaviour of the
  1591  		// non-shared-replicate case.
  1592  		//
  1593  		// TODO(bilal): Remove this when we support excises with no matching ingests.
  1594  		if err := dest.RangeKeyDelete(r.start, r.end, t.writeOpts); err != nil {
  1595  			h.Recordf("%s // %v", r, err)
  1596  			return
  1597  		}
  1598  		err := dest.DeleteRange(r.start, r.end, t.writeOpts)
  1599  		h.Recordf("%s // %v", r, err)
  1600  		return
  1601  	}
  1602  
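        	// IngestAndExcise atomically excises all existing keys in
        	// [r.start, r.end) from dest and ingests the new sstable(s) in their
        	// place.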
  1603  	_, err = dest.IngestAndExcise([]string{sstPath}, sharedSSTs, pebble.KeyRange{Start: r.start, End: r.end})
  1604  	h.Recordf("%s // %v", r, err)
  1605  }
  1606  
  1607  func (r *replicateOp) run(t *test, h historyRecorder) {
  1608  	// Shared replication only works if shared storage is enabled.
  1609  	useSharedIngest := t.testOpts.useSharedReplicate
  1610  	if !t.testOpts.sharedStorageEnabled {
  1611  		useSharedIngest = false
  1612  	}
  1613  
  1614  	source := t.getDB(r.source)
  1615  	dest := t.getDB(r.dest)
  1616  	sstPath := path.Join(t.tmpDir, fmt.Sprintf("ext-replicate%d.sst", t.idx))
  1617  	f, err := t.opts.FS.Create(sstPath)
  1618  	if err != nil {
  1619  		h.Recordf("%s // %v", r, err)
  1620  		return
  1621  	}
  1622  	w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), t.opts.MakeWriterOptions(0, dest.FormatMajorVersion().MaxTableFormat()))
  1623  
  1624  	if useSharedIngest {
  1625  		r.runSharedReplicate(t, h, source, dest, w, sstPath)
  1626  		return
  1627  	}
  1628  
  1629  	// First, do a RangeKeyDelete and DeleteRange on the whole span.
  1630  	if err := dest.RangeKeyDelete(r.start, r.end, t.writeOpts); err != nil {
  1631  		h.Recordf("%s // %v", r, err)
  1632  		return
  1633  	}
  1634  	if err := dest.DeleteRange(r.start, r.end, t.writeOpts); err != nil {
  1635  		h.Recordf("%s // %v", r, err)
  1636  		return
  1637  	}
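        	// Then copy all point and range keys within [start, end) from source
        	// into a local sstable and ingest it into dest.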
  1638  	iter, err := source.NewIter(&pebble.IterOptions{
  1639  		LowerBound: r.start,
  1640  		UpperBound: r.end,
  1641  		KeyTypes:   pebble.IterKeyTypePointsAndRanges,
  1642  	})
  1643  	if err != nil {
  1644  		panic(err)
  1645  	}
  1646  	defer iter.Close()
  1647  
  1648  	for ok := iter.SeekGE(r.start); ok && iter.Error() == nil; ok = iter.Next() {
  1649  		hasPoint, hasRange := iter.HasPointAndRange()
  1650  		if hasPoint {
  1651  			val, err := iter.ValueAndErr()
  1652  			if err != nil {
  1653  				panic(err)
  1654  			}
  1655  			if err := w.Set(iter.Key(), val); err != nil {
  1656  				panic(err)
  1657  			}
  1658  		}
  1659  		if hasRange && iter.RangeKeyChanged() {
  1660  			rangeKeys := iter.RangeKeys()
  1661  			rkStart, rkEnd := iter.RangeBounds()
  1662  
  1663  			span := &keyspan.Span{Start: rkStart, End: rkEnd, Keys: make([]keyspan.Key, len(rangeKeys))}
  1664  			for i := range rangeKeys {
  1665  				span.Keys[i] = keyspan.Key{
  1666  					Trailer: base.MakeTrailer(0, base.InternalKeyKindRangeKeySet),
  1667  					Suffix:  rangeKeys[i].Suffix,
  1668  					Value:   rangeKeys[i].Value,
  1669  				}
  1670  			}
  1671  			keyspan.SortKeysByTrailer(&span.Keys)
  1672  			if err := rangekey.Encode(span, w.AddRangeKey); err != nil {
  1673  				panic(err)
  1674  			}
  1675  		}
  1676  	}
  1677  	if err := iter.Error(); err != nil {
  1678  		h.Recordf("%s // %v", r, err)
  1679  		return
  1680  	}
  1681  	if err := w.Close(); err != nil {
  1682  		panic(err)
  1683  	}
  1684  
  1685  	err = dest.Ingest([]string{sstPath})
  1686  	h.Recordf("%s // %v", r, err)
  1687  }
  1688  
  1689  func (r *replicateOp) String() string {
  1690  	return fmt.Sprintf("%s.Replicate(%s, %q, %q)", r.source, r.dest, r.start, r.end)
  1691  }
  1692  
  1693  func (r *replicateOp) receiver() objID      { return r.source }
  1694  func (r *replicateOp) syncObjs() objIDSlice { return objIDSlice{r.dest} }