github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/storage/bench_test.go

     1  // Copyright 2014 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package storage
    12  
    13  import (
    14  	"context"
    15  	"fmt"
    16  	"math"
    17  	"math/rand"
    18  	"os"
    19  	"path/filepath"
    20  	"testing"
    21  	"time"
    22  
    23  	"github.com/cockroachdb/cockroach/pkg/base"
    24  	"github.com/cockroachdb/cockroach/pkg/roachpb"
    25  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    26  	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
    27  	"github.com/cockroachdb/cockroach/pkg/testutils"
    28  	"github.com/cockroachdb/cockroach/pkg/util/encoding"
    29  	"github.com/cockroachdb/cockroach/pkg/util/fileutil"
    30  	"github.com/cockroachdb/cockroach/pkg/util/hlc"
    31  	"github.com/cockroachdb/cockroach/pkg/util/log"
    32  	"github.com/cockroachdb/cockroach/pkg/util/randutil"
    33  	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
    34  )
    35  
    36  const overhead = 48 // Per key/value overhead (empirically determined)
    37  
    38  type engineMaker func(testing.TB, string) Engine
    39  
    40  type benchDataOptions struct {
    41  	numVersions int
    42  	numKeys     int
    43  	valueBytes  int
    44  
    45  	// In transactional mode, data is written by writing and later resolving
    46  	// intents. In non-transactional mode, data is written directly, without
    47  	// leaving intents. Transactional mode notably stresses RocksDB deletion
    48  	// tombstones, as the metadata key is repeatedly written and deleted.
    49  	//
    50  	// Both modes are reflective of real workloads. Transactional mode simulates
    51  	// data that has recently been INSERTed into a table, while non-transactional
    52  	// mode simulates data that has been RESTOREd or is old enough to have been
    53  	// fully compacted.
    54  	transactional bool
    55  }
    56  
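// As an illustration of the two modes described above (the values are
// arbitrary and are not used by any benchmark in this file):
//
//	// Non-transactional: simulates RESTOREd or fully compacted data.
//	benchDataOptions{numVersions: 10, numKeys: 1000, valueBytes: 64}
//	// Transactional: simulates recently INSERTed data, stressing intent
//	// resolution and RocksDB deletion tombstones.
//	benchDataOptions{numVersions: 10, numKeys: 1000, valueBytes: 64, transactional: true}
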
    57  // loadTestData writes numKeys keys in numBatches separate batches. Keys are
    58  // written in order. Every key in a given batch has the same MVCC timestamp;
    59  // batch timestamps start at batchTimeSpan and increase in intervals of
    60  // batchTimeSpan.
    61  //
    62  // Importantly, writing keys in order convinces RocksDB to output one SST per
    63  // batch, where each SST contains keys of only one timestamp. E.g., writing A,B
    64  // at t0 and C at t1 will create two SSTs: one for A,B that only contains keys
    65  // at t0, and one for C that only contains keys at t1. Conversely, writing A, C
    66  // at t0 and B at t1 would create just one SST that contained A,B,C (due to an
    67  // immediate compaction).
    68  //
    69  // The creation of the database is time-consuming, so the caller can choose
    70  // whether to use a temporary or permanent location.
    71  func loadTestData(dir string, numKeys, numBatches, batchTimeSpan, valueBytes int) (Engine, error) {
    72  	ctx := context.Background()
    73  
    74  	exists := true
    75  	if _, err := os.Stat(dir); os.IsNotExist(err) {
    76  		exists = false
    77  	}
    78  
    79  	eng, err := NewRocksDB(
    80  		RocksDBConfig{
    81  			StorageConfig: base.StorageConfig{
    82  				Settings: cluster.MakeTestingClusterSettings(),
    83  				Dir:      dir,
    84  			},
    85  		},
    86  		RocksDBCache{},
    87  	)
    88  	if err != nil {
    89  		return nil, err
    90  	}
    91  
    92  	if exists {
    93  		testutils.ReadAllFiles(filepath.Join(dir, "*"))
    94  		return eng, nil
    95  	}
    96  
    97  	log.Infof(ctx, "creating test data: %s", dir)
    98  
    99  	// Generate the same data every time.
   100  	rng := rand.New(rand.NewSource(1449168817))
   101  
   102  	keys := make([]roachpb.Key, numKeys)
   103  	for i := 0; i < numKeys; i++ {
   104  		keys[i] = roachpb.Key(encoding.EncodeUvarintAscending([]byte("key-"), uint64(i)))
   105  	}
   106  
   107  	sstTimestamps := make([]int64, numBatches)
   108  	for i := 0; i < len(sstTimestamps); i++ {
   109  		sstTimestamps[i] = int64((i + 1) * batchTimeSpan)
   110  	}
   111  
   112  	var batch Batch
   113  	var minWallTime int64
   114  	for i, key := range keys {
   115  		if scaled := len(keys) / numBatches; (i % scaled) == 0 {
   116  			if i > 0 {
   117  				log.Infof(ctx, "committing (%d/~%d)", i/scaled, numBatches)
   118  				if err := batch.Commit(false /* sync */); err != nil {
   119  					return nil, err
   120  				}
   121  				batch.Close()
   122  				if err := eng.Flush(); err != nil {
   123  					return nil, err
   124  				}
   125  			}
   126  			batch = eng.NewBatch()
   127  			minWallTime = sstTimestamps[i/scaled]
   128  		}
   129  		timestamp := hlc.Timestamp{WallTime: minWallTime + rng.Int63n(int64(batchTimeSpan))}
   130  		value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueBytes))
   131  		value.InitChecksum(key)
   132  		if err := MVCCPut(ctx, batch, nil, key, timestamp, value, nil); err != nil {
   133  			return nil, err
   134  		}
   135  	}
   136  	if err := batch.Commit(false /* sync */); err != nil {
   137  		return nil, err
   138  	}
   139  	batch.Close()
   140  	if err := eng.Flush(); err != nil {
   141  		return nil, err
   142  	}
   143  
   144  	return eng, nil
   145  }
   146  
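// exampleLoadTestDataUsage is an illustrative sketch, not one of the original
// benchmarks: it shows how loadTestData is typically driven. Using a fixed
// directory name lets the expensive dataset be reused across runs, while a
// throwaway temp directory would rebuild it every time. The directory name and
// parameter values below are arbitrary.
func exampleLoadTestDataUsage(b *testing.B) {
	eng, err := loadTestData("mvcc_scanner_bench", 10000 /* numKeys */, 100 /* numBatches */,
		10 /* batchTimeSpan */, 64 /* valueBytes */)
	if err != nil {
		b.Fatal(err)
	}
	defer eng.Close()
	// The timed portion of a real benchmark would iterate over eng here.
}
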
   147  // setupMVCCData writes up to numVersions values at each of numKeys
   148  // keys. The number of versions written for each key is chosen
   149  // randomly according to a uniform distribution. Each successive
   150  // version is written starting at 5ns and then in 5ns increments. This
   151  // allows scans at various times, starting at t=5ns, and continuing to
   152  // t=5ns*(numVersions+1). A version for each key will be read on every
   153  // such scan, but the dynamics of the scan will change depending on
   154  // the historical timestamp. Earlier timestamps mean scans which must
   155  // skip more historical versions; later timestamps mean scans which
   156  // skip fewer.
   157  //
   158  // The creation of the database is time-consuming, especially for larger
   159  // numbers of versions. The database is persisted between runs and stored in
   160  // the current directory as "mvcc_data_<versions>_<keys>_<valueBytes>" (with a
   161  // "_txn" suffix in transactional mode); this location is also returned.
   162  func setupMVCCData(
   163  	ctx context.Context, b *testing.B, emk engineMaker, opts benchDataOptions,
   164  ) (Engine, string) {
   165  	loc := fmt.Sprintf("mvcc_data_%d_%d_%d", opts.numVersions, opts.numKeys, opts.valueBytes)
   166  	if opts.transactional {
   167  		loc += "_txn"
   168  	}
   169  
   170  	exists := true
   171  	if _, err := os.Stat(loc); os.IsNotExist(err) {
   172  		exists = false
   173  	} else if err != nil {
   174  		b.Fatal(err)
   175  	}
   176  
   177  	eng := emk(b, loc)
   178  
   179  	if exists {
   180  		testutils.ReadAllFiles(filepath.Join(loc, "*"))
   181  		return eng, loc
   182  	}
   183  
   184  	log.Infof(ctx, "creating mvcc data: %s", loc)
   185  
   186  	// Generate the same data every time.
   187  	rng := rand.New(rand.NewSource(1449168817))
   188  
   189  	keys := make([]roachpb.Key, opts.numKeys)
   190  	var order []int
   191  	for i := 0; i < opts.numKeys; i++ {
   192  		keys[i] = roachpb.Key(encoding.EncodeUvarintAscending([]byte("key-"), uint64(i)))
   193  		keyVersions := rng.Intn(opts.numVersions) + 1
   194  		for j := 0; j < keyVersions; j++ {
   195  			order = append(order, i)
   196  		}
   197  	}
   198  
   199  	// Randomize the order in which the keys are written.
   200  	for i, n := 0, len(order); i < n-1; i++ {
   201  		j := i + rng.Intn(n-i)
   202  		order[i], order[j] = order[j], order[i]
   203  	}
   204  
   205  	counts := make([]int, opts.numKeys)
   206  
   207  	var txn *roachpb.Transaction
   208  	if opts.transactional {
   209  		txnCopy := *txn1Commit
   210  		txn = &txnCopy
   211  	}
   212  
   213  	writeKey := func(batch Batch, idx int) {
   214  		key := keys[idx]
   215  		value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, opts.valueBytes))
   216  		value.InitChecksum(key)
   217  		counts[idx]++
   218  		ts := hlc.Timestamp{WallTime: int64(counts[idx] * 5)}
   219  		if txn != nil {
   220  			txn.ReadTimestamp = ts
   221  			txn.WriteTimestamp = ts
   222  		}
   223  		if err := MVCCPut(ctx, batch, nil /* ms */, key, ts, value, txn); err != nil {
   224  			b.Fatal(err)
   225  		}
   226  	}
   227  
   228  	resolveLastIntent := func(batch Batch, idx int) {
   229  		key := keys[idx]
   230  		txnMeta := txn.TxnMeta
   231  		txnMeta.WriteTimestamp = hlc.Timestamp{WallTime: int64(counts[idx]) * 5}
   232  		if _, err := MVCCResolveWriteIntent(ctx, batch, nil /* ms */, roachpb.LockUpdate{
   233  			Span:   roachpb.Span{Key: key},
   234  			Status: roachpb.COMMITTED,
   235  			Txn:    txnMeta,
   236  		}); err != nil {
   237  			b.Fatal(err)
   238  		}
   239  	}
   240  
   241  	batch := eng.NewBatch()
   242  	for i, idx := range order {
   243  		// Output the keys in ~20 batches. If we used a single batch to output all
   244  		// of the keys, RocksDB would create a single sstable. We want multiple
   245  		// sstables in order to exercise filtering of which sstables are examined
   246  		// during iterator seeking. We fix the number of batches we output so that
   247  		// optimizations which change the data size result in the same number of
   248  		// sstables.
   249  		if scaled := len(order) / 20; i > 0 && (i%scaled) == 0 {
   250  			log.Infof(ctx, "committing (%d/~%d)", i/scaled, 20)
   251  			if err := batch.Commit(false /* sync */); err != nil {
   252  				b.Fatal(err)
   253  			}
   254  			batch.Close()
   255  			batch = eng.NewBatch()
   256  			if err := eng.Flush(); err != nil {
   257  				b.Fatal(err)
   258  			}
   259  		}
   260  
   261  		if opts.transactional {
   262  			// If we've previously written this key transactionally, we need to
   263  			// resolve the intent we left. We don't do this immediately after writing
   264  			// the key to introduce the possibility that the intent's resolution ends
   265  			// up in a different batch than writing the intent itself. Note that the
   266  			// first time through this loop for any given key we'll attempt to resolve
   267  			// a non-existent intent, but that's OK.
   268  			resolveLastIntent(batch, idx)
   269  		}
   270  		writeKey(batch, idx)
   271  	}
   272  	if opts.transactional {
   273  		// If we were writing transactionally, we need to do one last round of
   274  		// intent resolution. Just stuff it all into the last batch.
   275  		for idx := range keys {
   276  			resolveLastIntent(batch, idx)
   277  		}
   278  	}
   279  	if err := batch.Commit(false /* sync */); err != nil {
   280  		b.Fatal(err)
   281  	}
   282  	batch.Close()
   283  	if err := eng.Flush(); err != nil {
   284  		b.Fatal(err)
   285  	}
   286  
   287  	return eng, loc
   288  }
   289  
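// exampleSetupMVCCDataUsage is an illustrative sketch of how the helpers above
// compose: an engineMaker supplied by the caller is paired with
// benchDataOptions, and the location returned by setupMVCCData is reused on
// subsequent runs rather than regenerated. The option values are arbitrary.
func exampleSetupMVCCDataUsage(ctx context.Context, b *testing.B, emk engineMaker) {
	opts := benchDataOptions{
		numVersions:   10,
		numKeys:       10000,
		valueBytes:    64,
		transactional: true, // stress intent resolution, as described above
	}
	eng, loc := setupMVCCData(ctx, b, emk, opts)
	defer eng.Close()
	log.Infof(ctx, "benchmark data stored in %s", loc)
	// The timed portion of a real benchmark would operate on eng here.
}
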
   290  type benchScanOptions struct {
   291  	benchDataOptions
   292  	numRows int
   293  	reverse bool
   294  }
   295  
   296  // runMVCCScan first creates test data (and resets the benchmarking
   297  // timer). It then performs b.N MVCCScans of numRows keys each, with
   298  // every scan starting at a randomly chosen key and reading at a
   299  // randomly chosen timestamp within the written range of versions.
   300  func runMVCCScan(ctx context.Context, b *testing.B, emk engineMaker, opts benchScanOptions) {
   301  	// Use the same number of keys for all of the mvcc scan
   302  	// benchmarks. Using a different number of keys per test gives
   303  	// preferential treatment to tests with fewer keys. Note that the
   304  	// datasets all fit in cache and the cache is pre-warmed.
   305  	if opts.numKeys != 0 {
   306  		b.Fatal("test error: cannot call runMVCCScan with non-zero numKeys")
   307  	}
   308  	opts.numKeys = 100000
   309  
   310  	eng, _ := setupMVCCData(ctx, b, emk, opts.benchDataOptions)
   311  	defer eng.Close()
   312  
   313  	{
   314  		// Pull all of the sstables into the RocksDB cache in order to make the
   315  		// timings more stable. Otherwise, the first run will be penalized pulling
   316  		// data into the cache while later runs will not.
   317  		iter := eng.NewIterator(IterOptions{UpperBound: roachpb.KeyMax})
   318  		_, _ = iter.ComputeStats(roachpb.KeyMin, roachpb.KeyMax, 0)
   319  		iter.Close()
   320  	}
   321  
   322  	b.SetBytes(int64(opts.numRows * opts.valueBytes))
   323  	b.ResetTimer()
   324  
   325  	startKeyBuf := append(make([]byte, 0, 64), []byte("key-")...)
   326  	endKeyBuf := append(make([]byte, 0, 64), []byte("key-")...)
   327  	for i := 0; i < b.N; i++ {
   328  		// Choose a random key to start scan.
   329  		keyIdx := rand.Int31n(int32(opts.numKeys - opts.numRows))
   330  		startKey := roachpb.Key(encoding.EncodeUvarintAscending(startKeyBuf[:4], uint64(keyIdx)))
   331  		endKey := roachpb.Key(encoding.EncodeUvarintAscending(endKeyBuf[:4], uint64(keyIdx+int32(opts.numRows)-1)))
   332  		endKey = endKey.Next()
   333  		walltime := int64(5 * (rand.Int31n(int32(opts.numVersions)) + 1))
   334  		ts := hlc.Timestamp{WallTime: walltime}
   335  		res, err := MVCCScan(ctx, eng, startKey, endKey, ts, MVCCScanOptions{
   336  			MaxKeys: int64(opts.numRows),
   337  			Reverse: opts.reverse,
   338  		})
   339  		if err != nil {
   340  			b.Fatalf("failed scan: %+v", err)
   341  		}
   342  		if len(res.KVs) != opts.numRows {
   343  			b.Fatalf("failed to scan: %d != %d", len(res.KVs), opts.numRows)
   344  		}
   345  	}
   346  
   347  	b.StopTimer()
   348  }
   349  
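// exampleMVCCScanBenchmark is an illustrative sketch, not one of the original
// benchmarks: a driver of this shape feeds runMVCCScan via nested b.Run
// sub-benchmarks, varying the row count and scan direction. numKeys is left
// zero because runMVCCScan sets it itself; the remaining values are arbitrary.
func exampleMVCCScanBenchmark(b *testing.B, emk engineMaker) {
	ctx := context.Background()
	for _, numRows := range []int{1, 10, 100, 1000} {
		for _, reverse := range []bool{false, true} {
			b.Run(fmt.Sprintf("rows=%d/reverse=%t", numRows, reverse), func(b *testing.B) {
				runMVCCScan(ctx, b, emk, benchScanOptions{
					benchDataOptions: benchDataOptions{numVersions: 10, valueBytes: 64},
					numRows:          numRows,
					reverse:          reverse,
				})
			})
		}
	}
}
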
   350  // runMVCCGet first creates test data (and resets the benchmarking
   351  // timer). It then performs b.N MVCCGets.
   352  func runMVCCGet(ctx context.Context, b *testing.B, emk engineMaker, opts benchDataOptions) {
   353  	// Use the same number of keys for all of the mvcc get
   354  	// benchmarks. Using a different number of keys per test gives
   355  	// preferential treatment to tests with fewer keys. Note that the
   356  	// datasets all fit in cache and the cache is pre-warmed.
   357  	if opts.numKeys != 0 {
   358  		b.Fatal("test error: cannot call runMVCCGet with non-zero numKeys")
   359  	}
   360  	opts.numKeys = 100000
   361  
   362  	eng, _ := setupMVCCData(ctx, b, emk, opts)
   363  	defer eng.Close()
   364  
   365  	b.SetBytes(int64(opts.valueBytes))
   366  	b.ResetTimer()
   367  
   368  	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)
   369  	for i := 0; i < b.N; i++ {
   370  		// Choose a random key to retrieve.
   371  		keyIdx := rand.Int31n(int32(opts.numKeys))
   372  		key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(keyIdx)))
   373  		walltime := int64(5 * (rand.Int31n(int32(opts.numVersions)) + 1))
   374  		ts := hlc.Timestamp{WallTime: walltime}
   375  		if v, _, err := MVCCGet(ctx, eng, key, ts, MVCCGetOptions{}); err != nil {
   376  			b.Fatalf("failed get: %+v", err)
   377  		} else if v == nil {
   378  			b.Fatalf("failed get (key not found): %d@%d", keyIdx, walltime)
   379  		} else if valueBytes, err := v.GetBytes(); err != nil {
   380  			b.Fatal(err)
   381  		} else if len(valueBytes) != opts.valueBytes {
   382  			b.Fatalf("unexpected value size: %d", len(valueBytes))
   383  		}
   384  	}
   385  
   386  	b.StopTimer()
   387  }
   388  
   389  func runMVCCPut(ctx context.Context, b *testing.B, emk engineMaker, valueSize int) {
   390  	rng, _ := randutil.NewPseudoRand()
   391  	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
   392  	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)
   393  
   394  	eng := emk(b, fmt.Sprintf("put_%d", valueSize))
   395  	defer eng.Close()
   396  
   397  	b.SetBytes(int64(valueSize))
   398  	b.ResetTimer()
   399  
   400  	for i := 0; i < b.N; i++ {
   401  		key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
   402  		ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
   403  		if err := MVCCPut(ctx, eng, nil, key, ts, value, nil); err != nil {
   404  			b.Fatalf("failed put: %+v", err)
   405  		}
   406  	}
   407  
   408  	b.StopTimer()
   409  }
   410  
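// exampleMVCCPutBenchmark is an illustrative sketch of a driver for runMVCCPut;
// the same shape applies to runMVCCBlindPut, runMVCCConditionalPut,
// runMVCCInitPut and their blind variants, with only the helper swapped out.
// The value sizes are arbitrary.
func exampleMVCCPutBenchmark(b *testing.B, emk engineMaker) {
	ctx := context.Background()
	for _, valueSize := range []int{10, 100, 1000, 10000} {
		b.Run(fmt.Sprintf("valueSize=%d", valueSize), func(b *testing.B) {
			runMVCCPut(ctx, b, emk, valueSize)
		})
	}
}
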
   411  func runMVCCBlindPut(ctx context.Context, b *testing.B, emk engineMaker, valueSize int) {
   412  	rng, _ := randutil.NewPseudoRand()
   413  	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
   414  	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)
   415  
   416  	eng := emk(b, fmt.Sprintf("put_%d", valueSize))
   417  	defer eng.Close()
   418  
   419  	b.SetBytes(int64(valueSize))
   420  	b.ResetTimer()
   421  
   422  	for i := 0; i < b.N; i++ {
   423  		key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
   424  		ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
   425  		if err := MVCCBlindPut(ctx, eng, nil, key, ts, value, nil); err != nil {
   426  			b.Fatalf("failed put: %+v", err)
   427  		}
   428  	}
   429  
   430  	b.StopTimer()
   431  }
   432  
   433  func runMVCCConditionalPut(
   434  	ctx context.Context, b *testing.B, emk engineMaker, valueSize int, createFirst bool,
   435  ) {
   436  	rng, _ := randutil.NewPseudoRand()
   437  	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
   438  	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)
   439  
   440  	eng := emk(b, fmt.Sprintf("cput_%d", valueSize))
   441  	defer eng.Close()
   442  
   443  	b.SetBytes(int64(valueSize))
   444  	var expected *roachpb.Value
   445  	if createFirst {
   446  		for i := 0; i < b.N; i++ {
   447  			key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
   448  			ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
   449  			if err := MVCCPut(ctx, eng, nil, key, ts, value, nil); err != nil {
   450  				b.Fatalf("failed put: %+v", err)
   451  			}
   452  		}
   453  		expected = &value
   454  	}
   455  
   456  	b.ResetTimer()
   457  
   458  	for i := 0; i < b.N; i++ {
   459  		key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
   460  		ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
   461  		if err := MVCCConditionalPut(ctx, eng, nil, key, ts, value, expected, CPutFailIfMissing, nil); err != nil {
   462  			b.Fatalf("failed put: %+v", err)
   463  		}
   464  	}
   465  
   466  	b.StopTimer()
   467  }
   468  
   469  func runMVCCBlindConditionalPut(ctx context.Context, b *testing.B, emk engineMaker, valueSize int) {
   470  	rng, _ := randutil.NewPseudoRand()
   471  	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
   472  	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)
   473  
   474  	eng := emk(b, fmt.Sprintf("cput_%d", valueSize))
   475  	defer eng.Close()
   476  
   477  	b.SetBytes(int64(valueSize))
   478  	b.ResetTimer()
   479  
   480  	for i := 0; i < b.N; i++ {
   481  		key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
   482  		ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
   483  		if err := MVCCBlindConditionalPut(ctx, eng, nil, key, ts, value, nil, CPutFailIfMissing, nil); err != nil {
   484  			b.Fatalf("failed put: %+v", err)
   485  		}
   486  	}
   487  
   488  	b.StopTimer()
   489  }
   490  
   491  func runMVCCInitPut(ctx context.Context, b *testing.B, emk engineMaker, valueSize int) {
   492  	rng, _ := randutil.NewPseudoRand()
   493  	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
   494  	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)
   495  
   496  	eng := emk(b, fmt.Sprintf("iput_%d", valueSize))
   497  	defer eng.Close()
   498  
   499  	b.SetBytes(int64(valueSize))
   500  	b.ResetTimer()
   501  
   502  	for i := 0; i < b.N; i++ {
   503  		key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
   504  		ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
   505  		if err := MVCCInitPut(ctx, eng, nil, key, ts, value, false, nil); err != nil {
   506  			b.Fatalf("failed put: %+v", err)
   507  		}
   508  	}
   509  
   510  	b.StopTimer()
   511  }
   512  
   513  func runMVCCBlindInitPut(ctx context.Context, b *testing.B, emk engineMaker, valueSize int) {
   514  	rng, _ := randutil.NewPseudoRand()
   515  	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
   516  	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)
   517  
   518  	eng := emk(b, fmt.Sprintf("iput_%d", valueSize))
   519  	defer eng.Close()
   520  
   521  	b.SetBytes(int64(valueSize))
   522  	b.ResetTimer()
   523  
   524  	for i := 0; i < b.N; i++ {
   525  		key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
   526  		ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
   527  		if err := MVCCBlindInitPut(ctx, eng, nil, key, ts, value, false, nil); err != nil {
   528  			b.Fatalf("failed put: %+v", err)
   529  		}
   530  	}
   531  
   532  	b.StopTimer()
   533  }
   534  
   535  func runMVCCBatchPut(ctx context.Context, b *testing.B, emk engineMaker, valueSize, batchSize int) {
   536  	rng, _ := randutil.NewPseudoRand()
   537  	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
   538  	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)
   539  
   540  	eng := emk(b, fmt.Sprintf("batch_put_%d_%d", valueSize, batchSize))
   541  	defer eng.Close()
   542  
   543  	b.SetBytes(int64(valueSize))
   544  	b.ResetTimer()
   545  
   546  	for i := 0; i < b.N; i += batchSize {
   547  		end := i + batchSize
   548  		if end > b.N {
   549  			end = b.N
   550  		}
   551  
   552  		batch := eng.NewBatch()
   553  
   554  		for j := i; j < end; j++ {
   555  			key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(j)))
   556  			ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
   557  			if err := MVCCPut(ctx, batch, nil, key, ts, value, nil); err != nil {
   558  				b.Fatalf("failed put: %+v", err)
   559  			}
   560  		}
   561  
   562  		if err := batch.Commit(false /* sync */); err != nil {
   563  			b.Fatal(err)
   564  		}
   565  
   566  		batch.Close()
   567  	}
   568  
   569  	b.StopTimer()
   570  }
   571  
   572  // Benchmark batch time series merge operations. This benchmark does not
   573  // perform any reads and is only used to measure the cost of the periodic time
   574  // series updates.
   575  func runMVCCBatchTimeSeries(ctx context.Context, b *testing.B, emk engineMaker, batchSize int) {
   576  	// Precompute keys so we don't waste time formatting them at each iteration.
   577  	numKeys := batchSize
   578  	keys := make([]roachpb.Key, numKeys)
   579  	for i := 0; i < numKeys; i++ {
   580  		keys[i] = roachpb.Key(fmt.Sprintf("key-%d", i))
   581  	}
   582  
   583  	// We always write the same time series data (containing a single unchanging
   584  	// sample). This isn't realistic but is fine because we're never reading the
   585  	// data.
   586  	var value roachpb.Value
   587  	if err := value.SetProto(&roachpb.InternalTimeSeriesData{
   588  		StartTimestampNanos: 0,
   589  		SampleDurationNanos: 1000,
   590  		Samples: []roachpb.InternalTimeSeriesSample{
   591  			{Offset: 0, Count: 1, Sum: 5.0},
   592  		},
   593  	}); err != nil {
   594  		b.Fatal(err)
   595  	}
   596  
   597  	eng := emk(b, fmt.Sprintf("batch_merge_%d", batchSize))
   598  	defer eng.Close()
   599  
   600  	b.ResetTimer()
   601  
   602  	var ts hlc.Timestamp
   603  	for i := 0; i < b.N; i++ {
   604  		batch := eng.NewBatch()
   605  
   606  		for j := 0; j < batchSize; j++ {
   607  			ts.Logical++
   608  			if err := MVCCMerge(ctx, batch, nil, keys[j], ts, value); err != nil {
   609  				b.Fatalf("failed put: %+v", err)
   610  			}
   611  		}
   612  
   613  		if err := batch.Commit(false /* sync */); err != nil {
   614  			b.Fatal(err)
   615  		}
   616  		batch.Close()
   617  	}
   618  
   619  	b.StopTimer()
   620  }
   621  
   622  // runMVCCMerge merges value into numKeys separate keys.
   623  func runMVCCMerge(
   624  	ctx context.Context, b *testing.B, emk engineMaker, value *roachpb.Value, numKeys int,
   625  ) {
   626  	eng := emk(b, fmt.Sprintf("merge_%d", numKeys))
   627  	defer eng.Close()
   628  
   629  	// Precompute keys so we don't waste time formatting them at each iteration.
   630  	keys := make([]roachpb.Key, numKeys)
   631  	for i := 0; i < numKeys; i++ {
   632  		keys[i] = roachpb.Key(fmt.Sprintf("key-%d", i))
   633  	}
   634  
   635  	b.ResetTimer()
   636  
   637  	ts := hlc.Timestamp{}
   638  	// Use parallelism if specified when the test is run.
   639  	b.RunParallel(func(pb *testing.PB) {
   640  		for pb.Next() {
   641  			ms := enginepb.MVCCStats{}
   642  			ts.Logical++
   643  			err := MVCCMerge(ctx, eng, &ms, keys[rand.Intn(numKeys)], ts, *value)
   644  			if err != nil {
   645  				b.Fatal(err)
   646  			}
   647  		}
   648  	})
   649  	b.StopTimer()
   650  
   651  	// Read values out to force merge.
   652  	for _, key := range keys {
   653  		val, _, err := MVCCGet(ctx, eng, key, hlc.Timestamp{}, MVCCGetOptions{})
   654  		if err != nil {
   655  			b.Fatal(err)
   656  		} else if val == nil {
   657  			continue
   658  		}
   659  	}
   660  }
   661  
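// exampleMVCCMergeBenchmark is an illustrative sketch showing how runMVCCMerge
// is supplied with its merge operand: a roachpb.Value wrapping an
// InternalTimeSeriesData proto, mirroring the time series writes for which the
// merge operator is used in practice. The sample contents and numKeys are
// arbitrary.
func exampleMVCCMergeBenchmark(b *testing.B, emk engineMaker) {
	ctx := context.Background()
	var value roachpb.Value
	if err := value.SetProto(&roachpb.InternalTimeSeriesData{
		StartTimestampNanos: 0,
		SampleDurationNanos: 1000,
		Samples:             []roachpb.InternalTimeSeriesSample{{Offset: 0, Count: 1, Sum: 5.0}},
	}); err != nil {
		b.Fatal(err)
	}
	runMVCCMerge(ctx, b, emk, &value, 1024 /* numKeys */)
}
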
   662  // runMVCCGetMergedValue reads merged values for numKeys separate keys and mergesPerKey
   663  // operands per key.
   664  func runMVCCGetMergedValue(
   665  	ctx context.Context, b *testing.B, emk engineMaker, numKeys, mergesPerKey int,
   666  ) {
   667  	eng := emk(b, fmt.Sprintf("get_merged_%d_%d", numKeys, mergesPerKey))
   668  	defer eng.Close()
   669  
   670  	// Precompute keys so we don't waste time formatting them at each iteration.
   671  	keys := make([]roachpb.Key, numKeys)
   672  	for i := 0; i < numKeys; i++ {
   673  		keys[i] = roachpb.Key(fmt.Sprintf("key-%d", i))
   674  	}
   675  
   676  	timestamp := hlc.Timestamp{}
   677  	for i := 0; i < numKeys; i++ {
   678  		for j := 0; j < mergesPerKey; j++ {
   679  			timeseries := &roachpb.InternalTimeSeriesData{
   680  				StartTimestampNanos: 0,
   681  				SampleDurationNanos: 1000,
   682  				Samples: []roachpb.InternalTimeSeriesSample{
   683  					{Offset: int32(j), Count: 1, Sum: 5.0},
   684  				},
   685  			}
   686  			var value roachpb.Value
   687  			if err := value.SetProto(timeseries); err != nil {
   688  				b.Fatal(err)
   689  			}
   690  			ms := enginepb.MVCCStats{}
   691  			timestamp.Logical++
   692  			err := MVCCMerge(ctx, eng, &ms, keys[i], timestamp, value)
   693  			if err != nil {
   694  				b.Fatal(err)
   695  			}
   696  		}
   697  	}
   698  
   699  	b.ResetTimer()
   700  	for i := 0; i < b.N; i++ {
   701  		_, _, err := MVCCGet(ctx, eng, keys[rand.Intn(numKeys)], timestamp, MVCCGetOptions{})
   702  		if err != nil {
   703  			b.Fatal(err)
   704  		}
   705  	}
   706  	b.StopTimer()
   707  }
   708  
   709  func runMVCCDeleteRange(ctx context.Context, b *testing.B, emk engineMaker, valueBytes int) {
   710  	// 512 KB ranges so the benchmark doesn't take forever
   711  	const rangeBytes = 512 * 1024
   712  	numKeys := rangeBytes / (overhead + valueBytes)
   713  	eng, dir := setupMVCCData(ctx, b, emk, benchDataOptions{
   714  		numVersions: 1,
   715  		numKeys:     numKeys,
   716  		valueBytes:  valueBytes,
   717  	})
   718  	eng.Close()
   719  
   720  	b.SetBytes(rangeBytes)
   721  	b.StopTimer()
   722  	b.ResetTimer()
   723  
   724  	locDirty := dir + "_dirty"
   725  
   726  	for i := 0; i < b.N; i++ {
   727  		if err := os.RemoveAll(locDirty); err != nil {
   728  			b.Fatal(err)
   729  		}
   730  		if err := fileutil.CopyDir(dir, locDirty); err != nil {
   731  			b.Fatal(err)
   732  		}
   733  		func() {
   734  			eng := emk(b, locDirty)
   735  			defer eng.Close()
   736  
   737  			b.StartTimer()
   738  			if _, _, _, err := MVCCDeleteRange(
   739  				ctx,
   740  				eng,
   741  				&enginepb.MVCCStats{},
   742  				roachpb.KeyMin,
   743  				roachpb.KeyMax,
   744  				math.MaxInt64,
   745  				hlc.MaxTimestamp,
   746  				nil,
   747  				false,
   748  			); err != nil {
   749  				b.Fatal(err)
   750  			}
   751  			b.StopTimer()
   752  		}()
   753  	}
   754  }
   755  
   756  func runClearRange(
   757  	ctx context.Context,
   758  	b *testing.B,
   759  	emk engineMaker,
   760  	clearRange func(e Engine, b Batch, start, end MVCCKey) error,
   761  ) {
   762  	const rangeBytes = 64 << 20
   763  	const valueBytes = 92
   764  	numKeys := rangeBytes / (overhead + valueBytes)
   765  	eng, _ := setupMVCCData(ctx, b, emk, benchDataOptions{
   766  		numVersions: 1,
   767  		numKeys:     numKeys,
   768  		valueBytes:  valueBytes,
   769  	})
   770  	defer eng.Close()
   771  
   772  	// It is not currently possible to ClearRange(NilKey, MVCCKeyMax) thanks to a
   773  	// variety of hacks inside of ClearRange that explode if provided the NilKey.
   774  	// So instead we start our ClearRange at the first key that actually exists.
   775  	//
   776  	// TODO(benesch): when those hacks are removed, don't bother computing the
   777  	// first key and simply ClearRange(NilKey, MVCCKeyMax).
   778  	iter := eng.NewIterator(IterOptions{UpperBound: roachpb.KeyMax})
   779  	defer iter.Close()
   780  	iter.SeekGE(NilKey)
   781  	if ok, err := iter.Valid(); !ok {
   782  		b.Fatalf("unable to find first key (err: %v)", err)
   783  	}
   784  	firstKey := iter.Key()
   785  
   786  	b.SetBytes(rangeBytes)
   787  	b.ResetTimer()
   788  
   789  	for i := 0; i < b.N; i++ {
   790  		batch := eng.NewWriteOnlyBatch()
   791  		if err := clearRange(eng, batch, firstKey, MVCCKeyMax); err != nil {
   792  			b.Fatal(err)
   793  		}
   794  		// NB: We don't actually commit the batch here as we don't want to delete
   795  		// the data. Doing so would require repopulating on every iteration of the
   796  		// loop which was ok when ClearRange was slow but now causes the benchmark
   797  		// to take an exceptionally long time since ClearRange is very fast.
   798  		batch.Close()
   799  	}
   800  
   801  	b.StopTimer()
   802  }
   803  
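// exampleClearRangeBenchmark is an illustrative sketch of how runClearRange's
// clearRange callback is supplied: the caller chooses which engine primitive
// clears [start, end). The batch-level ClearRange call below is the assumption
// here; an iterator-based variant could be passed instead, and the helper only
// requires some callback with this signature.
func exampleClearRangeBenchmark(ctx context.Context, b *testing.B, emk engineMaker) {
	runClearRange(ctx, b, emk, func(eng Engine, batch Batch, start, end MVCCKey) error {
		return batch.ClearRange(start, end)
	})
}
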
   804  // runMVCCComputeStats benchmarks computing MVCC stats on a 64MB range of data.
   805  func runMVCCComputeStats(ctx context.Context, b *testing.B, emk engineMaker, valueBytes int) {
   806  	const rangeBytes = 64 * 1024 * 1024
   807  	numKeys := rangeBytes / (overhead + valueBytes)
   808  	eng, _ := setupMVCCData(ctx, b, emk, benchDataOptions{
   809  		numVersions: 1,
   810  		numKeys:     numKeys,
   811  		valueBytes:  valueBytes,
   812  	})
   813  	defer eng.Close()
   814  
   815  	b.SetBytes(rangeBytes)
   816  	b.ResetTimer()
   817  
   818  	var stats enginepb.MVCCStats
   819  	var err error
   820  	for i := 0; i < b.N; i++ {
   821  		iter := eng.NewIterator(IterOptions{UpperBound: roachpb.KeyMax})
   822  		stats, err = iter.ComputeStats(roachpb.KeyMin, roachpb.KeyMax, 0)
   823  		iter.Close()
   824  		if err != nil {
   825  			b.Fatal(err)
   826  		}
   827  	}
   828  
   829  	b.StopTimer()
   830  	log.Infof(ctx, "live_bytes: %d", stats.LiveBytes)
   831  }
   832  
   833  // runMVCCFindSplitKey benchmarks MVCCFindSplitKey on a 64MB range of data.
   834  func runMVCCFindSplitKey(ctx context.Context, b *testing.B, emk engineMaker, valueBytes int) {
   835  	const rangeBytes = 64 * 1024 * 1024
   836  	numKeys := rangeBytes / (overhead + valueBytes)
   837  	eng, _ := setupMVCCData(ctx, b, emk, benchDataOptions{
   838  		numVersions: 1,
   839  		numKeys:     numKeys,
   840  		valueBytes:  valueBytes,
   841  	})
   842  	defer eng.Close()
   843  
   844  	b.SetBytes(rangeBytes)
   845  	b.ResetTimer()
   846  
   847  	var err error
   848  	for i := 0; i < b.N; i++ {
   849  		_, err = MVCCFindSplitKey(ctx, eng, roachpb.RKeyMin,
   850  			roachpb.RKeyMax, rangeBytes/2)
   851  		if err != nil {
   852  			b.Fatal(err)
   853  		}
   854  	}
   855  
   856  	b.StopTimer()
   857  }
   858  
   859  type benchGarbageCollectOptions struct {
   860  	benchDataOptions
   861  	keyBytes       int
   862  	deleteVersions int
   863  }
   864  
   865  func runMVCCGarbageCollect(
   866  	ctx context.Context, b *testing.B, emk engineMaker, opts benchGarbageCollectOptions,
   867  ) {
   868  	rng, _ := randutil.NewPseudoRand()
   869  	eng := emk(b, "mvcc_gc")
   870  	defer eng.Close()
   871  
   872  	ts := hlc.Timestamp{}.Add(time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC).UnixNano(), 0)
   873  	val := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, opts.valueBytes))
   874  
   875  	// We write values at ts+(0,i) and set now=ts+(1,0) so that now is ahead of
   876  	// all of the writes. The value of now doesn't matter much in practice, as
   877  	// it's used only for stats updates.
   878  	now := ts.Add(1, 0)
   879  
   880  	// Write 'numKeys' keys of the given 'keyBytes' and 'valueBytes' sizes to the engine.
   881  	// For each key, write 'numVersions' versions, and add a GCRequest_GCKey to
   882  	// the returned slice that affects the oldest 'deleteVersions' versions. The
   883  	// first write for each key will be at `ts`, the second one at `ts+(0,1)`,
   884  	// etc.
   885  	//
   886  	// NB: a real invocation of MVCCGarbageCollect typically has most of the keys
   887  	// in sorted order. Here they will be ordered randomly.
   888  	setup := func() (gcKeys []roachpb.GCRequest_GCKey) {
   889  		batch := eng.NewBatch()
   890  		for i := 0; i < opts.numKeys; i++ {
   891  			key := randutil.RandBytes(rng, opts.keyBytes)
   892  			if opts.deleteVersions > 0 {
   893  				gcKeys = append(gcKeys, roachpb.GCRequest_GCKey{
   894  					Timestamp: ts.Add(0, int32(opts.deleteVersions-1)),
   895  					Key:       key,
   896  				})
   897  			}
   898  			for j := 0; j < opts.numVersions; j++ {
   899  				if err := MVCCPut(ctx, batch, nil /* ms */, key, ts.Add(0, int32(j)), val, nil); err != nil {
   900  					b.Fatal(err)
   901  				}
   902  			}
   903  		}
   904  		if err := batch.Commit(false); err != nil {
   905  			b.Fatal(err)
   906  		}
   907  		batch.Close()
   908  		return gcKeys
   909  	}
   910  
   911  	gcKeys := setup()
   912  
   913  	b.ResetTimer()
   914  	for i := 0; i < b.N; i++ {
   915  		batch := eng.NewWriteOnlyBatch()
   916  		distinct := batch.Distinct()
   917  		if err := MVCCGarbageCollect(ctx, distinct, nil /* ms */, gcKeys, now); err != nil {
   918  			b.Fatal(err)
   919  		}
   920  		distinct.Close()
   921  		batch.Close()
   922  	}
   923  }
   924  
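// exampleMVCCGarbageCollectBenchmark is an illustrative sketch of a driver for
// runMVCCGarbageCollect: sub-benchmarks vary how many of the written versions
// each GCRequest_GCKey covers. All parameter values are arbitrary.
func exampleMVCCGarbageCollectBenchmark(b *testing.B, emk engineMaker) {
	ctx := context.Background()
	for _, deleteVersions := range []int{1, 16, 32} {
		b.Run(fmt.Sprintf("deleteVersions=%d", deleteVersions), func(b *testing.B) {
			runMVCCGarbageCollect(ctx, b, emk, benchGarbageCollectOptions{
				benchDataOptions: benchDataOptions{numKeys: 128, numVersions: 32, valueBytes: 64},
				keyBytes:         128,
				deleteVersions:   deleteVersions,
			})
		})
	}
}
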
   925  func runBatchApplyBatchRepr(
   926  	ctx context.Context,
   927  	b *testing.B,
   928  	emk engineMaker,
   929  	indexed, sequential bool,
   930  	valueSize, batchSize int,
   931  ) {
   932  	rng, _ := randutil.NewPseudoRand()
   933  	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
   934  	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)
   935  
   936  	eng := emk(b, fmt.Sprintf("batch_apply_batch_repr_%d_%d", valueSize, batchSize))
   937  	defer eng.Close()
   938  
   939  	var repr []byte
   940  	{
   941  		order := make([]int, batchSize)
   942  		for i := range order {
   943  			order[i] = i
   944  		}
   945  		if !sequential {
   946  			rng.Shuffle(len(order), func(i, j int) {
   947  				order[i], order[j] = order[j], order[i]
   948  			})
   949  		}
   950  
   951  		batch := eng.NewWriteOnlyBatch()
   952  		defer batch.Close() // NB: hold open so batch.Repr() doesn't get reused
   953  
   954  		for i := 0; i < batchSize; i++ {
   955  			key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(order[i])))
   956  			ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
   957  			if err := MVCCBlindPut(ctx, batch, nil, key, ts, value, nil); err != nil {
   958  				b.Fatal(err)
   959  			}
   960  		}
   961  		repr = batch.Repr()
   962  	}
   963  
   964  	b.SetBytes(int64(len(repr)))
   965  	b.ResetTimer()
   966  
   967  	for i := 0; i < b.N; i++ {
   968  		var batch Batch
   969  		if !indexed {
   970  			batch = eng.NewWriteOnlyBatch()
   971  		} else {
   972  			batch = eng.NewBatch()
   973  		}
   974  		if err := batch.ApplyBatchRepr(repr, false /* sync */); err != nil {
   975  			b.Fatal(err)
   976  		}
   977  		if r, ok := batch.(*rocksDBBatch); ok {
   978  			// Ensure mutations are flushed for RocksDB indexed batches.
   979  			r.flushMutations()
   980  		}
   981  		batch.Close()
   982  	}
   983  
   984  	b.StopTimer()
   985  }
   986  
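// exampleBatchApplyBatchReprBenchmark is an illustrative sketch of how
// runBatchApplyBatchRepr is parameterized: indexed chooses a regular batch over
// a write-only batch when applying the representation, and sequential controls
// whether the pre-built batch writes its keys in order. Sizes are arbitrary.
func exampleBatchApplyBatchReprBenchmark(b *testing.B, emk engineMaker) {
	ctx := context.Background()
	for _, indexed := range []bool{false, true} {
		for _, sequential := range []bool{false, true} {
			b.Run(fmt.Sprintf("indexed=%t/seq=%t", indexed, sequential), func(b *testing.B) {
				runBatchApplyBatchRepr(ctx, b, emk, indexed, sequential, 10 /* valueSize */, 10000 /* batchSize */)
			})
		}
	}
}
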
   987  func runExportToSst(
   988  	ctx context.Context,
   989  	b *testing.B,
   990  	emk engineMaker,
   991  	numKeys int,
   992  	numRevisions int,
   993  	exportAllRevisions bool,
   994  	contention bool,
   995  ) {
   996  	dir, cleanup := testutils.TempDir(b)
   997  	defer cleanup()
   998  	engine := emk(b, dir)
   999  	defer engine.Close()
  1000  
  1001  	batch := engine.NewWriteOnlyBatch()
  1002  	for i := 0; i < numKeys; i++ {
  1003  		key := make([]byte, 16)
  1004  		key = append(key, 'a', 'a', 'a')
  1005  		key = encoding.EncodeUint32Ascending(key, uint32(i))
  1006  
  1007  		for j := 0; j < numRevisions; j++ {
  1008  			err := batch.Put(MVCCKey{Key: key, Timestamp: hlc.Timestamp{WallTime: int64(j + 1), Logical: 0}}, []byte("foobar"))
  1009  			if err != nil {
  1010  				b.Fatal(err)
  1011  			}
  1012  		}
  1013  	}
  1014  	if err := batch.Commit(true); err != nil {
  1015  		b.Fatal(err)
  1016  	}
  1017  	batch.Close()
  1018  	if err := engine.Flush(); err != nil {
  1019  		b.Fatal(err)
  1020  	}
  1021  
  1022  	b.ResetTimer()
  1023  	for i := 0; i < b.N; i++ {
  1024  		startTS := hlc.Timestamp{WallTime: int64(numRevisions / 2)}
  1025  		endTS := hlc.Timestamp{WallTime: int64(numRevisions + 2)}
  1026  		_, _, _, err := engine.ExportToSst(roachpb.KeyMin, roachpb.KeyMax, startTS, endTS, exportAllRevisions, 0 /* targetSize */, 0 /* maxSize */, IterOptions{
  1027  			LowerBound: roachpb.KeyMin,
  1028  			UpperBound: roachpb.KeyMax,
  1029  		})
  1030  		if err != nil {
  1031  			b.Fatal(err)
  1032  		}
  1033  	}
  1034  	b.StopTimer()
  1035  }
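
// exampleExportToSstBenchmark is an illustrative sketch of a driver for
// runExportToSst: it varies the number of revisions per key and whether all
// revisions or only the latest are exported. The key and revision counts are
// arbitrary, and contention is simply left false here.
func exampleExportToSstBenchmark(b *testing.B, emk engineMaker) {
	ctx := context.Background()
	for _, numRevisions := range []int{1, 10, 100} {
		for _, exportAllRevisions := range []bool{false, true} {
			b.Run(fmt.Sprintf("revisions=%d/all=%t", numRevisions, exportAllRevisions), func(b *testing.B) {
				runExportToSst(ctx, b, emk, 65536 /* numKeys */, numRevisions, exportAllRevisions, false /* contention */)
			})
		}
	}
}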