github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/storageccl/engineccl/bench_test.go

// Copyright 2017 The Cockroach Authors.
//
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
// License (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

package engineccl

import (
	"context"
	"fmt"
	"math/rand"
	"os"
	"path/filepath"
	"testing"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/storage"
	"github.com/cockroachdb/cockroach/pkg/testutils"
	"github.com/cockroachdb/cockroach/pkg/util/encoding"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/randutil"
)

// loadTestData writes numKeys keys in numBatches separate batches. Keys are
// written in order. Keys in a given batch have MVCC wall times that fall in
// the same batchTimeSpan-wide window; batch windows start at batchTimeSpan and
// increase in steps of batchTimeSpan.
//
// Importantly, writing keys in order convinces RocksDB to output one SST per
// batch, where each SST contains keys from only one batch's time window. E.g.,
// writing A,B at t0 and C at t1 will create two SSTs: one for A,B that only
// contains keys at t0, and one for C that only contains keys at t1.
// Conversely, writing A, C at t0 and B at t1 would create just one SST
// containing A, B, C (due to an immediate compaction).
//
// The creation of the database is time-consuming, so the caller can choose
// whether to use a temporary or permanent location. A hypothetical usage
// sketch for the temporary case follows this function.
func loadTestData(
	dir string, numKeys, numBatches, batchTimeSpan, valueBytes int,
) (storage.Engine, error) {
	ctx := context.Background()

	exists := true
	if _, err := os.Stat(dir); os.IsNotExist(err) {
		exists = false
	}

	eng, err := storage.NewRocksDB(
		storage.RocksDBConfig{
			StorageConfig: base.StorageConfig{
				Settings: cluster.MakeTestingClusterSettings(),
				Dir:      dir,
			},
		},
		storage.RocksDBCache{},
	)
	if err != nil {
		return nil, err
	}

	if exists {
		testutils.ReadAllFiles(filepath.Join(dir, "*"))
		return eng, nil
	}

	log.Infof(ctx, "creating test data: %s", dir)

	// Generate the same data every time.
	rng := rand.New(rand.NewSource(1449168817))

	keys := make([]roachpb.Key, numKeys)
	for i := 0; i < numKeys; i++ {
		keys[i] = roachpb.Key(encoding.EncodeUvarintAscending([]byte("key-"), uint64(i)))
	}

	sstTimestamps := make([]int64, numBatches)
	for i := 0; i < len(sstTimestamps); i++ {
		sstTimestamps[i] = int64((i + 1) * batchTimeSpan)
	}

	var batch storage.Batch
	var minWallTime int64
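	// Write the keys in numBatches batches: every len(keys)/numBatches keys,
	// commit and flush the current batch so it is forced into its own SST,
	// then start a new batch at the next batch's timestamp window.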
	for i, key := range keys {
		if scaled := len(keys) / numBatches; (i % scaled) == 0 {
			if i > 0 {
				log.Infof(ctx, "committing (%d/~%d)", i/scaled, numBatches)
				if err := batch.Commit(false /* sync */); err != nil {
					return nil, err
				}
				batch.Close()
				if err := eng.Flush(); err != nil {
					return nil, err
				}
			}
			batch = eng.NewBatch()
			minWallTime = sstTimestamps[i/scaled]
		}
		timestamp := hlc.Timestamp{WallTime: minWallTime + rng.Int63n(int64(batchTimeSpan))}
		value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueBytes))
		value.InitChecksum(key)
		if err := storage.MVCCPut(ctx, batch, nil, key, timestamp, value, nil); err != nil {
			return nil, err
		}
	}
	if err := batch.Commit(false /* sync */); err != nil {
		return nil, err
	}
	batch.Close()
	if err := eng.Flush(); err != nil {
		return nil, err
	}

	return eng, nil
}
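
// exampleLoadTemporaryTestData is a hypothetical usage sketch, not part of the
// original benchmarks: it shows how a caller that does not need to reuse the
// generated database across runs could point loadTestData at a throwaway
// directory and remove it afterwards. The directory name and the (small) sizes
// below are arbitrary assumptions chosen only for illustration.
func exampleLoadTemporaryTestData(b *testing.B) {
	dir := filepath.Join(os.TempDir(), "mvcc_data_example")
	defer func() {
		// Remove the generated database once the engine has been closed.
		if err := os.RemoveAll(dir); err != nil {
			b.Fatal(err)
		}
	}()
	eng, err := loadTestData(dir, 1000 /* numKeys */, 10 /* numBatches */, 10 /* batchTimeSpan */, 64 /* valueBytes */)
	if err != nil {
		b.Fatal(err)
	}
	defer eng.Close()
	// ... run the workload under test against eng ...
}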

// runIterate benchmarks iteration over the entire keyspace within time bounds
// derived from the loadFactor. A loadFactor of 0.5 means that approximately
// 50% of the SSTs contain keys in the range [startTime, endTime].
func runIterate(
	b *testing.B,
	loadFactor float32,
	makeIterator func(storage.Engine, hlc.Timestamp, hlc.Timestamp) storage.Iterator,
) {
	const numKeys = 100000
	const numBatches = 100
	const batchTimeSpan = 10
	const valueBytes = 512

	// Store the database in this directory so we don't have to regenerate it on
	// each benchmark run.
	eng, err := loadTestData("mvcc_data", numKeys, numBatches, batchTimeSpan, valueBytes)
	if err != nil {
		b.Fatal(err)
	}
	defer eng.Close()

	b.SetBytes(int64(numKeys * valueBytes))
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		n := 0
		startTime := hlc.MinTimestamp
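		// With the constants above, a loadFactor of 0.5 gives an endTime wall
		// time of 0.5*100*10 = 500, so only the first ~50 of the 100 batch
		// windows fall within [startTime, endTime].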
		endTime := hlc.Timestamp{WallTime: int64(loadFactor * numBatches * batchTimeSpan)}
		it := makeIterator(eng, startTime, endTime)
		for it.SeekGE(storage.MVCCKey{}); ; it.Next() {
			if ok, err := it.Valid(); !ok {
				if err != nil {
					b.Fatal(err)
				}
				break
			}
			n++
		}
		// Close the iterator here rather than deferring, so that we don't keep
		// b.N iterators open until the benchmark function returns.
		it.Close()
		if e := int(loadFactor * numKeys); n < e {
			b.Fatalf("expected at least %d keys, but got %d\n", e, n)
		}
	}

	b.StopTimer()
}

func BenchmarkTimeBoundIterate(b *testing.B) {
	for _, loadFactor := range []float32{1.0, 0.5, 0.1, 0.05, 0.0} {
		b.Run(fmt.Sprintf("LoadFactor=%.2f", loadFactor), func(b *testing.B) {
			b.Run("NormalIterator", func(b *testing.B) {
				runIterate(b, loadFactor, func(e storage.Engine, _, _ hlc.Timestamp) storage.Iterator {
					return e.NewIterator(storage.IterOptions{UpperBound: roachpb.KeyMax})
				})
			})
			b.Run("TimeBoundIterator", func(b *testing.B) {
				runIterate(b, loadFactor, func(e storage.Engine, startTime, endTime hlc.Timestamp) storage.Iterator {
					return e.NewIterator(storage.IterOptions{
						MinTimestampHint: startTime,
						MaxTimestampHint: endTime,
						UpperBound:       roachpb.KeyMax,
					})
				})
			})
		})
	}
}