github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/storage/bench_test.go

// Copyright 2014 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package storage

import (
	"context"
	"fmt"
	"math"
	"math/rand"
	"os"
	"path/filepath"
	"testing"
	"time"

	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
	"github.com/cockroachdb/cockroach/pkg/testutils"
	"github.com/cockroachdb/cockroach/pkg/util/encoding"
	"github.com/cockroachdb/cockroach/pkg/util/fileutil"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/randutil"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
)

const overhead = 48 // Per key/value overhead (empirically determined)

type engineMaker func(testing.TB, string) Engine

type benchDataOptions struct {
	numVersions int
	numKeys     int
	valueBytes  int

	// In transactional mode, data is written by writing and later resolving
	// intents. In non-transactional mode, data is written directly, without
	// leaving intents. Transactional mode notably stresses RocksDB deletion
	// tombstones, as the metadata key is repeatedly written and deleted.
	//
	// Both modes are reflective of real workloads. Transactional mode simulates
	// data that has recently been INSERTed into a table, while non-transactional
	// mode simulates data that has been RESTOREd or is old enough to have been
	// fully compacted.
	transactional bool
}

// loadTestData writes numKeys keys in numBatches separate batches. Keys are
// written in order. Every key in a given batch has the same MVCC timestamp;
// batch timestamps start at batchTimeSpan and increase in intervals of
// batchTimeSpan.
//
// Importantly, writing keys in order convinces RocksDB to output one SST per
// batch, where each SST contains keys of only one timestamp. E.g., writing A,B
// at t0 and C at t1 will create two SSTs: one for A,B that only contains keys
// at t0, and one for C that only contains keys at t1. Conversely, writing A, C
// at t0 and B at t1 would create just one SST that contained A,B,C (due to an
// immediate compaction).
//
// The creation of the database is time consuming, so the caller can choose
// whether to use a temporary or permanent location.
func loadTestData(dir string, numKeys, numBatches, batchTimeSpan, valueBytes int) (Engine, error) {
	ctx := context.Background()

	exists := true
	if _, err := os.Stat(dir); os.IsNotExist(err) {
		exists = false
	}

	eng, err := NewRocksDB(
		RocksDBConfig{
			StorageConfig: base.StorageConfig{
				Settings: cluster.MakeTestingClusterSettings(),
				Dir:      dir,
			},
		},
		RocksDBCache{},
	)
	if err != nil {
		return nil, err
	}

	if exists {
		testutils.ReadAllFiles(filepath.Join(dir, "*"))
		return eng, nil
	}

	log.Infof(context.Background(), "creating test data: %s", dir)

	// Generate the same data every time.
	rng := rand.New(rand.NewSource(1449168817))

	keys := make([]roachpb.Key, numKeys)
	for i := 0; i < numKeys; i++ {
		keys[i] = roachpb.Key(encoding.EncodeUvarintAscending([]byte("key-"), uint64(i)))
	}

	sstTimestamps := make([]int64, numBatches)
	for i := 0; i < len(sstTimestamps); i++ {
		sstTimestamps[i] = int64((i + 1) * batchTimeSpan)
	}

	var batch Batch
	var minWallTime int64
	for i, key := range keys {
		if scaled := len(keys) / numBatches; (i % scaled) == 0 {
			if i > 0 {
				log.Infof(ctx, "committing (%d/~%d)", i/scaled, numBatches)
				if err := batch.Commit(false /* sync */); err != nil {
					return nil, err
				}
				batch.Close()
				if err := eng.Flush(); err != nil {
					return nil, err
				}
			}
			batch = eng.NewBatch()
			minWallTime = sstTimestamps[i/scaled]
		}
		timestamp := hlc.Timestamp{WallTime: minWallTime + rand.Int63n(int64(batchTimeSpan))}
		value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueBytes))
		value.InitChecksum(key)
		if err := MVCCPut(ctx, batch, nil, key, timestamp, value, nil); err != nil {
			return nil, err
		}
	}
	if err := batch.Commit(false /* sync */); err != nil {
		return nil, err
	}
	batch.Close()
	if err := eng.Flush(); err != nil {
		return nil, err
	}

	return eng, nil
}
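// The function below is an illustrative sketch and is NOT part of the
// original benchmark suite: it only shows the shape of a benchmark that
// consumes loadTestData. The directory name and parameter values are
// assumptions chosen for the example.
func exampleLoadTestDataUsage(b *testing.B) {
	// 100k keys spread over 100 batches (and hence ~100 SSTs), 10 units of
	// wall time per batch, 64-byte values.
	eng, err := loadTestData("bench_example_data", 100000, 100, 10, 64)
	if err != nil {
		b.Fatal(err)
	}
	defer eng.Close()

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// The timed portion would typically open an iterator over a slice of
		// the keyspace (optionally restricted to a time window) and read the
		// data back.
	}
	b.StopTimer()
}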
// setupMVCCData writes up to numVersions values at each of numKeys
// keys. The number of versions written for each key is chosen
// randomly according to a uniform distribution. Each successive
// version is written starting at 5ns and then in 5ns increments. This
// allows scans at various times, starting at t=5ns, and continuing to
// t=5ns*(numVersions+1). A version for each key will be read on every
// such scan, but the dynamics of the scan will change depending on
// the historical timestamp. Earlier timestamps mean scans which must
// skip more historical versions; later timestamps mean scans which
// skip fewer.
//
// The creation of the database is time consuming, especially for larger
// numbers of versions. The database is persisted between runs and stored in
// the current directory as "mvcc_data_<versions>_<keys>_<valueBytes>" (which
// is also returned).
func setupMVCCData(
	ctx context.Context, b *testing.B, emk engineMaker, opts benchDataOptions,
) (Engine, string) {
	loc := fmt.Sprintf("mvcc_data_%d_%d_%d", opts.numVersions, opts.numKeys, opts.valueBytes)
	if opts.transactional {
		loc += "_txn"
	}

	exists := true
	if _, err := os.Stat(loc); os.IsNotExist(err) {
		exists = false
	} else if err != nil {
		b.Fatal(err)
	}

	eng := emk(b, loc)

	if exists {
		testutils.ReadAllFiles(filepath.Join(loc, "*"))
		return eng, loc
	}

	log.Infof(ctx, "creating mvcc data: %s", loc)

	// Generate the same data every time.
	rng := rand.New(rand.NewSource(1449168817))

	keys := make([]roachpb.Key, opts.numKeys)
	var order []int
	for i := 0; i < opts.numKeys; i++ {
		keys[i] = roachpb.Key(encoding.EncodeUvarintAscending([]byte("key-"), uint64(i)))
		keyVersions := rng.Intn(opts.numVersions) + 1
		for j := 0; j < keyVersions; j++ {
			order = append(order, i)
		}
	}

	// Randomize the order in which the keys are written.
	for i, n := 0, len(order); i < n-1; i++ {
		j := i + rng.Intn(n-i)
		order[i], order[j] = order[j], order[i]
	}

	counts := make([]int, opts.numKeys)

	var txn *roachpb.Transaction
	if opts.transactional {
		txnCopy := *txn1Commit
		txn = &txnCopy
	}

	writeKey := func(batch Batch, idx int) {
		key := keys[idx]
		value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, opts.valueBytes))
		value.InitChecksum(key)
		counts[idx]++
		ts := hlc.Timestamp{WallTime: int64(counts[idx] * 5)}
		if txn != nil {
			txn.ReadTimestamp = ts
			txn.WriteTimestamp = ts
		}
		if err := MVCCPut(ctx, batch, nil /* ms */, key, ts, value, txn); err != nil {
			b.Fatal(err)
		}
	}

	resolveLastIntent := func(batch Batch, idx int) {
		key := keys[idx]
		txnMeta := txn.TxnMeta
		txnMeta.WriteTimestamp = hlc.Timestamp{WallTime: int64(counts[idx]) * 5}
		if _, err := MVCCResolveWriteIntent(ctx, batch, nil /* ms */, roachpb.LockUpdate{
			Span:   roachpb.Span{Key: key},
			Status: roachpb.COMMITTED,
			Txn:    txnMeta,
		}); err != nil {
			b.Fatal(err)
		}
	}

	batch := eng.NewBatch()
	for i, idx := range order {
		// Output the keys in ~20 batches. If we used a single batch to output all
		// of the keys rocksdb would create a single sstable. We want multiple
		// sstables in order to exercise filtering of which sstables are examined
		// during iterator seeking. We fix the number of batches we output so that
		// optimizations which change the data size result in the same number of
		// sstables.
		if scaled := len(order) / 20; i > 0 && (i%scaled) == 0 {
			log.Infof(ctx, "committing (%d/~%d)", i/scaled, 20)
			if err := batch.Commit(false /* sync */); err != nil {
				b.Fatal(err)
			}
			batch.Close()
			batch = eng.NewBatch()
			if err := eng.Flush(); err != nil {
				b.Fatal(err)
			}
		}

		if opts.transactional {
			// If we've previously written this key transactionally, we need to
			// resolve the intent we left. We don't do this immediately after writing
			// the key to introduce the possibility that the intent's resolution ends
			// up in a different batch than writing the intent itself. Note that the
			// first time through this loop for any given key we'll attempt to resolve
			// a non-existent intent, but that's OK.
			resolveLastIntent(batch, idx)
		}
		writeKey(batch, idx)
	}
	if opts.transactional {
		// If we were writing transactionally, we need to do one last round of
		// intent resolution. Just stuff it all into the last batch.
		for idx := range keys {
			resolveLastIntent(batch, idx)
		}
	}
	if err := batch.Commit(false /* sync */); err != nil {
		b.Fatal(err)
	}
	batch.Close()
	if err := eng.Flush(); err != nil {
		b.Fatal(err)
	}

	return eng, loc
}

type benchScanOptions struct {
	benchDataOptions
	numRows int
	reverse bool
}

// runMVCCScan first creates test data (and resets the benchmarking
// timer). It then performs b.N MVCCScans in increments of numRows
// keys over all of the data in the Engine instance, restarting at
// the beginning of the keyspace, as many times as necessary.
func runMVCCScan(ctx context.Context, b *testing.B, emk engineMaker, opts benchScanOptions) {
	// Use the same number of keys for all of the mvcc scan
	// benchmarks. Using a different number of keys per test gives
	// preferential treatment to tests with fewer keys. Note that the
	// datasets all fit in cache and the cache is pre-warmed.
	if opts.numKeys != 0 {
		b.Fatal("test error: cannot call runMVCCScan with non-zero numKeys")
	}
	opts.numKeys = 100000

	eng, _ := setupMVCCData(ctx, b, emk, opts.benchDataOptions)
	defer eng.Close()

	{
		// Pull all of the sstables into the RocksDB cache in order to make the
		// timings more stable. Otherwise, the first run will be penalized pulling
		// data into the cache while later runs will not.
		iter := eng.NewIterator(IterOptions{UpperBound: roachpb.KeyMax})
		_, _ = iter.ComputeStats(roachpb.KeyMin, roachpb.KeyMax, 0)
		iter.Close()
	}

	b.SetBytes(int64(opts.numRows * opts.valueBytes))
	b.ResetTimer()

	startKeyBuf := append(make([]byte, 0, 64), []byte("key-")...)
	endKeyBuf := append(make([]byte, 0, 64), []byte("key-")...)
	for i := 0; i < b.N; i++ {
		// Choose a random key to start scan.
		keyIdx := rand.Int31n(int32(opts.numKeys - opts.numRows))
		startKey := roachpb.Key(encoding.EncodeUvarintAscending(startKeyBuf[:4], uint64(keyIdx)))
		endKey := roachpb.Key(encoding.EncodeUvarintAscending(endKeyBuf[:4], uint64(keyIdx+int32(opts.numRows)-1)))
		endKey = endKey.Next()
		walltime := int64(5 * (rand.Int31n(int32(opts.numVersions)) + 1))
		ts := hlc.Timestamp{WallTime: walltime}
		res, err := MVCCScan(ctx, eng, startKey, endKey, ts, MVCCScanOptions{
			MaxKeys: int64(opts.numRows),
			Reverse: opts.reverse,
		})
		if err != nil {
			b.Fatalf("failed scan: %+v", err)
		}
		if len(res.KVs) != opts.numRows {
			b.Fatalf("failed to scan: %d != %d", len(res.KVs), opts.numRows)
		}
	}

	b.StopTimer()
}

// runMVCCGet first creates test data (and resets the benchmarking
// timer). It then performs b.N MVCCGets.
func runMVCCGet(ctx context.Context, b *testing.B, emk engineMaker, opts benchDataOptions) {
	// Use the same number of keys for all of the mvcc scan
	// benchmarks. Using a different number of keys per test gives
	// preferential treatment to tests with fewer keys. Note that the
	// datasets all fit in cache and the cache is pre-warmed.
	if opts.numKeys != 0 {
		b.Fatal("test error: cannot call runMVCCGet with non-zero numKeys")
	}
	opts.numKeys = 100000

	eng, _ := setupMVCCData(ctx, b, emk, opts)
	defer eng.Close()

	b.SetBytes(int64(opts.valueBytes))
	b.ResetTimer()

	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)
	for i := 0; i < b.N; i++ {
		// Choose a random key to retrieve.
		keyIdx := rand.Int31n(int32(opts.numKeys))
		key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(keyIdx)))
		walltime := int64(5 * (rand.Int31n(int32(opts.numVersions)) + 1))
		ts := hlc.Timestamp{WallTime: walltime}
		if v, _, err := MVCCGet(ctx, eng, key, ts, MVCCGetOptions{}); err != nil {
			b.Fatalf("failed get: %+v", err)
		} else if v == nil {
			b.Fatalf("failed get (key not found): %d@%d", keyIdx, walltime)
		} else if valueBytes, err := v.GetBytes(); err != nil {
			b.Fatal(err)
		} else if len(valueBytes) != opts.valueBytes {
			b.Fatalf("unexpected value size: %d", len(valueBytes))
		}
	}

	b.StopTimer()
}
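// Hypothetical sketch, not part of the original file: the concrete Benchmark
// functions that call these helpers are presumably defined elsewhere, but a
// driver for runMVCCScan/runMVCCGet would look roughly like this, given some
// engineMaker `emk` (its construction is assumed here). The size matrix is
// illustrative only.
func exampleMVCCReadBenchmarks(b *testing.B, emk engineMaker) {
	ctx := context.Background()
	for _, numVersions := range []int{1, 10, 100} {
		for _, valueSize := range []int{8, 64, 512} {
			opts := benchDataOptions{numVersions: numVersions, valueBytes: valueSize}
			b.Run(fmt.Sprintf("scan/versions=%d/valueSize=%d", numVersions, valueSize), func(b *testing.B) {
				// numKeys is left zero; runMVCCScan fills in its fixed key count.
				runMVCCScan(ctx, b, emk, benchScanOptions{benchDataOptions: opts, numRows: 100})
			})
			b.Run(fmt.Sprintf("get/versions=%d/valueSize=%d", numVersions, valueSize), func(b *testing.B) {
				runMVCCGet(ctx, b, emk, opts)
			})
		}
	}
}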
func runMVCCPut(ctx context.Context, b *testing.B, emk engineMaker, valueSize int) {
	rng, _ := randutil.NewPseudoRand()
	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)

	eng := emk(b, fmt.Sprintf("put_%d", valueSize))
	defer eng.Close()

	b.SetBytes(int64(valueSize))
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
		ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
		if err := MVCCPut(ctx, eng, nil, key, ts, value, nil); err != nil {
			b.Fatalf("failed put: %+v", err)
		}
	}

	b.StopTimer()
}

func runMVCCBlindPut(ctx context.Context, b *testing.B, emk engineMaker, valueSize int) {
	rng, _ := randutil.NewPseudoRand()
	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)

	eng := emk(b, fmt.Sprintf("put_%d", valueSize))
	defer eng.Close()

	b.SetBytes(int64(valueSize))
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
		ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
		if err := MVCCBlindPut(ctx, eng, nil, key, ts, value, nil); err != nil {
			b.Fatalf("failed put: %+v", err)
		}
	}

	b.StopTimer()
}

func runMVCCConditionalPut(
	ctx context.Context, b *testing.B, emk engineMaker, valueSize int, createFirst bool,
) {
	rng, _ := randutil.NewPseudoRand()
	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)

	eng := emk(b, fmt.Sprintf("cput_%d", valueSize))
	defer eng.Close()

	b.SetBytes(int64(valueSize))
	var expected *roachpb.Value
	if createFirst {
		for i := 0; i < b.N; i++ {
			key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
			ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
			if err := MVCCPut(ctx, eng, nil, key, ts, value, nil); err != nil {
				b.Fatalf("failed put: %+v", err)
			}
		}
		expected = &value
	}

	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
		ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
		if err := MVCCConditionalPut(ctx, eng, nil, key, ts, value, expected, CPutFailIfMissing, nil); err != nil {
			b.Fatalf("failed put: %+v", err)
		}
	}

	b.StopTimer()
}

func runMVCCBlindConditionalPut(ctx context.Context, b *testing.B, emk engineMaker, valueSize int) {
	rng, _ := randutil.NewPseudoRand()
	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)

	eng := emk(b, fmt.Sprintf("cput_%d", valueSize))
	defer eng.Close()

	b.SetBytes(int64(valueSize))
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
		ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
		if err := MVCCBlindConditionalPut(ctx, eng, nil, key, ts, value, nil, CPutFailIfMissing, nil); err != nil {
			b.Fatalf("failed put: %+v", err)
		}
	}

	b.StopTimer()
}

func runMVCCInitPut(ctx context.Context, b *testing.B, emk engineMaker, valueSize int) {
	rng, _ := randutil.NewPseudoRand()
	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)

	eng := emk(b, fmt.Sprintf("iput_%d", valueSize))
	defer eng.Close()

	b.SetBytes(int64(valueSize))
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
		ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
		if err := MVCCInitPut(ctx, eng, nil, key, ts, value, false, nil); err != nil {
			b.Fatalf("failed put: %+v", err)
		}
	}

	b.StopTimer()
}

func runMVCCBlindInitPut(ctx context.Context, b *testing.B, emk engineMaker, valueSize int) {
	rng, _ := randutil.NewPseudoRand()
	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)

	eng := emk(b, fmt.Sprintf("iput_%d", valueSize))
	defer eng.Close()

	b.SetBytes(int64(valueSize))
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(i)))
		ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
		if err := MVCCBlindInitPut(ctx, eng, nil, key, ts, value, false, nil); err != nil {
			b.Fatalf("failed put: %+v", err)
		}
	}

	b.StopTimer()
}

func runMVCCBatchPut(ctx context.Context, b *testing.B, emk engineMaker, valueSize, batchSize int) {
	rng, _ := randutil.NewPseudoRand()
	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)

	eng := emk(b, fmt.Sprintf("batch_put_%d_%d", valueSize, batchSize))
	defer eng.Close()

	b.SetBytes(int64(valueSize))
	b.ResetTimer()

	for i := 0; i < b.N; i += batchSize {
		end := i + batchSize
		if end > b.N {
			end = b.N
		}

		batch := eng.NewBatch()

		for j := i; j < end; j++ {
			key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(j)))
			ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
			if err := MVCCPut(ctx, batch, nil, key, ts, value, nil); err != nil {
				b.Fatalf("failed put: %+v", err)
			}
		}

		if err := batch.Commit(false /* sync */); err != nil {
			b.Fatal(err)
		}

		batch.Close()
	}

	b.StopTimer()
}
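// Hypothetical sketch, not part of the original file: one way the put-style
// runners above might be combined into a single parameterized benchmark. The
// engineMaker `emk` and the size matrix are assumptions for illustration.
func exampleMVCCPutBenchmarks(b *testing.B, emk engineMaker) {
	ctx := context.Background()
	for _, valueSize := range []int{10, 100, 1000} {
		b.Run(fmt.Sprintf("put/valueSize=%d", valueSize), func(b *testing.B) {
			runMVCCPut(ctx, b, emk, valueSize)
		})
		b.Run(fmt.Sprintf("blindPut/valueSize=%d", valueSize), func(b *testing.B) {
			runMVCCBlindPut(ctx, b, emk, valueSize)
		})
		for _, batchSize := range []int{10, 10000} {
			b.Run(fmt.Sprintf("batchPut/valueSize=%d/batchSize=%d", valueSize, batchSize), func(b *testing.B) {
				runMVCCBatchPut(ctx, b, emk, valueSize, batchSize)
			})
		}
	}
}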
// Benchmark batch time series merge operations. This benchmark does not
// perform any reads and is only used to measure the cost of the periodic time
// series updates.
func runMVCCBatchTimeSeries(ctx context.Context, b *testing.B, emk engineMaker, batchSize int) {
	// Precompute keys so we don't waste time formatting them at each iteration.
	numKeys := batchSize
	keys := make([]roachpb.Key, numKeys)
	for i := 0; i < numKeys; i++ {
		keys[i] = roachpb.Key(fmt.Sprintf("key-%d", i))
	}

	// We always write the same time series data (containing a single unchanging
	// sample). This isn't realistic but is fine because we're never reading the
	// data.
	var value roachpb.Value
	if err := value.SetProto(&roachpb.InternalTimeSeriesData{
		StartTimestampNanos: 0,
		SampleDurationNanos: 1000,
		Samples: []roachpb.InternalTimeSeriesSample{
			{Offset: 0, Count: 1, Sum: 5.0},
		},
	}); err != nil {
		b.Fatal(err)
	}

	eng := emk(b, fmt.Sprintf("batch_merge_%d", batchSize))
	defer eng.Close()

	b.ResetTimer()

	var ts hlc.Timestamp
	for i := 0; i < b.N; i++ {
		batch := eng.NewBatch()

		for j := 0; j < batchSize; j++ {
			ts.Logical++
			if err := MVCCMerge(ctx, batch, nil, keys[j], ts, value); err != nil {
				b.Fatalf("failed put: %+v", err)
			}
		}

		if err := batch.Commit(false /* sync */); err != nil {
			b.Fatal(err)
		}
		batch.Close()
	}

	b.StopTimer()
}

// runMVCCMerge merges value into numKeys separate keys.
func runMVCCMerge(
	ctx context.Context, b *testing.B, emk engineMaker, value *roachpb.Value, numKeys int,
) {
	eng := emk(b, fmt.Sprintf("merge_%d", numKeys))
	defer eng.Close()

	// Precompute keys so we don't waste time formatting them at each iteration.
	keys := make([]roachpb.Key, numKeys)
	for i := 0; i < numKeys; i++ {
		keys[i] = roachpb.Key(fmt.Sprintf("key-%d", i))
	}

	b.ResetTimer()

	ts := hlc.Timestamp{}
	// Use parallelism if specified when test is run.
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			ms := enginepb.MVCCStats{}
			ts.Logical++
			err := MVCCMerge(ctx, eng, &ms, keys[rand.Intn(numKeys)], ts, *value)
			if err != nil {
				b.Fatal(err)
			}
		}
	})
	b.StopTimer()

	// Read values out to force merge.
	for _, key := range keys {
		val, _, err := MVCCGet(ctx, eng, key, hlc.Timestamp{}, MVCCGetOptions{})
		if err != nil {
			b.Fatal(err)
		} else if val == nil {
			continue
		}
	}
}

// runMVCCGetMergedValue reads merged values for numKeys separate keys and mergesPerKey
// operands per key.
func runMVCCGetMergedValue(
	ctx context.Context, b *testing.B, emk engineMaker, numKeys, mergesPerKey int,
) {
	eng := emk(b, fmt.Sprintf("get_merged_%d_%d", numKeys, mergesPerKey))
	defer eng.Close()

	// Precompute keys so we don't waste time formatting them at each iteration.
	keys := make([]roachpb.Key, numKeys)
	for i := 0; i < numKeys; i++ {
		keys[i] = roachpb.Key(fmt.Sprintf("key-%d", i))
	}

	timestamp := hlc.Timestamp{}
	for i := 0; i < numKeys; i++ {
		for j := 0; j < mergesPerKey; j++ {
			timeseries := &roachpb.InternalTimeSeriesData{
				StartTimestampNanos: 0,
				SampleDurationNanos: 1000,
				Samples: []roachpb.InternalTimeSeriesSample{
					{Offset: int32(j), Count: 1, Sum: 5.0},
				},
			}
			var value roachpb.Value
			if err := value.SetProto(timeseries); err != nil {
				b.Fatal(err)
			}
			ms := enginepb.MVCCStats{}
			timestamp.Logical++
			err := MVCCMerge(ctx, eng, &ms, keys[i], timestamp, value)
			if err != nil {
				b.Fatal(err)
			}
		}
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_, _, err := MVCCGet(ctx, eng, keys[rand.Intn(numKeys)], timestamp, MVCCGetOptions{})
		if err != nil {
			b.Fatal(err)
		}
	}
	b.StopTimer()
}

func runMVCCDeleteRange(ctx context.Context, b *testing.B, emk engineMaker, valueBytes int) {
	// 512 KB ranges so the benchmark doesn't take forever.
	const rangeBytes = 512 * 1024
	numKeys := rangeBytes / (overhead + valueBytes)
	eng, dir := setupMVCCData(ctx, b, emk, benchDataOptions{
		numVersions: 1,
		numKeys:     numKeys,
		valueBytes:  valueBytes,
	})
	eng.Close()

	b.SetBytes(rangeBytes)
	b.StopTimer()
	b.ResetTimer()

	locDirty := dir + "_dirty"

	for i := 0; i < b.N; i++ {
		if err := os.RemoveAll(locDirty); err != nil {
			b.Fatal(err)
		}
		if err := fileutil.CopyDir(dir, locDirty); err != nil {
			b.Fatal(err)
		}
		func() {
			eng := emk(b, locDirty)
			defer eng.Close()

			b.StartTimer()
			if _, _, _, err := MVCCDeleteRange(
				ctx,
				eng,
				&enginepb.MVCCStats{},
				roachpb.KeyMin,
				roachpb.KeyMax,
				math.MaxInt64,
				hlc.MaxTimestamp,
				nil,
				false,
			); err != nil {
				b.Fatal(err)
			}
			b.StopTimer()
		}()
	}
}

func runClearRange(
	ctx context.Context,
	b *testing.B,
	emk engineMaker,
	clearRange func(e Engine, b Batch, start, end MVCCKey) error,
) {
	const rangeBytes = 64 << 20
	const valueBytes = 92
	numKeys := rangeBytes / (overhead + valueBytes)
	eng, _ := setupMVCCData(ctx, b, emk, benchDataOptions{
		numVersions: 1,
		numKeys:     numKeys,
		valueBytes:  valueBytes,
	})
	defer eng.Close()

	// It is not currently possible to ClearRange(NilKey, MVCCKeyMax) thanks to a
	// variety of hacks inside of ClearRange that explode if provided the NilKey.
	// So instead we start our ClearRange at the first key that actually exists.
	//
	// TODO(benesch): when those hacks are removed, don't bother computing the
	// first key and simply ClearRange(NilKey, MVCCKeyMax).
	iter := eng.NewIterator(IterOptions{UpperBound: roachpb.KeyMax})
	defer iter.Close()
	iter.SeekGE(NilKey)
	if ok, err := iter.Valid(); !ok {
		b.Fatalf("unable to find first key (err: %v)", err)
	}
	firstKey := iter.Key()

	b.SetBytes(rangeBytes)
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		batch := eng.NewWriteOnlyBatch()
		if err := clearRange(eng, batch, firstKey, MVCCKeyMax); err != nil {
			b.Fatal(err)
		}
		// NB: We don't actually commit the batch here as we don't want to delete
		// the data. Doing so would require repopulating on every iteration of the
		// loop, which was OK when ClearRange was slow but now causes the benchmark
		// to take an exceptionally long time since ClearRange is very fast.
		batch.Close()
	}

	b.StopTimer()
}

// runMVCCComputeStats benchmarks computing MVCC stats on a 64MB range of data.
func runMVCCComputeStats(ctx context.Context, b *testing.B, emk engineMaker, valueBytes int) {
	const rangeBytes = 64 * 1024 * 1024
	numKeys := rangeBytes / (overhead + valueBytes)
	eng, _ := setupMVCCData(ctx, b, emk, benchDataOptions{
		numVersions: 1,
		numKeys:     numKeys,
		valueBytes:  valueBytes,
	})
	defer eng.Close()

	b.SetBytes(rangeBytes)
	b.ResetTimer()

	var stats enginepb.MVCCStats
	var err error
	for i := 0; i < b.N; i++ {
		iter := eng.NewIterator(IterOptions{UpperBound: roachpb.KeyMax})
		stats, err = iter.ComputeStats(roachpb.KeyMin, roachpb.KeyMax, 0)
		iter.Close()
		if err != nil {
			b.Fatal(err)
		}
	}

	b.StopTimer()
	log.Infof(ctx, "live_bytes: %d", stats.LiveBytes)
}

// runMVCCFindSplitKey benchmarks MVCCFindSplitKey on a 64MB range of data.
func runMVCCFindSplitKey(ctx context.Context, b *testing.B, emk engineMaker, valueBytes int) {
	const rangeBytes = 64 * 1024 * 1024
	numKeys := rangeBytes / (overhead + valueBytes)
	eng, _ := setupMVCCData(ctx, b, emk, benchDataOptions{
		numVersions: 1,
		numKeys:     numKeys,
		valueBytes:  valueBytes,
	})
	defer eng.Close()

	b.SetBytes(rangeBytes)
	b.ResetTimer()

	var err error
	for i := 0; i < b.N; i++ {
		_, err = MVCCFindSplitKey(ctx, eng, roachpb.RKeyMin,
			roachpb.RKeyMax, rangeBytes/2)
		if err != nil {
			b.Fatal(err)
		}
	}

	b.StopTimer()
}

type benchGarbageCollectOptions struct {
	benchDataOptions
	keyBytes       int
	deleteVersions int
}

func runMVCCGarbageCollect(
	ctx context.Context, b *testing.B, emk engineMaker, opts benchGarbageCollectOptions,
) {
	rng, _ := randutil.NewPseudoRand()
	eng := emk(b, "mvcc_gc")
	defer eng.Close()

	ts := hlc.Timestamp{}.Add(time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC).UnixNano(), 0)
	val := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, opts.valueBytes))

	// We write values at ts+(0,i), set now=ts+(1,0) so that we're ahead of all
	// the writes. This value doesn't matter in practice, as it's used only for
	// stats updates.
	now := ts.Add(1, 0)

	// Write 'numKeys' keys of the given 'keyBytes' and 'valueBytes' to the given
	// engine. For each key, write 'numVersions' versions, and add a
	// GCRequest_GCKey to the returned slice that affects the oldest
	// 'deleteVersions' versions. The first write for each key will be at `ts`,
	// the second one at `ts+(0,1)`, etc.
	//
	// NB: a real invocation of MVCCGarbageCollect typically has most of the keys
	// in sorted order. Here they will be ordered randomly.
	setup := func() (gcKeys []roachpb.GCRequest_GCKey) {
		batch := eng.NewBatch()
		for i := 0; i < opts.numKeys; i++ {
			key := randutil.RandBytes(rng, opts.keyBytes)
			if opts.deleteVersions > 0 {
				gcKeys = append(gcKeys, roachpb.GCRequest_GCKey{
					Timestamp: ts.Add(0, int32(opts.deleteVersions-1)),
					Key:       key,
				})
			}
			for j := 0; j < opts.numVersions; j++ {
				if err := MVCCPut(ctx, batch, nil /* ms */, key, ts.Add(0, int32(j)), val, nil); err != nil {
					b.Fatal(err)
				}
			}
		}
		if err := batch.Commit(false); err != nil {
			b.Fatal(err)
		}
		batch.Close()
		return gcKeys
	}

	gcKeys := setup()

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		batch := eng.NewWriteOnlyBatch()
		distinct := batch.Distinct()
		if err := MVCCGarbageCollect(ctx, distinct, nil /* ms */, gcKeys, now); err != nil {
			b.Fatal(err)
		}
		distinct.Close()
		batch.Close()
	}
}
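// Hypothetical sketch, not part of the original file: how the garbage
// collection runner above might be invoked. The parameter values are
// assumptions; keeping deleteVersions <= numVersions keeps the
// GCRequest_GCKey timestamps within the written history.
func exampleMVCCGarbageCollectBenchmark(b *testing.B, emk engineMaker) {
	ctx := context.Background()
	b.Run("gc", func(b *testing.B) {
		runMVCCGarbageCollect(ctx, b, emk, benchGarbageCollectOptions{
			benchDataOptions: benchDataOptions{numKeys: 1024, numVersions: 2, valueBytes: 32},
			keyBytes:         16,
			deleteVersions:   1,
		})
	})
}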
func runBatchApplyBatchRepr(
	ctx context.Context,
	b *testing.B,
	emk engineMaker,
	indexed, sequential bool,
	valueSize, batchSize int,
) {
	rng, _ := randutil.NewPseudoRand()
	value := roachpb.MakeValueFromBytes(randutil.RandBytes(rng, valueSize))
	keyBuf := append(make([]byte, 0, 64), []byte("key-")...)

	eng := emk(b, fmt.Sprintf("batch_apply_batch_repr_%d_%d", valueSize, batchSize))
	defer eng.Close()

	var repr []byte
	{
		order := make([]int, batchSize)
		for i := range order {
			order[i] = i
		}
		if !sequential {
			rng.Shuffle(len(order), func(i, j int) {
				order[i], order[j] = order[j], order[i]
			})
		}

		batch := eng.NewWriteOnlyBatch()
		defer batch.Close() // NB: hold open so batch.Repr() doesn't get reused

		for i := 0; i < batchSize; i++ {
			key := roachpb.Key(encoding.EncodeUvarintAscending(keyBuf[:4], uint64(order[i])))
			ts := hlc.Timestamp{WallTime: timeutil.Now().UnixNano()}
			if err := MVCCBlindPut(ctx, batch, nil, key, ts, value, nil); err != nil {
				b.Fatal(err)
			}
		}
		repr = batch.Repr()
	}

	b.SetBytes(int64(len(repr)))
	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		var batch Batch
		if !indexed {
			batch = eng.NewWriteOnlyBatch()
		} else {
			batch = eng.NewBatch()
		}
		if err := batch.ApplyBatchRepr(repr, false /* sync */); err != nil {
			b.Fatal(err)
		}
		if r, ok := batch.(*rocksDBBatch); ok {
			// Ensure mutations are flushed for RocksDB indexed batches.
			r.flushMutations()
		}
		batch.Close()
	}

	b.StopTimer()
}

func runExportToSst(
	ctx context.Context,
	b *testing.B,
	emk engineMaker,
	numKeys int,
	numRevisions int,
	exportAllRevisions bool,
	contention bool,
) {
	dir, cleanup := testutils.TempDir(b)
	defer cleanup()
	engine := emk(b, dir)
	defer engine.Close()

	batch := engine.NewWriteOnlyBatch()
	for i := 0; i < numKeys; i++ {
		key := make([]byte, 16)
		key = append(key, 'a', 'a', 'a')
		key = encoding.EncodeUint32Ascending(key, uint32(i))

		for j := 0; j < numRevisions; j++ {
			err := batch.Put(MVCCKey{Key: key, Timestamp: hlc.Timestamp{WallTime: int64(j + 1), Logical: 0}}, []byte("foobar"))
			if err != nil {
				b.Fatal(err)
			}
		}
	}
	if err := batch.Commit(true); err != nil {
		b.Fatal(err)
	}
	batch.Close()
	if err := engine.Flush(); err != nil {
		b.Fatal(err)
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		startTS := hlc.Timestamp{WallTime: int64(numRevisions / 2)}
		endTS := hlc.Timestamp{WallTime: int64(numRevisions + 2)}
		_, _, _, err := engine.ExportToSst(roachpb.KeyMin, roachpb.KeyMax, startTS, endTS, exportAllRevisions, 0 /* targetSize */, 0 /* maxSize */, IterOptions{
			LowerBound: roachpb.KeyMin,
			UpperBound: roachpb.KeyMax,
		})
		if err != nil {
			b.Fatal(err)
		}
	}
	b.StopTimer()
}
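// Hypothetical sketch, not part of the original file: a parameterized driver
// for runExportToSst. The chosen sizes and the engineMaker `emk` are
// assumptions for illustration only.
func exampleExportToSstBenchmark(b *testing.B, emk engineMaker) {
	ctx := context.Background()
	for _, exportAllRevisions := range []bool{false, true} {
		b.Run(fmt.Sprintf("exportAllRevisions=%t", exportAllRevisions), func(b *testing.B) {
			// 64k keys with 100 revisions each; the contention path is not
			// exercised in this sketch.
			runExportToSst(ctx, b, emk, 65536 /* numKeys */, 100 /* numRevisions */, exportAllRevisions, false /* contention */)
		})
	}
}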