github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/replay/replay_test.go

package replay

import (
    "bytes"
    "context"
    "encoding/hex"
    "fmt"
    "io"
    "math/rand"
    "os"
    "path/filepath"
    "sort"
    "strconv"
    "strings"
    "sync"
    "sync/atomic"
    "testing"
    "time"

    "github.com/cockroachdb/datadriven"
    "github.com/cockroachdb/pebble"
    "github.com/cockroachdb/pebble/internal/base"
    "github.com/cockroachdb/pebble/internal/datatest"
    "github.com/cockroachdb/pebble/internal/humanize"
    "github.com/cockroachdb/pebble/internal/invariants"
    "github.com/cockroachdb/pebble/internal/testkeys"
    "github.com/cockroachdb/pebble/rangekey"
    "github.com/cockroachdb/pebble/vfs"
    "github.com/stretchr/testify/require"
)

// runReplayTest runs the datadriven test at the provided path, exercising
// workload replay against an in-memory filesystem.
func runReplayTest(t *testing.T, path string) {
    fs := vfs.NewMem()
    var ctx context.Context
    var r Runner
    var ct *datatest.CompactionTracker
    datadriven.RunTest(t, path, func(t *testing.T, td *datadriven.TestData) string {
        switch td.Cmd {
        case "cat":
            var buf bytes.Buffer
            for _, arg := range td.CmdArgs {
                f, err := fs.Open(arg.String())
                if err != nil {
                    fmt.Fprintf(&buf, "%s: %s\n", arg, err)
                    continue
                }
                io.Copy(&buf, f)
                require.NoError(t, f.Close())
            }
            return buf.String()
        case "corpus":
            for _, arg := range td.CmdArgs {
                t.Run(fmt.Sprintf("corpus/%s", arg.String()), func(t *testing.T) {
                    collectCorpus(t, fs, arg.String())
                })
            }
            return ""
        case "list-files":
            return runListFiles(t, fs, td)
        case "replay":
            name := td.CmdArgs[0].String()
            pacerVariant := td.CmdArgs[1].String()
            var pacer Pacer
            if pacerVariant == "reference" {
                pacer = PaceByReferenceReadAmp{}
            } else if pacerVariant == "fixed" {
                i, err := strconv.Atoi(td.CmdArgs[2].String())
                require.NoError(t, err)
                pacer = PaceByFixedReadAmp(i)
            } else {
                pacer = Unpaced{}
            }

            // Convert the testdata/replay:235 datadriven command position into
            // a run directory suffixed with the line number: e.g., 'run-235'.
            lineOffset := strings.LastIndexByte(td.Pos, ':')
            require.Positive(t, lineOffset)
            runDir := fmt.Sprintf("run-%s", td.Pos[lineOffset+1:])
            if err := fs.MkdirAll(runDir, os.ModePerm); err != nil {
                return err.Error()
            }

            checkpointDir := fs.PathJoin(name, "checkpoint")
            ok, err := vfs.Clone(fs, fs, checkpointDir, runDir)
            if err != nil {
                return err.Error()
            } else if !ok {
                return fmt.Sprintf("%q does not exist", checkpointDir)
            }

            opts := &pebble.Options{
                FS:                        fs,
                Comparer:                  testkeys.Comparer,
                FormatMajorVersion:        pebble.FormatRangeKeys,
                L0CompactionFileThreshold: 1,
            }
            setDefaultExperimentalOpts(opts)
            ct = datatest.NewCompactionTracker(opts)

            r = Runner{
                RunDir:       runDir,
                WorkloadFS:   fs,
                WorkloadPath: name,
                Pacer:        pacer,
                Opts:         opts,
            }
            ctx = context.Background()
            if err := r.Run(ctx); err != nil {
                return err.Error()
            }
            return ""
        case "scan-keys":
            var buf bytes.Buffer
            it, _ := r.d.NewIter(nil)
            defer it.Close()
            for valid := it.First(); valid; valid = it.Next() {
                fmt.Fprintf(&buf, "%s: %s\n", it.Key(), it.Value())
            }
            if err := it.Error(); err != nil {
                fmt.Fprintln(&buf, err.Error())
            }
            return buf.String()
        case "tree":
            return fs.String()
        case "wait-for-compactions":
            var target int
            if len(td.CmdArgs) == 1 {
                i, err := strconv.Atoi(td.CmdArgs[0].String())
                require.NoError(t, err)
                target = i
            }
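            // Block until the number of in-flight compactions observed by the
            // compaction tracker equals the target (zero if no count is given).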
            ct.WaitForInflightCompactionsToEqual(target)
            return ""
        case "wait":
            m, err := r.Wait()
            if err != nil {
                return err.Error()
            }
            return fmt.Sprintf("replayed %s in writes", humanize.Bytes.Uint64(m.WriteBytes))
        case "close":
            if err := r.Close(); err != nil {
                return err.Error()
            }
            return ""
        default:
            return fmt.Sprintf("unrecognized command %q", td.Cmd)
        }
    })
}

// setDefaultExperimentalOpts applies the experimental options shared by the
// replay tests.
func setDefaultExperimentalOpts(opts *pebble.Options) {
    opts.Experimental.TableCacheShards = 2
}

func TestReplay(t *testing.T) {
    runReplayTest(t, "testdata/replay")
}

func TestReplayPaced(t *testing.T) {
    runReplayTest(t, "testdata/replay_paced")
}

// TestLoadFlushedSSTableKeys exercises loadFlushedSSTableKeys: it commits and
// flushes batches, then prints the keys recovered from the flushed sstables.
func TestLoadFlushedSSTableKeys(t *testing.T) {
    var buf bytes.Buffer
    var diskFileNums []base.DiskFileNum
    opts := &pebble.Options{
        DisableAutomaticCompactions: true,
        EventListener: &pebble.EventListener{
            FlushEnd: func(info pebble.FlushInfo) {
                for _, tbl := range info.Output {
                    diskFileNums = append(diskFileNums, tbl.FileNum.DiskFileNum())
                }
            },
        },
        FS:                 vfs.NewMem(),
        Comparer:           testkeys.Comparer,
        FormatMajorVersion: pebble.FormatRangeKeys,
    }
    d, err := pebble.Open("", opts)
    require.NoError(t, err)
    defer d.Close()

    var flushBufs flushBuffers
    datadriven.RunTest(t, "testdata/flushed_sstable_keys", func(t *testing.T, td *datadriven.TestData) string {
        switch td.Cmd {
        case "commit":
            b := d.NewIndexedBatch()
            if err := datatest.DefineBatch(td, b); err != nil {
                return err.Error()
            }
            if err := b.Commit(nil); err != nil {
                return err.Error()
            }
            return ""
        case "flush":
            if err := d.Flush(); err != nil {
                return err.Error()
            }

            b := d.NewBatch()
            err := loadFlushedSSTableKeys(b, opts.FS, "", diskFileNums, opts.MakeReaderOptions(), &flushBufs)
            if err != nil {
                b.Close()
                return err.Error()
            }

            br, _ := pebble.ReadBatch(b.Repr())
            kind, ukey, v, ok, err := br.Next()
            for ; ok; kind, ukey, v, ok, err = br.Next() {
                fmt.Fprintf(&buf, "%s.%s", ukey, kind)
                switch kind {
                case base.InternalKeyKindRangeDelete,
                    base.InternalKeyKindRangeKeyDelete:
                    fmt.Fprintf(&buf, "-%s", v)
                case base.InternalKeyKindSet,
                    base.InternalKeyKindMerge:
                    fmt.Fprintf(&buf, ": %s", v)
                case base.InternalKeyKindRangeKeySet, base.InternalKeyKindRangeKeyUnset:
                    s, err := rangekey.Decode(base.MakeInternalKey(ukey, 0, kind), v, nil)
                    if err != nil {
                        return err.Error()
                    }
                    if kind == base.InternalKeyKindRangeKeySet {
                        fmt.Fprintf(&buf, "-%s: %s → %s", s.End, s.Keys[0].Suffix, s.Keys[0].Value)
                    } else {
                        fmt.Fprintf(&buf, "-%s: %s", s.End, s.Keys[0].Suffix)
                    }
                case base.InternalKeyKindDelete, base.InternalKeyKindSingleDelete:
                default:
                    fmt.Fprintf(&buf, ": %x", v)
                }
                fmt.Fprintln(&buf)
            }
            if err != nil {
                fmt.Fprintf(&buf, "err: %s\n", err)
            }

            s := buf.String()
            buf.Reset()
            require.NoError(t, b.Close())

            diskFileNums = diskFileNums[:0]
            return s
        default:
            return fmt.Sprintf("unrecognized command %q", td.Cmd)
        }
    })
}

// collectCorpus runs the datadriven corpus test `name`, collecting a workload
// into the `name` directory of the provided in-memory filesystem.
func collectCorpus(t *testing.T, fs *vfs.MemFS, name string) {
    require.NoError(t, fs.RemoveAll("build"))
    require.NoError(t, fs.MkdirAll("build", os.ModePerm))

    var d *pebble.DB
    var wc *WorkloadCollector
    defer func() {
        if d != nil {
            require.NoError(t, d.Close())
        }
    }()
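    // The corpus commands open a database with a WorkloadCollector attached,
    // write and flush data, and inspect the files the collector copies into
    // the workload directory.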
    datadriven.RunTest(t, filepath.Join("testdata", "corpus", name), func(t *testing.T, td *datadriven.TestData) string {
        switch td.Cmd {
        case "commit":
            b := d.NewBatch()
            if err := datatest.DefineBatch(td, b); err != nil {
                return err.Error()
            }
            if err := b.Commit(nil); err != nil {
                return err.Error()
            }
            return ""
        case "flush":
            require.NoError(t, d.Flush())
            return ""
        case "list-files":
            if d != nil {
                d.TestOnlyWaitForCleaning()
            }
            return runListFiles(t, fs, td)
        case "open":
            wc = NewWorkloadCollector("build")
            opts := &pebble.Options{
                Comparer:                    testkeys.Comparer,
                DisableAutomaticCompactions: true,
                FormatMajorVersion:          pebble.FormatRangeKeys,
                FS:                          fs,
                MaxManifestFileSize:         96,
            }
            setDefaultExperimentalOpts(opts)
            wc.Attach(opts)
            var err error
            d, err = pebble.Open("build", opts)
            require.NoError(t, err)
            return ""
        case "close":
            err := d.Close()
            require.NoError(t, err)
            d = nil
            return ""
        case "start":
            require.NoError(t, fs.MkdirAll(name, os.ModePerm))
            require.NotNil(t, wc)
            wc.Start(fs, name)
            require.NoError(t, d.Checkpoint(fs.PathJoin(name, "checkpoint"), pebble.WithFlushedWAL()))
            return "started"
        case "stat":
            var buf bytes.Buffer
            for _, arg := range td.CmdArgs {
                fi, err := fs.Stat(arg.String())
                if err != nil {
                    fmt.Fprintf(&buf, "%s: %s\n", arg.String(), err)
                    continue
                }
                fmt.Fprintf(&buf, "%s:\n", arg.String())
                fmt.Fprintf(&buf, " size: %d\n", fi.Size())
            }
            return buf.String()
        case "stop":
            // Wait until every sstable enqueued for copying has been copied
            // before stopping the collector.
            wc.mu.Lock()
            for wc.mu.tablesEnqueued != wc.mu.tablesCopied {
                wc.mu.copyCond.Wait()
            }
            wc.mu.Unlock()
            wc.Stop()
            return "stopped"
        case "tree":
            return fs.String()
        case "make-file":
            dir := td.CmdArgs[0].String()
            require.NoError(t, fs.MkdirAll(dir, os.ModePerm))
            fT := td.CmdArgs[1].String()
            filePath := fs.PathJoin(dir, td.CmdArgs[2].String())

            if fT != "file" {
                fileNumInt, err := strconv.Atoi(td.CmdArgs[2].String())
                require.NoError(t, err)
                fileNum := base.FileNum(fileNumInt)
                switch fT {
                case "table":
                    filePath = base.MakeFilepath(fs, dir, base.FileTypeTable, fileNum.DiskFileNum())
                case "log":
                    filePath = base.MakeFilepath(fs, dir, base.FileTypeLog, fileNum.DiskFileNum())
                case "manifest":
                    filePath = base.MakeFilepath(fs, dir, base.FileTypeManifest, fileNum.DiskFileNum())
                }
            }
            f, err := fs.Create(filePath)
            require.NoError(t, err)
            b, err := hex.DecodeString(strings.ReplaceAll(td.Input, "\n", ""))
            require.NoError(t, err)
            _, err = f.Write(b)
            require.NoError(t, err)
            return "created"
        case "find-workload-files":
            var buf bytes.Buffer
            dir := td.CmdArgs[0].String()
            m, s, err := findWorkloadFiles(dir, fs)

            fmt.Fprintln(&buf, "manifests")
            sort.Strings(m)
            for _, elem := range m {
                fmt.Fprintf(&buf, " %s\n", elem)
            }
            var res []string
            for key := range s {
                res = append(res, key.String())
            }
            sort.Strings(res)

            fmt.Fprintln(&buf, "sstables")
            for _, elem := range res {
                fmt.Fprintf(&buf, " %s\n", elem)
            }
            fmt.Fprintln(&buf, "error")
            if err != nil {
                fmt.Fprintf(&buf, " %s\n", err.Error())
            }
            return buf.String()
        case "find-manifest-start":
            var buf bytes.Buffer
            dir := td.CmdArgs[0].String()
            m, _, err := findWorkloadFiles(dir, fs)
            sort.Strings(m)
            require.NoError(t, err)
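            // Locate the manifest, and the offset within it, at which the
            // recorded workload begins.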
            i, o, err := findManifestStart(dir, fs, m)
            errString := "nil"
            if err != nil {
                errString = err.Error()
            }
            fmt.Fprintf(&buf, "index: %d, offset: %d, error: %s\n", i, o, errString)
            return buf.String()
        case "delete-all":
            err := fs.RemoveAll(td.CmdArgs[0].String())
            if err != nil {
                return err.Error()
            }
            return ""
        default:
            return fmt.Sprintf("unrecognized command %q", td.Cmd)
        }
    })
}

func TestCollectCorpus(t *testing.T) {
    fs := vfs.NewMem()
    datadriven.Walk(t, "testdata/corpus", func(t *testing.T, path string) {
        collectCorpus(t, fs, filepath.Base(path))
        fs = vfs.NewMem()
    })
}

func runListFiles(t *testing.T, fs vfs.FS, td *datadriven.TestData) string {
    var buf bytes.Buffer
    for _, arg := range td.CmdArgs {
        listFiles(t, fs, &buf, arg.String())
    }
    return buf.String()
}

func TestBenchmarkString(t *testing.T) {
    m := Metrics{
        Final:               &pebble.Metrics{},
        EstimatedDebt:       SampledMetric{samples: []sample{{value: 5 << 25}}},
        PaceDuration:        time.Second / 4,
        QuiesceDuration:     time.Second / 2,
        ReadAmp:             SampledMetric{samples: []sample{{value: 10}}},
        TombstoneCount:      SampledMetric{samples: []sample{{value: 295}}},
        TotalSize:           SampledMetric{samples: []sample{{value: 5 << 30}}},
        TotalWriteAmp:       5.6,
        WorkloadDuration:    time.Second,
        WriteBytes:          30 * (1 << 20),
        WriteStalls:         map[string]int{"memtable": 1, "L0": 2},
        WriteStallsDuration: map[string]time.Duration{"memtable": time.Minute, "L0": time.Hour},
    }
    m.Ingest.BytesIntoL0 = 5 << 20
    m.Ingest.BytesWeightedByLevel = 9 << 20

    var buf bytes.Buffer
    require.NoError(t, m.WriteBenchmarkString("tpcc", &buf))
    require.Equal(t, strings.TrimSpace(`
BenchmarkBenchmarkReplay/tpcc/CompactionCounts 1 0 compactions 0 default 0 delete 0 elision 0 move 0 read 0 rewrite 0 multilevel
BenchmarkBenchmarkReplay/tpcc/DatabaseSize/mean 1 5.36870912e+09 bytes
BenchmarkBenchmarkReplay/tpcc/DatabaseSize/max 1 5.36870912e+09 bytes
BenchmarkBenchmarkReplay/tpcc/DurationWorkload 1 1 sec/op
BenchmarkBenchmarkReplay/tpcc/DurationQuiescing 1 0.5 sec/op
BenchmarkBenchmarkReplay/tpcc/DurationPaceDelay 1 0.25 sec/op
BenchmarkBenchmarkReplay/tpcc/EstimatedDebt/mean 1 1.6777216e+08 bytes
BenchmarkBenchmarkReplay/tpcc/EstimatedDebt/max 1 1.6777216e+08 bytes
BenchmarkBenchmarkReplay/tpcc/FlushUtilization 1 0 util
BenchmarkBenchmarkReplay/tpcc/IngestedIntoL0 1 5.24288e+06 bytes
BenchmarkBenchmarkReplay/tpcc/IngestWeightedByLevel 1 9.437184e+06 bytes
BenchmarkBenchmarkReplay/tpcc/ReadAmp/mean 1 10 files
BenchmarkBenchmarkReplay/tpcc/ReadAmp/max 1 10 files
BenchmarkBenchmarkReplay/tpcc/TombstoneCount/mean 1 295 tombstones
BenchmarkBenchmarkReplay/tpcc/TombstoneCount/max 1 295 tombstones
BenchmarkBenchmarkReplay/tpcc/Throughput 1 2.097152e+07 B/s
BenchmarkBenchmarkReplay/tpcc/WriteAmp 1 5.6 wamp
BenchmarkBenchmarkReplay/tpcc/WriteStall/L0 1 2 stalls 3600 stallsec/op
BenchmarkBenchmarkReplay/tpcc/WriteStall/memtable 1 1 stalls 60 stallsec/op`),
        strings.TrimSpace(buf.String()))
}

func listFiles(t *testing.T, fs vfs.FS, w io.Writer, name string) {
    ls, err := fs.List(name)
    if err != nil {
        fmt.Fprintf(w, "%s: %s\n", name, err)
        return
    }
    sort.Strings(ls)
    fmt.Fprintf(w, "%s:\n", name)
    for _, dirent := range ls {
        fmt.Fprintf(w, " %s\n", dirent)
    }
}
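// The tests below build a randomly generated, flush-heavy workload once and
// replay it, asserting that (*Runner).Wait terminates once compactions
// quiesce.
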
// TestCompactionsQuiesce replays a workload that produces a nontrivial number
// of compactions several times. It's intended to exercise Wait's termination,
// which is dependent on compactions quiescing.
func TestCompactionsQuiesce(t *testing.T) {
    const replayCount = 1
    workloadFS := getHeavyWorkload(t)
    fs := vfs.NewMem()
    var done [replayCount]atomic.Bool
    for i := 0; i < replayCount; i++ {
        func(i int) {
            runDir := fmt.Sprintf("run%d", i)
            require.NoError(t, fs.MkdirAll(runDir, os.ModePerm))
            r := Runner{
                RunDir:       runDir,
                WorkloadFS:   workloadFS,
                WorkloadPath: "workload",
                Pacer:        Unpaced{},
                Opts: &pebble.Options{
                    Comparer:           testkeys.Comparer,
                    FS:                 fs,
                    FormatMajorVersion: pebble.FormatNewest,
                    LBaseMaxBytes:      1,
                },
            }
            r.Opts.Experimental.LevelMultiplier = 2
            require.NoError(t, r.Run(context.Background()))
            defer r.Close()

            var m Metrics
            var err error
            go func() {
                m, err = r.Wait()
                done[i].Store(true)
            }()

            wait := 30 * time.Second
            if invariants.Enabled {
                wait = time.Minute
            }

            // The above call to [Wait] should eventually return. [Wait] blocks
            // until the workload has replayed AND compactions have quiesced. A
            // bug in either could prevent [Wait] from ever returning.
            require.Eventually(t, func() bool { return done[i].Load() },
                wait, time.Millisecond, "(*replay.Runner).Wait didn't terminate")
            require.NoError(t, err)
            // Require more than 5 compactions to have run.
            require.Greater(t, m.Final.Compact.Count, int64(5))
            require.Equal(t, int64(0), m.Final.Compact.NumInProgress)
            for l := 0; l < len(m.Final.Levels)-1; l++ {
                require.Less(t, m.Final.Levels[l].Score, 1.0)
            }
        }(i)
    }
}

// getHeavyWorkload returns a FS containing a workload in the `workload`
// directory that flushes enough randomly generated keys that replaying it
// should generate a non-trivial number of compactions.
func getHeavyWorkload(t *testing.T) vfs.FS {
    heavyWorkload.Once.Do(func() {
        t.Run("buildHeavyWorkload", func(t *testing.T) {
            heavyWorkload.fs = buildHeavyWorkload(t)
        })
    })
    return heavyWorkload.fs
}

var heavyWorkload struct {
    sync.Once
    fs vfs.FS
}

// buildHeavyWorkload commits and flushes batches of random keys, collecting
// the resulting workload into the `workload` directory of a fresh in-memory
// filesystem.
func buildHeavyWorkload(t *testing.T) vfs.FS {
    o := &pebble.Options{
        Comparer:           testkeys.Comparer,
        FS:                 vfs.NewMem(),
        FormatMajorVersion: pebble.FormatNewest,
    }
    wc := NewWorkloadCollector("")
    wc.Attach(o)
    d, err := pebble.Open("", o)
    require.NoError(t, err)

    destFS := vfs.NewMem()
    require.NoError(t, destFS.MkdirAll("workload", os.ModePerm))
    wc.Start(destFS, "workload")

    ks := testkeys.Alpha(5)
    var bufKey = make([]byte, ks.MaxLen())
    var bufVal [512]byte
    rng := rand.New(rand.NewSource(time.Now().UnixNano()))
    for i := 0; i < 100; i++ {
        b := d.NewBatch()
        for j := 0; j < 1000; j++ {
            rng.Read(bufVal[:])
            n := testkeys.WriteKey(bufKey[:], ks, rng.Int63n(ks.Count()))
            require.NoError(t, b.Set(bufKey[:n], bufVal[:], pebble.NoSync))
        }
        require.NoError(t, b.Commit(pebble.NoSync))
        require.NoError(t, d.Flush())
    }
    wc.WaitAndStop()

    defer d.Close()
    return destFS
}