github.com/cockroachdb/pebble@v1.1.2/metamorphic/meta.go (about) 1 // Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 // Package metamorphic provides a testing framework for running randomized tests 6 // over multiple Pebble databases with varying configurations. Logically 7 // equivalent operations should result in equivalent output across all 8 // configurations. 9 package metamorphic 10 11 import ( 12 "context" 13 "fmt" 14 "io" 15 "os" 16 "os/exec" 17 "path" 18 "path/filepath" 19 "regexp" 20 "sort" 21 "testing" 22 "time" 23 24 "github.com/cockroachdb/pebble/internal/base" 25 "github.com/cockroachdb/pebble/internal/randvar" 26 "github.com/cockroachdb/pebble/internal/testkeys" 27 "github.com/cockroachdb/pebble/vfs" 28 "github.com/cockroachdb/pebble/vfs/errorfs" 29 "github.com/pmezard/go-difflib/difflib" 30 "github.com/stretchr/testify/require" 31 "golang.org/x/exp/rand" 32 "golang.org/x/sync/errgroup" 33 ) 34 35 type runAndCompareOptions struct { 36 seed uint64 37 ops randvar.Static 38 previousOpsPath string 39 initialStatePath string 40 initialStateDesc string 41 traceFile string 42 innerBinary string 43 mutateTestOptions []func(*TestOptions) 44 customRuns map[string]string 45 runOnceOptions 46 } 47 48 // A RunOption configures the behavior of RunAndCompare. 49 type RunOption interface { 50 apply(*runAndCompareOptions) 51 } 52 53 // Seed configures generation to use the provided seed. Seed may be used to 54 // deterministically reproduce the same run. 55 type Seed uint64 56 57 func (s Seed) apply(ro *runAndCompareOptions) { ro.seed = uint64(s) } 58 59 // ExtendPreviousRun configures RunAndCompare to use the output of a previous 60 // metamorphic test run to seed the this run. It's used in the crossversion 61 // metamorphic tests, in which a data directory is upgraded through multiple 62 // versions of Pebble, exercising upgrade code paths and cross-version 63 // compatibility. 64 // 65 // The opsPath should be the filesystem path to the ops file containing the 66 // operations run within the previous iteration of the metamorphic test. It's 67 // used to inform operation generation to prefer using keys used in the previous 68 // run, which are therefore more likely to be "interesting." 69 // 70 // The initialStatePath argument should be the filesystem path to the data 71 // directory containing the database where the previous run of the metamorphic 72 // test left off. 73 // 74 // The initialStateDesc argument is presentational and should hold a 75 // human-readable description of the initial state. 76 func ExtendPreviousRun(opsPath, initialStatePath, initialStateDesc string) RunOption { 77 return closureOpt(func(ro *runAndCompareOptions) { 78 ro.previousOpsPath = opsPath 79 ro.initialStatePath = initialStatePath 80 ro.initialStateDesc = initialStateDesc 81 }) 82 } 83 84 var ( 85 // UseDisk configures RunAndCompare to use the physical filesystem for all 86 // generated runs. 87 UseDisk = closureOpt(func(ro *runAndCompareOptions) { 88 ro.mutateTestOptions = append(ro.mutateTestOptions, func(to *TestOptions) { to.useDisk = true }) 89 }) 90 // UseInMemory configures RunAndCompare to use an in-memory virtual 91 // filesystem for all generated runs. 92 UseInMemory = closureOpt(func(ro *runAndCompareOptions) { 93 ro.mutateTestOptions = append(ro.mutateTestOptions, func(to *TestOptions) { to.useDisk = false }) 94 }) 95 ) 96 97 // OpCount configures the random variable for the number of operations to 98 // generate. 99 func OpCount(rv randvar.Static) RunOption { 100 return closureOpt(func(ro *runAndCompareOptions) { ro.ops = rv }) 101 } 102 103 // RuntimeTrace configures each test run to collect a runtime trace and output 104 // it with the provided filename. 105 func RuntimeTrace(name string) RunOption { 106 return closureOpt(func(ro *runAndCompareOptions) { ro.traceFile = name }) 107 } 108 109 // InnerBinary configures the binary that is called for each run. If not 110 // specified, this binary (os.Args[0]) is called. 111 func InnerBinary(path string) RunOption { 112 return closureOpt(func(ro *runAndCompareOptions) { ro.innerBinary = path }) 113 } 114 115 // ParseCustomTestOption adds support for parsing the provided CustomOption from 116 // OPTIONS files serialized by the metamorphic tests. This RunOption alone does 117 // not cause the metamorphic tests to run with any variant of the provided 118 // CustomOption set. 119 func ParseCustomTestOption(name string, parseFn func(value string) (CustomOption, bool)) RunOption { 120 return closureOpt(func(ro *runAndCompareOptions) { ro.customOptionParsers[name] = parseFn }) 121 } 122 123 // AddCustomRun adds an additional run of the metamorphic tests, using the 124 // provided OPTIONS file contents. The default options will be used, except 125 // those options that are overriden by the provided OPTIONS string. 126 func AddCustomRun(name string, serializedOptions string) RunOption { 127 return closureOpt(func(ro *runAndCompareOptions) { ro.customRuns[name] = serializedOptions }) 128 } 129 130 type closureOpt func(*runAndCompareOptions) 131 132 func (f closureOpt) apply(ro *runAndCompareOptions) { f(ro) } 133 134 // RunAndCompare runs the metamorphic tests, using the provided root directory 135 // to hold test data. 136 func RunAndCompare(t *testing.T, rootDir string, rOpts ...RunOption) { 137 runOpts := runAndCompareOptions{ 138 ops: randvar.NewUniform(1000, 10000), 139 customRuns: map[string]string{}, 140 runOnceOptions: runOnceOptions{ 141 customOptionParsers: map[string]func(string) (CustomOption, bool){}, 142 }, 143 } 144 for _, o := range rOpts { 145 o.apply(&runOpts) 146 } 147 if runOpts.seed == 0 { 148 runOpts.seed = uint64(time.Now().UnixNano()) 149 } 150 151 require.NoError(t, os.MkdirAll(rootDir, 0755)) 152 metaDir, err := os.MkdirTemp(rootDir, time.Now().Format("060102-150405.000")) 153 require.NoError(t, err) 154 require.NoError(t, os.MkdirAll(metaDir, 0755)) 155 defer func() { 156 if !t.Failed() && !runOpts.keep { 157 _ = os.RemoveAll(metaDir) 158 } 159 }() 160 161 rng := rand.New(rand.NewSource(runOpts.seed)) 162 opCount := runOpts.ops.Uint64(rng) 163 164 // Generate a new set of random ops, writing them to <dir>/ops. These will be 165 // read by the child processes when performing a test run. 166 km := newKeyManager() 167 cfg := presetConfigs[rng.Intn(len(presetConfigs))] 168 if runOpts.previousOpsPath != "" { 169 // During cross-version testing, we load keys from an `ops` file 170 // produced by a metamorphic test run of an earlier Pebble version. 171 // Seeding the keys ensure we generate interesting operations, including 172 // ones with key shadowing, merging, etc. 173 opsPath := filepath.Join(filepath.Dir(filepath.Clean(runOpts.previousOpsPath)), "ops") 174 opsData, err := os.ReadFile(opsPath) 175 require.NoError(t, err) 176 ops, err := parse(opsData) 177 require.NoError(t, err) 178 loadPrecedingKeys(t, ops, &cfg, km) 179 } 180 ops := generate(rng, opCount, cfg, km) 181 opsPath := filepath.Join(metaDir, "ops") 182 formattedOps := formatOps(ops) 183 require.NoError(t, os.WriteFile(opsPath, []byte(formattedOps), 0644)) 184 185 // runOptions performs a particular test run with the specified options. The 186 // options are written to <run-dir>/OPTIONS and a child process is created to 187 // actually execute the test. 188 runOptions := func(t *testing.T, opts *TestOptions) { 189 if opts.Opts.Cache != nil { 190 defer opts.Opts.Cache.Unref() 191 } 192 for _, fn := range runOpts.mutateTestOptions { 193 fn(opts) 194 } 195 runDir := filepath.Join(metaDir, path.Base(t.Name())) 196 require.NoError(t, os.MkdirAll(runDir, 0755)) 197 198 optionsPath := filepath.Join(runDir, "OPTIONS") 199 optionsStr := optionsToString(opts) 200 require.NoError(t, os.WriteFile(optionsPath, []byte(optionsStr), 0644)) 201 202 args := []string{ 203 "-keep=" + fmt.Sprint(runOpts.keep), 204 "-run-dir=" + runDir, 205 "-test.run=" + t.Name() + "$", 206 } 207 if runOpts.traceFile != "" { 208 args = append(args, "-test.trace="+filepath.Join(runDir, runOpts.traceFile)) 209 } 210 211 binary := os.Args[0] 212 if runOpts.innerBinary != "" { 213 binary = runOpts.innerBinary 214 } 215 cmd := exec.Command(binary, args...) 216 out, err := cmd.CombinedOutput() 217 if err != nil { 218 t.Fatalf(` 219 ===== SEED ===== 220 %d 221 ===== ERR ===== 222 %v 223 ===== OUT ===== 224 %s 225 ===== OPTIONS ===== 226 %s 227 ===== OPS ===== 228 %s 229 ===== HISTORY ===== 230 %s`, runOpts.seed, err, out, optionsStr, formattedOps, readFile(filepath.Join(runDir, "history"))) 231 } 232 } 233 234 var names []string 235 options := map[string]*TestOptions{} 236 237 // Create the standard options. 238 for i, opts := range standardOptions() { 239 name := fmt.Sprintf("standard-%03d", i) 240 names = append(names, name) 241 options[name] = opts 242 } 243 244 // Create the custom option runs, if any. 245 for name, customOptsStr := range runOpts.customRuns { 246 options[name] = defaultTestOptions() 247 if err := parseOptions(options[name], customOptsStr, runOpts.customOptionParsers); err != nil { 248 t.Fatalf("custom opts %q: %s", name, err) 249 } 250 } 251 // Sort the custom options names for determinism (they're currently in 252 // random order from map iteration). 253 sort.Strings(names[len(names)-len(runOpts.customRuns):]) 254 255 // Create random options. We make an arbitrary choice to run with as many 256 // random options as we have standard options. 257 nOpts := len(options) 258 for i := 0; i < nOpts; i++ { 259 name := fmt.Sprintf("random-%03d", i) 260 names = append(names, name) 261 opts := randomOptions(rng, runOpts.customOptionParsers) 262 options[name] = opts 263 } 264 265 // If the user provided the path to an initial database state to use, update 266 // all the options to pull from it. 267 if runOpts.initialStatePath != "" { 268 for _, o := range options { 269 var err error 270 o.initialStatePath, err = filepath.Abs(runOpts.initialStatePath) 271 require.NoError(t, err) 272 o.initialStateDesc = runOpts.initialStateDesc 273 } 274 } 275 276 // Run the options. 277 t.Run("execution", func(t *testing.T) { 278 for _, name := range names { 279 name := name 280 t.Run(name, func(t *testing.T) { 281 t.Parallel() 282 runOptions(t, options[name]) 283 }) 284 } 285 }) 286 // NB: The above 'execution' subtest will not complete until all of the 287 // individual execution/ subtests have completed. The grouping within the 288 // `execution` subtest ensures all the histories are available when we 289 // proceed to comparing against the base history. 290 291 // Don't bother comparing output if we've already failed. 292 if t.Failed() { 293 return 294 } 295 296 t.Run("compare", func(t *testing.T) { 297 getHistoryPath := func(name string) string { 298 return filepath.Join(metaDir, name, "history") 299 } 300 301 base := readHistory(t, getHistoryPath(names[0])) 302 base = reorderHistory(base) 303 for i := 1; i < len(names); i++ { 304 t.Run(names[i], func(t *testing.T) { 305 lines := readHistory(t, getHistoryPath(names[i])) 306 lines = reorderHistory(lines) 307 diff := difflib.UnifiedDiff{ 308 A: base, 309 B: lines, 310 Context: 5, 311 } 312 text, err := difflib.GetUnifiedDiffString(diff) 313 require.NoError(t, err) 314 if text != "" { 315 // NB: We force an exit rather than using t.Fatal because the latter 316 // will run another instance of the test if -count is specified, while 317 // we're happy to exit on the first failure. 318 optionsStrA := optionsToString(options[names[0]]) 319 optionsStrB := optionsToString(options[names[i]]) 320 321 fmt.Printf(` 322 ===== SEED ===== 323 %d 324 ===== DIFF ===== 325 %s/{%s,%s} 326 %s 327 ===== OPTIONS %s ===== 328 %s 329 ===== OPTIONS %s ===== 330 %s 331 ===== OPS ===== 332 %s 333 `, runOpts.seed, metaDir, names[0], names[i], text, names[0], optionsStrA, names[i], optionsStrB, formattedOps) 334 os.Exit(1) 335 } 336 }) 337 } 338 }) 339 } 340 341 type runOnceOptions struct { 342 keep bool 343 maxThreads int 344 errorRate float64 345 failRegexp *regexp.Regexp 346 customOptionParsers map[string]func(string) (CustomOption, bool) 347 } 348 349 // A RunOnceOption configures the behavior of a single run of the metamorphic 350 // tests. 351 type RunOnceOption interface { 352 applyOnce(*runOnceOptions) 353 } 354 355 // KeepData keeps the database directory, even on successful runs. If the test 356 // used an in-memory filesystem, the in-memory filesystem will be persisted to 357 // the run directory. 358 type KeepData struct{} 359 360 func (KeepData) apply(ro *runAndCompareOptions) { ro.keep = true } 361 func (KeepData) applyOnce(ro *runOnceOptions) { ro.keep = true } 362 363 // InjectErrorsRate configures the run to inject errors into read-only 364 // filesystem operations and retry injected errors. 365 type InjectErrorsRate float64 366 367 func (r InjectErrorsRate) apply(ro *runAndCompareOptions) { ro.errorRate = float64(r) } 368 func (r InjectErrorsRate) applyOnce(ro *runOnceOptions) { ro.errorRate = float64(r) } 369 370 // MaxThreads sets an upper bound on the number of parallel execution threads 371 // during replay. 372 type MaxThreads int 373 374 func (m MaxThreads) apply(ro *runAndCompareOptions) { ro.maxThreads = int(m) } 375 func (m MaxThreads) applyOnce(ro *runOnceOptions) { ro.maxThreads = int(m) } 376 377 // FailOnMatch configures the run to fail immediately if the history matches the 378 // provided regular expression. 379 type FailOnMatch struct { 380 *regexp.Regexp 381 } 382 383 func (f FailOnMatch) apply(ro *runAndCompareOptions) { ro.failRegexp = f.Regexp } 384 func (f FailOnMatch) applyOnce(ro *runOnceOptions) { ro.failRegexp = f.Regexp } 385 386 // RunOnce performs one run of the metamorphic tests. RunOnce expects the 387 // directory named by `runDir` to already exist and contain an `OPTIONS` file 388 // containing the test run's configuration. The history of the run is persisted 389 // to a file at the path `historyPath`. 390 // 391 // The `seed` parameter is not functional; it's used for context in logging. 392 func RunOnce(t TestingT, runDir string, seed uint64, historyPath string, rOpts ...RunOnceOption) { 393 runOpts := runOnceOptions{ 394 customOptionParsers: map[string]func(string) (CustomOption, bool){}, 395 } 396 for _, o := range rOpts { 397 o.applyOnce(&runOpts) 398 } 399 400 opsPath := filepath.Join(filepath.Dir(filepath.Clean(runDir)), "ops") 401 opsData, err := os.ReadFile(opsPath) 402 require.NoError(t, err) 403 404 ops, err := parse(opsData) 405 require.NoError(t, err) 406 _ = ops 407 408 optionsPath := filepath.Join(runDir, "OPTIONS") 409 optionsData, err := os.ReadFile(optionsPath) 410 require.NoError(t, err) 411 412 // NB: It's important to use defaultTestOptions() here as the base into 413 // which we parse the serialized options. It contains the relevant defaults, 414 // like the appropriate block-property collectors. 415 testOpts := defaultTestOptions() 416 opts := testOpts.Opts 417 require.NoError(t, parseOptions(testOpts, string(optionsData), runOpts.customOptionParsers)) 418 419 // Always use our custom comparer which provides a Split method, splitting 420 // keys at the trailing '@'. 421 opts.Comparer = testkeys.Comparer 422 // Use an archive cleaner to ease post-mortem debugging. 423 opts.Cleaner = base.ArchiveCleaner{} 424 425 // Set up the filesystem to use for the test. Note that by default we use an 426 // in-memory FS. 427 if testOpts.useDisk { 428 opts.FS = vfs.Default 429 require.NoError(t, os.RemoveAll(opts.FS.PathJoin(runDir, "data"))) 430 } else { 431 opts.Cleaner = base.ArchiveCleaner{} 432 if testOpts.strictFS { 433 opts.FS = vfs.NewStrictMem() 434 } else { 435 opts.FS = vfs.NewMem() 436 } 437 } 438 opts.WithFSDefaults() 439 440 threads := testOpts.threads 441 if runOpts.maxThreads < threads { 442 threads = runOpts.maxThreads 443 } 444 445 dir := opts.FS.PathJoin(runDir, "data") 446 // Set up the initial database state if configured to start from a non-empty 447 // database. By default tests start from an empty database, but split 448 // version testing may configure a previous metamorphic tests's database 449 // state as the initial state. 450 if testOpts.initialStatePath != "" { 451 require.NoError(t, setupInitialState(dir, testOpts)) 452 } 453 454 // Wrap the filesystem with one that will inject errors into read 455 // operations with *errorRate probability. 456 opts.FS = errorfs.Wrap(opts.FS, errorfs.WithProbability(errorfs.OpKindRead, runOpts.errorRate)) 457 458 if opts.WALDir != "" { 459 opts.WALDir = opts.FS.PathJoin(runDir, opts.WALDir) 460 } 461 462 historyFile, err := os.Create(historyPath) 463 require.NoError(t, err) 464 defer historyFile.Close() 465 writers := []io.Writer{historyFile} 466 467 if testing.Verbose() { 468 writers = append(writers, os.Stdout) 469 } 470 h := newHistory(runOpts.failRegexp, writers...) 471 472 m := newTest(ops) 473 require.NoError(t, m.init(h, dir, testOpts)) 474 475 if threads <= 1 { 476 for m.step(h) { 477 if err := h.Error(); err != nil { 478 fmt.Fprintf(os.Stderr, "Seed: %d\n", seed) 479 fmt.Fprintln(os.Stderr, err) 480 m.maybeSaveData() 481 os.Exit(1) 482 } 483 } 484 } else { 485 eg, ctx := errgroup.WithContext(context.Background()) 486 for t := 0; t < threads; t++ { 487 t := t // bind loop var to scope 488 eg.Go(func() error { 489 for idx := 0; idx < len(m.ops); idx++ { 490 // Skip any operations whose receiver object hashes to a 491 // different thread. All operations with the same receiver 492 // are performed from the same thread. This goroutine is 493 // only responsible for executing operations that hash to 494 // `t`. 495 if hashThread(m.ops[idx].receiver(), threads) != t { 496 continue 497 } 498 499 // Some operations have additional synchronization 500 // dependencies. If this operation has any, wait for its 501 // dependencies to complete before executing. 502 for _, waitOnIdx := range m.opsWaitOn[idx] { 503 select { 504 case <-ctx.Done(): 505 // Exit if some other thread already errored out. 506 return ctx.Err() 507 case <-m.opsDone[waitOnIdx]: 508 } 509 } 510 511 m.ops[idx].run(m, h.recorder(t, idx)) 512 513 // If this operation has a done channel, close it so that 514 // other operations that synchronize on this operation know 515 // that it's been completed. 516 if ch := m.opsDone[idx]; ch != nil { 517 close(ch) 518 } 519 520 if err := h.Error(); err != nil { 521 return err 522 } 523 } 524 return nil 525 }) 526 } 527 if err := eg.Wait(); err != nil { 528 fmt.Fprintf(os.Stderr, "Seed: %d\n", seed) 529 fmt.Fprintln(os.Stderr, err) 530 m.maybeSaveData() 531 os.Exit(1) 532 } 533 } 534 535 if runOpts.keep && !testOpts.useDisk { 536 m.maybeSaveData() 537 } 538 } 539 540 func hashThread(objID objID, numThreads int) int { 541 // Fibonacci hash https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/ 542 return int((11400714819323198485 * uint64(objID)) % uint64(numThreads)) 543 } 544 545 // Compare runs the metamorphic tests in the provided runDirs and compares their 546 // histories. 547 func Compare(t TestingT, rootDir string, seed uint64, runDirs []string, rOpts ...RunOnceOption) { 548 historyPaths := make([]string, len(runDirs)) 549 for i := 0; i < len(runDirs); i++ { 550 historyPath := filepath.Join(rootDir, runDirs[i]+"-"+time.Now().Format("060102-150405.000")) 551 runDirs[i] = filepath.Join(rootDir, runDirs[i]) 552 _ = os.Remove(historyPath) 553 historyPaths[i] = historyPath 554 } 555 defer func() { 556 for _, path := range historyPaths { 557 _ = os.Remove(path) 558 } 559 }() 560 561 for i, runDir := range runDirs { 562 RunOnce(t, runDir, seed, historyPaths[i], rOpts...) 563 } 564 565 if t.Failed() { 566 return 567 } 568 569 i, diff := CompareHistories(t, historyPaths) 570 if i != 0 { 571 fmt.Printf(` 572 ===== DIFF ===== 573 %s/{%s,%s} 574 %s 575 `, rootDir, runDirs[0], runDirs[i], diff) 576 os.Exit(1) 577 } 578 } 579 580 // TestingT is an interface wrapper around *testing.T 581 type TestingT interface { 582 require.TestingT 583 Failed() bool 584 } 585 586 func readFile(path string) string { 587 history, err := os.ReadFile(path) 588 if err != nil { 589 return fmt.Sprintf("err: %v", err) 590 } 591 592 return string(history) 593 }