github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/cmd/pebble/fsbench.go (about) 1 package main 2 3 import ( 4 "bytes" 5 "fmt" 6 "log" 7 "os" 8 "path" 9 "sync" 10 "sync/atomic" 11 "time" 12 13 "github.com/cockroachdb/errors" 14 "github.com/cockroachdb/pebble/vfs" 15 "github.com/spf13/cobra" 16 ) 17 18 var fsBenchCmd = &cobra.Command{ 19 Use: "fs <dir>", 20 Short: "Run file system benchmarks.", 21 Long: ` 22 Run file system benchmarks. Each benchmark is predefined and can be 23 run using the command "bench fs <dir> --bench-name <benchmark>". 24 Each possible <benchmark> which can be run is defined in the code. 25 Benchmarks may require the specification of a --duration or 26 --max-ops flag, to prevent the benchmark from running forever 27 or running out of memory. 28 29 The --num-times flag can be used to run the entire benchmark, more than 30 once. If the flag isn't provided, then the benchmark is only run once. 31 `, 32 Args: cobra.ExactArgs(1), 33 RunE: runFsBench, 34 } 35 36 const writeBatchSize = 1 << 10 37 38 var fsConfig struct { 39 // An upper limit on the number of ops which can be run. 40 maxOps int 41 42 // Benchmark to run. 43 benchname string 44 45 // Number of times each benchmark should be run. 46 numTimes int 47 48 fs vfs.FS 49 50 precomputedWriteBatch []byte 51 } 52 53 func init() { 54 fsBenchCmd.Flags().IntVar( 55 &fsConfig.maxOps, "max-ops", 0, 56 "Maximum number of times the operation which is being benchmarked should be run.", 57 ) 58 59 fsBenchCmd.Flags().StringVar( 60 &fsConfig.benchname, "bench-name", "", "The benchmark to run.") 61 fsBenchCmd.MarkFlagRequired("bench-name") 62 63 fsBenchCmd.Flags().IntVar( 64 &fsConfig.numTimes, "num-times", 1, 65 "Number of times each benchmark should be run.") 66 67 // Add subcommand to list 68 fsBenchCmd.AddCommand(listFsBench) 69 70 // Just use the default vfs implementation for now. 71 fsConfig.fs = vfs.Default 72 73 fsConfig.precomputedWriteBatch = bytes.Repeat([]byte("a"), writeBatchSize) 74 } 75 76 // State relevant to a benchmark. 77 type fsBench struct { 78 // A short name for the benchmark. 79 name string 80 81 // A one line description for the benchmark. 82 description string 83 84 // numOps is the total number of ops which 85 // have been run for the benchmark. This is used 86 // to make sure that we don't benchmark the operation 87 // more than max-ops times. 88 numOps int 89 90 // directory under which the benchmark is run. 91 dir vfs.File 92 dirName string 93 94 // Stats associated with the benchmark. 95 reg *histogramRegistry 96 97 // The operation which we're benchmarking. This 98 // will be called over and over again. 99 // Returns false if run should no longer be called. 100 run func(*namedHistogram) bool 101 102 // Stop the benchmark from executing any further. 103 // Stop is safe to call concurrently with run. 104 stop func() 105 106 // A cleanup func which must be called after 107 // the benchmark has finished running. 108 // Clean should be only called after making sure 109 // that the run function is no longer executing. 110 clean func() 111 } 112 113 // createFile can be used to create an empty file. 114 // Invariant: File shouldn't already exist. 115 func createFile(filepath string) vfs.File { 116 fh, err := fsConfig.fs.Create(filepath) 117 if err != nil { 118 log.Fatalln(err) 119 } 120 return fh 121 } 122 123 // Invariant: file with filepath should exist. 124 func deleteFile(filepath string) { 125 err := fsConfig.fs.Remove(filepath) 126 if err != nil { 127 log.Fatalln(err) 128 } 129 } 130 131 // Write size bytes to the file in batches. 132 func writeToFile(fh vfs.File, size int64) { 133 for size > 0 { 134 var toWrite []byte 135 if size >= writeBatchSize { 136 toWrite = fsConfig.precomputedWriteBatch 137 } else { 138 toWrite = fsConfig.precomputedWriteBatch[:size] 139 } 140 written, err := fh.Write(toWrite) 141 if err != nil { 142 log.Fatalln(err) 143 } 144 if written != len(toWrite) { 145 log.Fatalf("Couldn't write %d bytes to file\n", size) 146 } 147 size -= int64(len(toWrite)) 148 } 149 } 150 151 func syncFile(fh vfs.File) { 152 err := fh.Sync() 153 if err != nil { 154 log.Fatalln(err) 155 } 156 } 157 158 func closeFile(fh vfs.File) { 159 err := fh.Close() 160 if err != nil { 161 log.Fatalln(err) 162 } 163 } 164 165 func getDiskUsage(filepath string) { 166 _, err := fsConfig.fs.GetDiskUsage(filepath) 167 if err != nil { 168 log.Fatalln(err) 169 } 170 } 171 172 func openDir(filepath string) vfs.File { 173 fh, err := fsConfig.fs.OpenDir(filepath) 174 if err != nil { 175 log.Fatalln(err) 176 } 177 return fh 178 } 179 180 func mkDir(filepath string) { 181 err := fsConfig.fs.MkdirAll(filepath, 0755) 182 if err != nil { 183 log.Fatalln(err) 184 } 185 } 186 187 func removeAllFiles(filepath string) { 188 err := fsConfig.fs.RemoveAll(filepath) 189 if err != nil { 190 log.Fatalln(err) 191 } 192 } 193 194 // fileSize is in bytes. 195 func createBench(benchName string, benchDescription string) fsBenchmark { 196 createBench := func(dirpath string) *fsBench { 197 bench := &fsBench{} 198 mkDir(dirpath) 199 fh := openDir(dirpath) 200 201 bench.dir = fh 202 bench.dirName = dirpath 203 bench.reg = newHistogramRegistry() 204 bench.numOps = 0 205 bench.name = benchName 206 bench.description = benchDescription 207 208 // setup the operation to benchmark, and the cleanup functions. 209 pref := "temp_" 210 var numFiles int 211 var done atomic.Bool 212 213 bench.run = func(hist *namedHistogram) bool { 214 if done.Load() { 215 return false 216 } 217 218 start := time.Now() 219 fh := createFile(path.Join(dirpath, fmt.Sprintf("%s%d", pref, numFiles))) 220 syncFile(bench.dir) 221 hist.Record(time.Since(start)) 222 223 closeFile(fh) 224 numFiles++ 225 return true 226 } 227 228 bench.stop = func() { 229 done.Store(true) 230 } 231 232 bench.clean = func() { 233 removeAllFiles(dirpath) 234 closeFile(bench.dir) 235 } 236 237 return bench 238 } 239 240 return fsBenchmark{ 241 createBench, 242 benchName, 243 benchDescription, 244 } 245 } 246 247 // This benchmark prepopulates a directory with some files of a given size. Then, it creates and deletes 248 // a file of some size, while measuring only the performance of the delete. 249 func deleteBench( 250 benchName string, benchDescription string, preNumFiles int, preFileSize int64, fileSize int64, 251 ) fsBenchmark { 252 253 createBench := func(dirpath string) *fsBench { 254 bench := &fsBench{} 255 mkDir(dirpath) 256 fh := openDir(dirpath) 257 258 bench.dir = fh 259 bench.dirName = dirpath 260 bench.reg = newHistogramRegistry() 261 bench.numOps = 0 262 bench.name = benchName 263 bench.description = benchDescription 264 265 // prepopulate the directory 266 prePref := "pre_temp_" 267 for i := 0; i < preNumFiles; i++ { 268 fh := createFile(path.Join(dirpath, fmt.Sprintf("%s%d", prePref, i))) 269 if preFileSize > 0 { 270 writeToFile(fh, preFileSize) 271 syncFile(fh) 272 } 273 closeFile(fh) 274 } 275 syncFile(bench.dir) 276 277 var done atomic.Bool 278 bench.run = func(hist *namedHistogram) bool { 279 if done.Load() { 280 return false 281 } 282 283 filename := "newfile" 284 fh := createFile(path.Join(dirpath, filename)) 285 writeToFile(fh, fileSize) 286 syncFile(fh) 287 288 start := time.Now() 289 deleteFile(path.Join(dirpath, filename)) 290 hist.Record(time.Since(start)) 291 292 return true 293 } 294 295 bench.stop = func() { 296 done.Store(true) 297 } 298 299 bench.clean = func() { 300 removeAllFiles(dirpath) 301 closeFile(bench.dir) 302 } 303 304 return bench 305 } 306 307 return fsBenchmark{ 308 createBench, 309 benchName, 310 benchDescription, 311 } 312 } 313 314 // This benchmark creates some files in a directory, and then measures the performance 315 // of the vfs.Remove function. 316 // fileSize is in bytes. 317 func deleteUniformBench( 318 benchName string, benchDescription string, numFiles int, fileSize int64, 319 ) fsBenchmark { 320 createBench := func(dirpath string) *fsBench { 321 bench := &fsBench{} 322 mkDir(dirpath) 323 fh := openDir(dirpath) 324 325 bench.dir = fh 326 bench.dirName = dirpath 327 bench.reg = newHistogramRegistry() 328 bench.numOps = 0 329 bench.name = benchName 330 bench.description = benchDescription 331 332 // setup the operation to benchmark, and the cleaup functions. 333 pref := "temp_" 334 for i := 0; i < numFiles; i++ { 335 fh := createFile(path.Join(dirpath, fmt.Sprintf("%s%d", pref, i))) 336 if fileSize > 0 { 337 writeToFile(fh, fileSize) 338 syncFile(fh) 339 } 340 closeFile(fh) 341 } 342 syncFile(bench.dir) 343 344 var done atomic.Bool 345 bench.run = func(hist *namedHistogram) bool { 346 if done.Load() { 347 return false 348 } 349 350 if numFiles == 0 { 351 return false 352 } 353 354 start := time.Now() 355 deleteFile(path.Join(dirpath, fmt.Sprintf("%s%d", pref, numFiles-1))) 356 hist.Record(time.Since(start)) 357 358 numFiles-- 359 return true 360 } 361 362 bench.stop = func() { 363 done.Store(true) 364 } 365 366 bench.clean = func() { 367 removeAll(dirpath) 368 closeFile(bench.dir) 369 } 370 371 return bench 372 } 373 374 return fsBenchmark{ 375 createBench, 376 benchName, 377 benchDescription, 378 } 379 } 380 381 // Tests the performance of syncing data to disk. 382 // Only measures the sync performance. 383 // The writes will be synced after every writeSize bytes have been written. 384 func writeSyncBench( 385 benchName string, benchDescription string, maxFileSize int64, writeSize int64, 386 ) fsBenchmark { 387 388 if writeSize > maxFileSize { 389 log.Fatalln("File write threshold is greater than max file size.") 390 } 391 392 createBench := func(dirpath string) *fsBench { 393 bench := &fsBench{} 394 mkDir(dirpath) 395 fh := openDir(dirpath) 396 397 bench.dir = fh 398 bench.dirName = dirpath 399 bench.reg = newHistogramRegistry() 400 bench.numOps = 0 401 bench.name = benchName 402 bench.description = benchDescription 403 404 pref := "temp_" 405 var benchData struct { 406 done atomic.Bool 407 fh vfs.File 408 fileNum int 409 bytesWritten int64 410 } 411 benchData.fh = createFile(path.Join(dirpath, fmt.Sprintf("%s%d", pref, benchData.fileNum))) 412 413 bench.run = func(hist *namedHistogram) bool { 414 if benchData.done.Load() { 415 return false 416 } 417 418 if benchData.bytesWritten+writeSize > maxFileSize { 419 closeFile(benchData.fh) 420 benchData.fileNum++ 421 benchData.bytesWritten = 0 422 benchData.fh = createFile(path.Join(dirpath, fmt.Sprintf("%s%d", pref, benchData.fileNum))) 423 } 424 425 benchData.bytesWritten += writeSize 426 writeToFile(benchData.fh, writeSize) 427 428 start := time.Now() 429 syncFile(benchData.fh) 430 hist.Record(time.Since(start)) 431 432 return true 433 } 434 435 bench.stop = func() { 436 benchData.done.Store(true) 437 } 438 439 bench.clean = func() { 440 closeFile(benchData.fh) 441 removeAllFiles(dirpath) 442 closeFile(bench.dir) 443 } 444 445 return bench 446 } 447 448 return fsBenchmark{ 449 createBench, 450 benchName, 451 benchDescription, 452 } 453 } 454 455 // Tests the peformance of calling the vfs.GetDiskUsage call on a directory, 456 // as the number of files/total size of files in the directory grows. 457 func diskUsageBench( 458 benchName string, benchDescription string, maxFileSize int64, writeSize int64, 459 ) fsBenchmark { 460 461 if writeSize > maxFileSize { 462 log.Fatalln("File write threshold is greater than max file size.") 463 } 464 465 createBench := func(dirpath string) *fsBench { 466 bench := &fsBench{} 467 mkDir(dirpath) 468 fh := openDir(dirpath) 469 470 bench.dir = fh 471 bench.dirName = dirpath 472 bench.reg = newHistogramRegistry() 473 bench.numOps = 0 474 bench.name = benchName 475 bench.description = benchDescription 476 477 pref := "temp_" 478 var benchData struct { 479 done atomic.Bool 480 fh vfs.File 481 fileNum int 482 bytesWritten int64 483 } 484 benchData.fh = createFile(path.Join(dirpath, fmt.Sprintf("%s%d", pref, benchData.fileNum))) 485 486 bench.run = func(hist *namedHistogram) bool { 487 if benchData.done.Load() { 488 return false 489 } 490 491 if benchData.bytesWritten+writeSize > maxFileSize { 492 closeFile(benchData.fh) 493 benchData.fileNum++ 494 benchData.bytesWritten = 0 495 benchData.fh = createFile(path.Join(dirpath, fmt.Sprintf("%s%d", pref, benchData.fileNum))) 496 } 497 498 benchData.bytesWritten += writeSize 499 writeToFile(benchData.fh, writeSize) 500 syncFile(benchData.fh) 501 502 start := time.Now() 503 getDiskUsage(dirpath) 504 hist.Record(time.Since(start)) 505 506 return true 507 } 508 509 bench.stop = func() { 510 benchData.done.Store(true) 511 } 512 513 bench.clean = func() { 514 closeFile(benchData.fh) 515 removeAllFiles(dirpath) 516 closeFile(bench.dir) 517 } 518 519 return bench 520 } 521 522 return fsBenchmark{ 523 createBench, 524 benchName, 525 benchDescription, 526 } 527 } 528 529 // A benchmark is a function which takes a directory 530 // as input and returns the fsBench struct which has 531 // all the information required to run the benchmark. 532 type fsBenchmark struct { 533 createBench func(string) *fsBench 534 name string 535 description string 536 } 537 538 // The various benchmarks which can be run. 539 var benchmarks = map[string]fsBenchmark{ 540 "create_empty": createBench("create_empty", "create empty file, sync par dir"), 541 "delete_10k_2MiB": deleteUniformBench( 542 "delete_10k_2MiB", "create 10k 2MiB size files, measure deletion times", 10_000, 2<<20, 543 ), 544 "delete_100k_2MiB": deleteUniformBench( 545 "delete_100k_2MiB", "create 100k 2MiB size files, measure deletion times", 100_000, 2<<20, 546 ), 547 "delete_200k_2MiB": deleteUniformBench( 548 "delete_200k_2MiB", "create 200k 2MiB size files, measure deletion times", 200_000, 2<<20, 549 ), 550 "write_sync_1MiB": writeSyncBench( 551 "write_sync_1MiB", "Write 1MiB to a file, then sync, while timing the sync.", 2<<30, 1<<20, 552 ), 553 "write_sync_16MiB": writeSyncBench( 554 "write_sync_16MiB", "Write 16MiB to a file, then sync, while timing the sync.", 2<<30, 16<<20, 555 ), 556 "write_sync_128MiB": writeSyncBench( 557 "write_sync_128MiB", "Write 128MiB to a file, then sync, while timing the sync.", 2<<30, 128<<20, 558 ), 559 "disk_usage_128MB": diskUsageBench( 560 "disk_usage_128MB", 561 "Write 128MiB to a file, measure GetDiskUsage call. Create a new file, when file size is 1GB.", 562 1<<30, 128<<20, 563 ), 564 "disk_usage_many_files": diskUsageBench( 565 "disk_usage_many_files", 566 "Create new file, Write 128KiB to a file, measure GetDiskUsage call.", 567 128<<10, 128<<10, 568 ), 569 "delete_large_dir_256MiB": deleteBench( 570 "delete_large_dir_256MiB", "Prepopulate directory with 100k 1MiB files, measure delete peformance of 256MiB files", 571 1e5, 1<<20, 256<<20, 572 ), 573 "delete_large_dir_2MiB": deleteBench( 574 "delete_large_dir_2MiB", "Prepopulate directory with 100k 1MiB files, measure delete peformance of 2MiB files", 575 1e5, 1<<20, 2<<20, 576 ), 577 "delete_small_dir_2GiB": deleteBench( 578 "delete_small_dir_2GiB", "Prepopulate directory with 1k 1MiB files, measure delete peformance of 2GiB files", 579 1e3, 1<<20, 2<<30, 580 ), 581 "delete_small_dir_256MiB": deleteBench( 582 "delete_small_dir_256MiB", "Prepopulate directory with 1k 1MiB files, measure delete peformance of 256MiB files", 583 1e3, 1<<20, 256<<20, 584 ), 585 "delete_small_dir_2MiB": deleteBench( 586 "delete_small_dir_2MiB", "Prepopulate directory with 1k 1MiB files, measure delete peformance of 2MiB files", 587 1e3, 1<<20, 2<<20, 588 ), 589 } 590 591 func runFsBench(_ *cobra.Command, args []string) error { 592 benchmark, ok := benchmarks[fsConfig.benchname] 593 if !ok { 594 return errors.Errorf("trying to run an unknown benchmark: %s", fsConfig.benchname) 595 } 596 597 // Run the benchmark a comple of times. 598 fmt.Printf("The benchmark will be run %d time(s).\n", fsConfig.numTimes) 599 for i := 0; i < fsConfig.numTimes; i++ { 600 fmt.Println("Starting benchmark:", i) 601 benchStruct := benchmark.createBench(args[0]) 602 runTestWithoutDB(testWithoutDB{ 603 init: benchStruct.init, 604 tick: benchStruct.tick, 605 done: benchStruct.done, 606 }) 607 } 608 return nil 609 } 610 611 func (bench *fsBench) init(wg *sync.WaitGroup) { 612 fmt.Println("Running benchmark:", bench.name) 613 fmt.Println("Description:", bench.description) 614 615 wg.Add(1) 616 go bench.execute(wg) 617 } 618 619 func (bench *fsBench) execute(wg *sync.WaitGroup) { 620 defer wg.Done() 621 622 latencyHist := bench.reg.Register(bench.name) 623 624 for { 625 // run the op which we're benchmarking. 626 bench.numOps++ 627 628 // The running function will determine exactly what to latency 629 // it wants to measure. 630 continueBench := bench.run(latencyHist) 631 if !continueBench || (fsConfig.maxOps > 0 && bench.numOps >= fsConfig.maxOps) { 632 break 633 } 634 } 635 } 636 637 func (bench *fsBench) tick(elapsed time.Duration, i int) { 638 if i%20 == 0 { 639 fmt.Println("____optype__elapsed__ops/sec(inst)___ops/sec(cum)__p50(ms)__p95(ms)__p99(ms)__pMax(ms)") 640 } 641 bench.reg.Tick(func(tick histogramTick) { 642 h := tick.Hist 643 644 fmt.Printf("%10s %8s %14.1f %14.1f %5.6f %5.6f %5.6f %5.6f\n", 645 tick.Name[:10], 646 time.Duration(elapsed.Seconds()+0.5)*time.Second, 647 float64(h.TotalCount())/tick.Elapsed.Seconds(), 648 float64(tick.Cumulative.TotalCount())/elapsed.Seconds(), 649 time.Duration(h.ValueAtQuantile(50)).Seconds()*1000, 650 time.Duration(h.ValueAtQuantile(95)).Seconds()*1000, 651 time.Duration(h.ValueAtQuantile(99)).Seconds()*1000, 652 time.Duration(h.ValueAtQuantile(100)).Seconds()*1000, 653 ) 654 }) 655 } 656 657 func (bench *fsBench) done(wg *sync.WaitGroup, elapsed time.Duration) { 658 // Do the cleanup. 659 bench.stop() 660 wg.Wait() 661 defer bench.clean() 662 663 fmt.Println("\n____optype__elapsed_____ops(total)___ops/sec(cum)__avg(ms)__p50(ms)__p95(ms)__p99(ms)__pMax(ms)") 664 665 resultTick := histogramTick{} 666 bench.reg.Tick(func(tick histogramTick) { 667 h := tick.Cumulative 668 if resultTick.Cumulative == nil { 669 resultTick.Now = tick.Now 670 resultTick.Cumulative = h 671 } else { 672 resultTick.Cumulative.Merge(h) 673 } 674 675 fmt.Printf("%10s %7.1fs %14d %14.1f %5.6f %5.6f %5.6f %5.6f %5.6f\n", 676 tick.Name[:10], elapsed.Seconds(), h.TotalCount(), 677 float64(h.TotalCount())/elapsed.Seconds(), 678 time.Duration(h.Mean()).Seconds()*1000, 679 time.Duration(h.ValueAtQuantile(50)).Seconds()*1000, 680 time.Duration(h.ValueAtQuantile(95)).Seconds()*1000, 681 time.Duration(h.ValueAtQuantile(99)).Seconds()*1000, 682 time.Duration(h.ValueAtQuantile(100)).Seconds()*1000, 683 ) 684 }) 685 fmt.Println() 686 687 resultHist := resultTick.Cumulative 688 689 fmt.Printf("Benchmarkfsbench/%s %d %0.1f ops/sec\n\n", 690 bench.name, 691 resultHist.TotalCount(), 692 float64(resultHist.TotalCount())/elapsed.Seconds(), 693 ) 694 } 695 696 func verbosef(fmtstr string, args ...interface{}) { 697 if verbose { 698 fmt.Printf(fmtstr, args...) 699 } 700 } 701 702 func removeAll(dir string) { 703 verbosef("Removing %q.\n", dir) 704 if err := os.RemoveAll(dir); err != nil { 705 log.Fatal(err) 706 } 707 }