github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/testing/benchmark.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package testing 6 7 import ( 8 "flag" 9 "fmt" 10 "internal/race" 11 "internal/sysinfo" 12 "io" 13 "math" 14 "os" 15 "runtime" 16 "sort" 17 "strconv" 18 "strings" 19 "sync" 20 "sync/atomic" 21 "time" 22 "unicode" 23 ) 24 25 func initBenchmarkFlags() { 26 matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`") 27 benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks") 28 flag.Var(&benchTime, "test.benchtime", "run each benchmark for duration `d`") 29 } 30 31 var ( 32 matchBenchmarks *string 33 benchmarkMemory *bool 34 35 benchTime = durationOrCountFlag{d: 1 * time.Second} // changed during test of testing package 36 ) 37 38 type durationOrCountFlag struct { 39 d time.Duration 40 n int 41 allowZero bool 42 } 43 44 func (f *durationOrCountFlag) String() string { 45 if f.n > 0 { 46 return fmt.Sprintf("%dx", f.n) 47 } 48 return f.d.String() 49 } 50 51 func (f *durationOrCountFlag) Set(s string) error { 52 if strings.HasSuffix(s, "x") { 53 n, err := strconv.ParseInt(s[:len(s)-1], 10, 0) 54 if err != nil || n < 0 || (!f.allowZero && n == 0) { 55 return fmt.Errorf("invalid count") 56 } 57 *f = durationOrCountFlag{n: int(n)} 58 return nil 59 } 60 d, err := time.ParseDuration(s) 61 if err != nil || d < 0 || (!f.allowZero && d == 0) { 62 return fmt.Errorf("invalid duration") 63 } 64 *f = durationOrCountFlag{d: d} 65 return nil 66 } 67 68 // Global lock to ensure only one benchmark runs at a time. 69 var benchmarkLock sync.Mutex 70 71 // Used for every benchmark for measuring memory. 
var memStats runtime.MemStats // scratch for runtime.ReadMemStats; reused across runs (runs are serialized by benchmarkLock)

// InternalBenchmark is an internal type but exported because it is cross-package;
// it is part of the implementation of the "go test" command.
type InternalBenchmark struct {
	Name string     // benchmark name as it appears in output
	F    func(b *B) // the benchmark function itself
}

// B is a type passed to Benchmark functions to manage benchmark
// timing and to specify the number of iterations to run.
//
// A benchmark ends when its Benchmark function returns or calls any of the methods
// FailNow, Fatal, Fatalf, SkipNow, Skip, or Skipf. Those methods must be called
// only from the goroutine running the Benchmark function.
// The other reporting methods, such as the variations of Log and Error,
// may be called simultaneously from multiple goroutines.
//
// Like in tests, benchmark logs are accumulated during execution
// and dumped to standard output when done. Unlike in tests, benchmark logs
// are always printed, so as not to hide output whose existence may be
// affecting benchmark results.
type B struct {
	common
	importPath       string // import path of the package containing the benchmark
	context          *benchContext // shared "go test" run state; nil when driven by func Benchmark
	N                int           // number of iterations for the current run; set by runN
	previousN        int           // number of iterations in the previous run
	previousDuration time.Duration // total duration of the previous run
	benchFunc        func(b *B)    // the function being benchmarked
	benchTime        durationOrCountFlag // per-benchmark copy of the -test.benchtime setting
	bytes            int64               // bytes processed per iteration, recorded by SetBytes
	missingBytes     bool                // one of the subbenchmarks does not have bytes set.
	timerOn          bool                // whether the benchmark timer is currently running
	showAllocResult  bool                // report allocations even without -test.benchmem; set by ReportAllocs
	result           BenchmarkResult     // result of the most recent run
	parallelism      int                 // RunParallel creates parallelism*GOMAXPROCS goroutines
	// The initial states of memStats.Mallocs and memStats.TotalAlloc.
	startAllocs uint64
	startBytes  uint64
	// The net total of this test after being run.
	netAllocs uint64
	netBytes  uint64
	// Extra metrics collected by ReportMetric.
	extra map[string]float64
}

// StartTimer starts timing a test.
// This function is called automatically
// before a benchmark starts, but it can also be used to resume timing after
// a call to StopTimer.
func (b *B) StartTimer() {
	if !b.timerOn {
		// Read allocation counters before starting the clock so the cost of
		// ReadMemStats itself is excluded from the measured duration.
		runtime.ReadMemStats(&memStats)
		b.startAllocs = memStats.Mallocs
		b.startBytes = memStats.TotalAlloc
		b.start = time.Now()
		b.timerOn = true
	}
}

// StopTimer stops timing a test. This can be used to pause the timer
// while performing complex initialization that you don't
// want to measure.
func (b *B) StopTimer() {
	if b.timerOn {
		// Accumulate elapsed time first, then the allocation deltas, so the
		// ReadMemStats call is not charged to the recorded duration.
		b.duration += time.Since(b.start)
		runtime.ReadMemStats(&memStats)
		b.netAllocs += memStats.Mallocs - b.startAllocs
		b.netBytes += memStats.TotalAlloc - b.startBytes
		b.timerOn = false
	}
}

// ResetTimer zeroes the elapsed benchmark time and memory allocation counters
// and deletes user-reported metrics.
// It does not affect whether the timer is running.
func (b *B) ResetTimer() {
	if b.extra == nil {
		// Allocate the extra map before reading memory stats.
		// Pre-size it to make more allocation unlikely.
		b.extra = make(map[string]float64, 16)
	} else {
		for k := range b.extra {
			delete(b.extra, k)
		}
	}
	if b.timerOn {
		// Re-baseline the counters as if the timer had just been started.
		runtime.ReadMemStats(&memStats)
		b.startAllocs = memStats.Mallocs
		b.startBytes = memStats.TotalAlloc
		b.start = time.Now()
	}
	b.duration = 0
	b.netAllocs = 0
	b.netBytes = 0
}

// SetBytes records the number of bytes processed in a single operation.
// If this is called, the benchmark will report ns/op and MB/s.
func (b *B) SetBytes(n int64) { b.bytes = n }

// ReportAllocs enables malloc statistics for this benchmark.
// It is equivalent to setting -test.benchmem, but it only affects the
// benchmark function that calls ReportAllocs.
func (b *B) ReportAllocs() {
	b.showAllocResult = true
}

// runN runs a single benchmark for the specified number of iterations.
func (b *B) runN(n int) {
	benchmarkLock.Lock()
	defer benchmarkLock.Unlock()
	defer b.runCleanup(normalPanic)
	// Try to get a comparable environment for each run
	// by clearing garbage from previous runs.
	runtime.GC()
	// Record the pre-run race count as a negative baseline so that adding
	// the post-run count below leaves only races detected during this run.
	b.raceErrors = -race.Errors()
	b.N = n
	b.parallelism = 1
	b.ResetTimer()
	b.StartTimer()
	b.benchFunc(b)
	b.StopTimer()
	b.previousN = n
	b.previousDuration = b.duration
	b.raceErrors += race.Errors()
	if b.raceErrors > 0 {
		b.Errorf("race detected during execution of benchmark")
	}
}

// min returns the smaller of x and y.
func min(x, y int64) int64 {
	if x > y {
		return y
	}
	return x
}

// max returns the larger of x and y.
func max(x, y int64) int64 {
	if x < y {
		return y
	}
	return x
}

// run1 runs the first iteration of benchFunc. It reports whether more
// iterations of this benchmarks should be run.
func (b *B) run1() bool {
	if ctx := b.context; ctx != nil {
		// Extend maxLen, if needed.
		if n := len(b.name) + ctx.extLen + 1; n > ctx.maxLen {
			ctx.maxLen = n + 8 // Add additional slack to avoid too many jumps in size.
		}
	}
	// The benchmark body runs on its own goroutine so that FailNow's
	// runtime.Goexit terminates only the benchmark, not the caller.
	go func() {
		// Signal that we're done whether we return normally
		// or by FailNow's runtime.Goexit.
		defer func() {
			b.signal <- true
		}()

		b.runN(1)
	}()
	<-b.signal
	if b.failed {
		fmt.Fprintf(b.w, "%s--- FAIL: %s\n%s", b.chatty.prefix(), b.name, b.output)
		return false
	}
	// Only print the output if we know we are not going to proceed.
	// Otherwise it is printed in processBench.
	b.mu.RLock()
	finished := b.finished
	b.mu.RUnlock()
	if b.hasSub.Load() || finished {
		tag := "BENCH"
		if b.skipped {
			tag = "SKIP"
		}
		if b.chatty != nil && (len(b.output) > 0 || finished) {
			b.trimOutput()
			fmt.Fprintf(b.w, "%s--- %s: %s\n%s", b.chatty.prefix(), tag, b.name, b.output)
		}
		return false
	}
	return true
}

// labelsOnce ensures the goos/goarch/pkg/cpu labels are printed at most
// once per binary, regardless of how many benchmarks run.
var labelsOnce sync.Once

// run executes the benchmark in a separate goroutine, including all of its
// subbenchmarks. b must not have subbenchmarks.
func (b *B) run() {
	labelsOnce.Do(func() {
		fmt.Fprintf(b.w, "goos: %s\n", runtime.GOOS)
		fmt.Fprintf(b.w, "goarch: %s\n", runtime.GOARCH)
		if b.importPath != "" {
			fmt.Fprintf(b.w, "pkg: %s\n", b.importPath)
		}
		if cpu := sysinfo.CPU.Name(); cpu != "" {
			fmt.Fprintf(b.w, "cpu: %s\n", cpu)
		}
	})
	if b.context != nil {
		// Running go test --test.bench
		b.context.processBench(b) // Must call doBench.
	} else {
		// Running func Benchmark.
		b.doBench()
	}
}

// doBench runs the measurement loop on a fresh goroutine, blocks until it
// signals completion, and returns the collected result.
func (b *B) doBench() BenchmarkResult {
	go b.launch()
	<-b.signal
	return b.result
}

// launch launches the benchmark function. It gradually increases the number
// of benchmark iterations until the benchmark runs for the requested benchtime.
// launch is run by the doBench function as a separate goroutine.
// run1 must have been called on b.
func (b *B) launch() {
	// Signal that we're done whether we return normally
	// or by FailNow's runtime.Goexit.
	defer func() {
		b.signal <- true
	}()

	// Run the benchmark for at least the specified amount of time.
	if b.benchTime.n > 0 {
		// We already ran a single iteration in run1.
		// If -benchtime=1x was requested, use that result.
		// See https://golang.org/issue/32051.
		if b.benchTime.n > 1 {
			b.runN(b.benchTime.n)
		}
	} else {
		d := b.benchTime.d
		for n := int64(1); !b.failed && b.duration < d && n < 1e9; {
			last := n
			// Predict required iterations.
			goalns := d.Nanoseconds()
			prevIters := int64(b.N)
			prevns := b.duration.Nanoseconds()
			if prevns <= 0 {
				// Round up, to avoid div by zero.
				prevns = 1
			}
			// Order of operations matters.
			// For very fast benchmarks, prevIters ~= prevns.
			// If you divide first, you get 0 or 1,
			// which can hide an order of magnitude in execution time.
			// So multiply first, then divide.
			n = goalns * prevIters / prevns
			// Run more iterations than we think we'll need (1.2x).
			n += n / 5
			// Don't grow too fast in case we had timing errors previously.
			n = min(n, 100*last)
			// Be sure to run at least one more than last time.
			n = max(n, last+1)
			// Don't run more than 1e9 times. (This also keeps n in int range on 32 bit platforms.)
			n = min(n, 1e9)
			b.runN(int(n))
		}
	}
	b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes, b.extra}
}

// Elapsed returns the measured elapsed time of the benchmark.
// The duration reported by Elapsed matches the one measured by
// StartTimer, StopTimer, and ResetTimer.
func (b *B) Elapsed() time.Duration {
	d := b.duration
	if b.timerOn {
		// Include the portion of the current timing interval elapsed so far.
		d += time.Since(b.start)
	}
	return d
}

// ReportMetric adds "n unit" to the reported benchmark results.
// If the metric is per-iteration, the caller should divide by b.N,
// and by convention units should end in "/op".
// ReportMetric overrides any previously reported value for the same unit.
// ReportMetric panics if unit is the empty string or if unit contains
// any whitespace.
// If unit is a unit normally reported by the benchmark framework itself
// (such as "allocs/op"), ReportMetric will override that metric.
359 // Setting "ns/op" to 0 will suppress that built-in metric. 360 func (b *B) ReportMetric(n float64, unit string) { 361 if unit == "" { 362 panic("metric unit must not be empty") 363 } 364 if strings.IndexFunc(unit, unicode.IsSpace) >= 0 { 365 panic("metric unit must not contain whitespace") 366 } 367 b.extra[unit] = n 368 } 369 370 // BenchmarkResult contains the results of a benchmark run. 371 type BenchmarkResult struct { 372 N int // The number of iterations. 373 T time.Duration // The total time taken. 374 Bytes int64 // Bytes processed in one iteration. 375 MemAllocs uint64 // The total number of memory allocations. 376 MemBytes uint64 // The total number of bytes allocated. 377 378 // Extra records additional metrics reported by ReportMetric. 379 Extra map[string]float64 380 } 381 382 // NsPerOp returns the "ns/op" metric. 383 func (r BenchmarkResult) NsPerOp() int64 { 384 if v, ok := r.Extra["ns/op"]; ok { 385 return int64(v) 386 } 387 if r.N <= 0 { 388 return 0 389 } 390 return r.T.Nanoseconds() / int64(r.N) 391 } 392 393 // mbPerSec returns the "MB/s" metric. 394 func (r BenchmarkResult) mbPerSec() float64 { 395 if v, ok := r.Extra["MB/s"]; ok { 396 return v 397 } 398 if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 { 399 return 0 400 } 401 return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds() 402 } 403 404 // AllocsPerOp returns the "allocs/op" metric, 405 // which is calculated as r.MemAllocs / r.N. 406 func (r BenchmarkResult) AllocsPerOp() int64 { 407 if v, ok := r.Extra["allocs/op"]; ok { 408 return int64(v) 409 } 410 if r.N <= 0 { 411 return 0 412 } 413 return int64(r.MemAllocs) / int64(r.N) 414 } 415 416 // AllocedBytesPerOp returns the "B/op" metric, 417 // which is calculated as r.MemBytes / r.N. 
func (r BenchmarkResult) AllocedBytesPerOp() int64 {
	if v, ok := r.Extra["B/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return int64(r.MemBytes) / int64(r.N)
}

// String returns a summary of the benchmark results.
// It follows the benchmark result line format from
// https://golang.org/design/14313-benchmark-format, not including the
// benchmark name.
// Extra metrics override built-in metrics of the same name.
// String does not include allocs/op or B/op, since those are reported
// by MemString.
func (r BenchmarkResult) String() string {
	buf := new(strings.Builder)
	fmt.Fprintf(buf, "%8d", r.N)

	// Get ns/op as a float.
	ns, ok := r.Extra["ns/op"]
	if !ok {
		ns = float64(r.T.Nanoseconds()) / float64(r.N)
	}
	if ns != 0 {
		buf.WriteByte('\t')
		prettyPrint(buf, ns, "ns/op")
	}

	if mbs := r.mbPerSec(); mbs != 0 {
		fmt.Fprintf(buf, "\t%7.2f MB/s", mbs)
	}

	// Print extra metrics that aren't represented in the standard
	// metrics.
	var extraKeys []string
	for k := range r.Extra {
		switch k {
		case "ns/op", "MB/s", "B/op", "allocs/op":
			// Built-in metrics reported elsewhere.
			continue
		}
		extraKeys = append(extraKeys, k)
	}
	// Sort for deterministic output; map iteration order is random.
	sort.Strings(extraKeys)
	for _, k := range extraKeys {
		buf.WriteByte('\t')
		prettyPrint(buf, r.Extra[k], k)
	}
	return buf.String()
}

// prettyPrint writes "x unit" to w, choosing a precision based on the
// magnitude of x so that columns align across result lines.
func prettyPrint(w io.Writer, x float64, unit string) {
	// Print all numbers with 10 places before the decimal point
	// and small numbers with four sig figs. Field widths are
	// chosen to fit the whole part in 10 places while aligning
	// the decimal point of all fractional formats.
	var format string
	switch y := math.Abs(x); {
	case y == 0 || y >= 999.95:
		format = "%10.0f %s"
	case y >= 99.995:
		format = "%12.1f %s"
	case y >= 9.9995:
		format = "%13.2f %s"
	case y >= 0.99995:
		format = "%14.3f %s"
	case y >= 0.099995:
		format = "%15.4f %s"
	case y >= 0.0099995:
		format = "%16.5f %s"
	case y >= 0.00099995:
		format = "%17.6f %s"
	default:
		format = "%18.7f %s"
	}
	fmt.Fprintf(w, format, x, unit)
}

// MemString returns r.AllocedBytesPerOp and r.AllocsPerOp in the same format as 'go test'.
func (r BenchmarkResult) MemString() string {
	return fmt.Sprintf("%8d B/op\t%8d allocs/op",
		r.AllocedBytesPerOp(), r.AllocsPerOp())
}

// benchmarkName returns full name of benchmark including procs suffix.
func benchmarkName(name string, n int) string {
	if n != 1 {
		return fmt.Sprintf("%s-%d", name, n)
	}
	return name
}

// benchContext holds state shared by all benchmarks in one "go test" run.
type benchContext struct {
	match *matcher // selects benchmarks against the -test.bench pattern

	maxLen int // The largest recorded benchmark name.
	extLen int // Maximum extension length.
}

// RunBenchmarks is an internal function but exported because it is cross-package;
// it is part of the implementation of the "go test" command.
func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) {
	runBenchmarks("", matchString, benchmarks)
}

// runBenchmarks runs the benchmarks matching the -test.bench pattern under a
// synthetic "Main" parent benchmark and reports whether none of them failed.
func runBenchmarks(importPath string, matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool {
	// If no flag was specified, don't run benchmarks.
	if len(*matchBenchmarks) == 0 {
		return true
	}
	// Collect matching benchmarks and determine longest name.
	maxprocs := 1
	for _, procs := range cpuList {
		if procs > maxprocs {
			maxprocs = procs
		}
	}
	ctx := &benchContext{
		match:  newMatcher(matchString, *matchBenchmarks, "-test.bench", *skip),
		extLen: len(benchmarkName("", maxprocs)),
	}
	var bs []InternalBenchmark
	for _, Benchmark := range benchmarks {
		if _, matched, _ := ctx.match.fullName(nil, Benchmark.Name); matched {
			bs = append(bs, Benchmark)
			benchName := benchmarkName(Benchmark.Name, maxprocs)
			if l := len(benchName) + ctx.extLen + 1; l > ctx.maxLen {
				ctx.maxLen = l
			}
		}
	}
	main := &B{
		common: common{
			name:  "Main",
			w:     os.Stdout,
			bench: true,
		},
		importPath: importPath,
		benchFunc: func(b *B) {
			for _, Benchmark := range bs {
				b.Run(Benchmark.Name, Benchmark.F)
			}
		},
		benchTime: benchTime,
		context:   ctx,
	}
	if Verbose() {
		main.chatty = newChattyPrinter(main.w)
	}
	main.runN(1)
	return !main.failed
}

// processBench runs bench b for the configured CPU counts and prints the results.
func (ctx *benchContext) processBench(b *B) {
	for i, procs := range cpuList {
		for j := uint(0); j < *count; j++ {
			runtime.GOMAXPROCS(procs)
			benchName := benchmarkName(b.name, procs)

			// If it's chatty, we've already printed this information.
			if b.chatty == nil {
				fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName)
			}
			// Recompute the running time for all but the first iteration.
			if i > 0 || j > 0 {
				// Replace b with a fresh B so state from the previous run
				// (timers, counters, failure flags) cannot leak into this one.
				b = &B{
					common: common{
						signal: make(chan bool),
						name:   b.name,
						w:      b.w,
						chatty: b.chatty,
						bench:  true,
					},
					benchFunc: b.benchFunc,
					benchTime: b.benchTime,
				}
				b.run1()
			}
			r := b.doBench()
			if b.failed {
				// The output could be very long here, but probably isn't.
				// We print it all, regardless, because we don't want to trim the reason
				// the benchmark failed.
				fmt.Fprintf(b.w, "%s--- FAIL: %s\n%s", b.chatty.prefix(), benchName, b.output)
				continue
			}
			results := r.String()
			if b.chatty != nil {
				fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName)
			}
			if *benchmarkMemory || b.showAllocResult {
				results += "\t" + r.MemString()
			}
			fmt.Fprintln(b.w, results)
			// Unlike with tests, we ignore the -chatty flag and always print output for
			// benchmarks since the output generation time will skew the results.
			if len(b.output) > 0 {
				b.trimOutput()
				fmt.Fprintf(b.w, "%s--- BENCH: %s\n%s", b.chatty.prefix(), benchName, b.output)
			}
			if p := runtime.GOMAXPROCS(-1); p != procs {
				fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p)
			}
			if b.chatty != nil && b.chatty.json {
				b.chatty.Updatef("", "=== NAME %s\n", "")
			}
		}
	}
}

// If hideStdoutForTesting is true, Run does not print the benchName.
// This avoids a spurious print during 'go test' on package testing itself,
// which invokes b.Run in its own tests (see sub_test.go).
var hideStdoutForTesting = false

// Run benchmarks f as a subbenchmark with the given name. It reports
// whether there were any failures.
//
// A subbenchmark is like any other benchmark. A benchmark that calls Run at
// least once will not be measured itself and will be called once with N=1.
func (b *B) Run(name string, f func(b *B)) bool {
	// Since b has subbenchmarks, we will no longer run it as a benchmark itself.
	// Release the lock and acquire it on exit to ensure locks stay paired.
	b.hasSub.Store(true)
	benchmarkLock.Unlock()
	defer benchmarkLock.Lock()

	benchName, ok, partial := b.name, true, false
	if b.context != nil {
		benchName, ok, partial = b.context.match.fullName(&b.common, name)
	}
	if !ok {
		return true
	}
	// Capture the caller stack so failures can be attributed to the creator.
	var pc [maxStackLen]uintptr
	n := runtime.Callers(2, pc[:])
	sub := &B{
		common: common{
			signal:  make(chan bool),
			name:    benchName,
			parent:  &b.common,
			level:   b.level + 1,
			creator: pc[:n],
			w:       b.w,
			chatty:  b.chatty,
			bench:   true,
		},
		importPath: b.importPath,
		benchFunc:  f,
		benchTime:  b.benchTime,
		context:    b.context,
	}
	if partial {
		// Partial name match, like -bench=X/Y matching BenchmarkX.
		// Only process sub-benchmarks, if any.
		sub.hasSub.Store(true)
	}

	if b.chatty != nil {
		labelsOnce.Do(func() {
			fmt.Printf("goos: %s\n", runtime.GOOS)
			fmt.Printf("goarch: %s\n", runtime.GOARCH)
			if b.importPath != "" {
				fmt.Printf("pkg: %s\n", b.importPath)
			}
			if cpu := sysinfo.CPU.Name(); cpu != "" {
				fmt.Printf("cpu: %s\n", cpu)
			}
		})

		if !hideStdoutForTesting {
			if b.chatty.json {
				b.chatty.Updatef(benchName, "=== RUN %s\n", benchName)
			}
			fmt.Println(benchName)
		}
	}

	if sub.run1() {
		sub.run()
	}
	b.add(sub.result)
	return !sub.failed
}

// add simulates running benchmarks in sequence in a single iteration. It is
// used to give some meaningful results in case func Benchmark is used in
// combination with Run.
func (b *B) add(other BenchmarkResult) {
	r := &b.result
	// The aggregated BenchmarkResults resemble running all subbenchmarks as
	// in sequence in a single benchmark.
	r.N = 1
	r.T += time.Duration(other.NsPerOp())
	if other.Bytes == 0 {
		// Summing Bytes is meaningless in aggregate if not all subbenchmarks
		// set it.
		b.missingBytes = true
		r.Bytes = 0
	}
	if !b.missingBytes {
		r.Bytes += other.Bytes
	}
	r.MemAllocs += uint64(other.AllocsPerOp())
	r.MemBytes += uint64(other.AllocedBytesPerOp())
}

// trimOutput shortens the output from a benchmark, which can be very long.
func (b *B) trimOutput() {
	// The output is likely to appear multiple times because the benchmark
	// is run multiple times, but at least it will be seen. This is not a big deal
	// because benchmarks rarely print, but just in case, we trim it if it's too long.
	const maxNewlines = 10
	for nlCount, j := 0, 0; j < len(b.output); j++ {
		if b.output[j] == '\n' {
			nlCount++
			if nlCount >= maxNewlines {
				b.output = append(b.output[:j], "\n\t... [output truncated]\n"...)
				break
			}
		}
	}
}

// A PB is used by RunParallel for running parallel benchmarks.
type PB struct {
	globalN *uint64 // shared between all worker goroutines iteration counter
	grain   uint64  // acquire that many iterations from globalN at once
	cache   uint64  // local cache of acquired iterations
	bN      uint64  // total number of iterations to execute (b.N)
}

// Next reports whether there are more iterations to execute.
func (pb *PB) Next() bool {
	if pb.cache == 0 {
		// Local batch exhausted: reserve another grain-sized batch from the
		// shared counter.
		n := atomic.AddUint64(pb.globalN, pb.grain)
		if n <= pb.bN {
			pb.cache = pb.grain
		} else if n < pb.bN+pb.grain {
			// Final, partial batch: take only the remaining iterations.
			pb.cache = pb.bN + pb.grain - n
		} else {
			return false
		}
	}
	pb.cache--
	return true
}

// RunParallel runs a benchmark in parallel.
// It creates multiple goroutines and distributes b.N iterations among them.
// The number of goroutines defaults to GOMAXPROCS. To increase parallelism for
// non-CPU-bound benchmarks, call SetParallelism before RunParallel.
// RunParallel is usually used with the go test -cpu flag.
//
// The body function will be run in each goroutine.
It should set up any 777 // goroutine-local state and then iterate until pb.Next returns false. 778 // It should not use the StartTimer, StopTimer, or ResetTimer functions, 779 // because they have global effect. It should also not call Run. 780 // 781 // RunParallel reports ns/op values as wall time for the benchmark as a whole, 782 // not the sum of wall time or CPU time over each parallel goroutine. 783 func (b *B) RunParallel(body func(*PB)) { 784 if b.N == 0 { 785 return // Nothing to do when probing. 786 } 787 // Calculate grain size as number of iterations that take ~100µs. 788 // 100µs is enough to amortize the overhead and provide sufficient 789 // dynamic load balancing. 790 grain := uint64(0) 791 if b.previousN > 0 && b.previousDuration > 0 { 792 grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration) 793 } 794 if grain < 1 { 795 grain = 1 796 } 797 // We expect the inner loop and function call to take at least 10ns, 798 // so do not do more than 100µs/10ns=1e4 iterations. 799 if grain > 1e4 { 800 grain = 1e4 801 } 802 803 n := uint64(0) 804 numProcs := b.parallelism * runtime.GOMAXPROCS(0) 805 var wg sync.WaitGroup 806 wg.Add(numProcs) 807 for p := 0; p < numProcs; p++ { 808 go func() { 809 defer wg.Done() 810 pb := &PB{ 811 globalN: &n, 812 grain: grain, 813 bN: uint64(b.N), 814 } 815 body(pb) 816 }() 817 } 818 wg.Wait() 819 if n <= uint64(b.N) && !b.Failed() { 820 b.Fatal("RunParallel: body exited without pb.Next() == false") 821 } 822 } 823 824 // SetParallelism sets the number of goroutines used by RunParallel to p*GOMAXPROCS. 825 // There is usually no need to call SetParallelism for CPU-bound benchmarks. 826 // If p is less than 1, this call will have no effect. 827 func (b *B) SetParallelism(p int) { 828 if p >= 1 { 829 b.parallelism = p 830 } 831 } 832 833 // Benchmark benchmarks a single function. It is useful for creating 834 // custom benchmarks that do not use the "go test" command. 
835 // 836 // If f depends on testing flags, then Init must be used to register 837 // those flags before calling Benchmark and before calling flag.Parse. 838 // 839 // If f calls Run, the result will be an estimate of running all its 840 // subbenchmarks that don't call Run in sequence in a single benchmark. 841 func Benchmark(f func(b *B)) BenchmarkResult { 842 b := &B{ 843 common: common{ 844 signal: make(chan bool), 845 w: discard{}, 846 }, 847 benchFunc: f, 848 benchTime: benchTime, 849 } 850 if b.run1() { 851 b.run() 852 } 853 return b.result 854 } 855 856 type discard struct{} 857 858 func (discard) Write(b []byte) (n int, err error) { return len(b), nil }