github.com/hikaru7719/go@v0.0.0-20181025140707-c8b2ac68906a/src/testing/benchmark.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package testing 6 7 import ( 8 "flag" 9 "fmt" 10 "internal/race" 11 "os" 12 "runtime" 13 "strconv" 14 "strings" 15 "sync" 16 "sync/atomic" 17 "time" 18 ) 19 20 var matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`") 21 var benchTime = benchTimeFlag{d: 1 * time.Second} 22 var benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks") 23 24 func init() { 25 flag.Var(&benchTime, "test.benchtime", "run each benchmark for duration `d`") 26 } 27 28 type benchTimeFlag struct { 29 d time.Duration 30 n int 31 } 32 33 func (f *benchTimeFlag) String() string { 34 if f.n > 0 { 35 return fmt.Sprintf("%dx", f.n) 36 } 37 return time.Duration(f.d).String() 38 } 39 40 func (f *benchTimeFlag) Set(s string) error { 41 if strings.HasSuffix(s, "x") { 42 n, err := strconv.ParseInt(s[:len(s)-1], 10, 0) 43 if err != nil || n <= 0 { 44 return fmt.Errorf("invalid count") 45 } 46 *f = benchTimeFlag{n: int(n)} 47 return nil 48 } 49 d, err := time.ParseDuration(s) 50 if err != nil || d <= 0 { 51 return fmt.Errorf("invalid duration") 52 } 53 *f = benchTimeFlag{d: d} 54 return nil 55 } 56 57 // Global lock to ensure only one benchmark runs at a time. 58 var benchmarkLock sync.Mutex 59 60 // Used for every benchmark for measuring memory. 61 var memStats runtime.MemStats 62 63 // An internal type but exported because it is cross-package; part of the implementation 64 // of the "go test" command. 65 type InternalBenchmark struct { 66 Name string 67 F func(b *B) 68 } 69 70 // B is a type passed to Benchmark functions to manage benchmark 71 // timing and to specify the number of iterations to run. 72 // 73 // A benchmark ends when its Benchmark function returns or calls any of the methods 74 // FailNow, Fatal, Fatalf, SkipNow, Skip, or Skipf. Those methods must be called 75 // only from the goroutine running the Benchmark function. 76 // The other reporting methods, such as the variations of Log and Error, 77 // may be called simultaneously from multiple goroutines. 78 // 79 // Like in tests, benchmark logs are accumulated during execution 80 // and dumped to standard error when done. Unlike in tests, benchmark logs 81 // are always printed, so as not to hide output whose existence may be 82 // affecting benchmark results. 83 type B struct { 84 common 85 importPath string // import path of the package containing the benchmark 86 context *benchContext 87 N int 88 previousN int // number of iterations in the previous run 89 previousDuration time.Duration // total duration of the previous run 90 benchFunc func(b *B) 91 benchTime benchTimeFlag 92 bytes int64 93 missingBytes bool // one of the subbenchmarks does not have bytes set. 94 timerOn bool 95 showAllocResult bool 96 result BenchmarkResult 97 parallelism int // RunParallel creates parallelism*GOMAXPROCS goroutines 98 // The initial states of memStats.Mallocs and memStats.TotalAlloc. 99 startAllocs uint64 100 startBytes uint64 101 // The net total of this test after being run. 102 netAllocs uint64 103 netBytes uint64 104 } 105 106 // StartTimer starts timing a test. This function is called automatically 107 // before a benchmark starts, but it can also used to resume timing after 108 // a call to StopTimer. 109 func (b *B) StartTimer() { 110 if !b.timerOn { 111 runtime.ReadMemStats(&memStats) 112 b.startAllocs = memStats.Mallocs 113 b.startBytes = memStats.TotalAlloc 114 b.start = time.Now() 115 b.timerOn = true 116 } 117 } 118 119 // StopTimer stops timing a test. This can be used to pause the timer 120 // while performing complex initialization that you don't 121 // want to measure. 122 func (b *B) StopTimer() { 123 if b.timerOn { 124 b.duration += time.Since(b.start) 125 runtime.ReadMemStats(&memStats) 126 b.netAllocs += memStats.Mallocs - b.startAllocs 127 b.netBytes += memStats.TotalAlloc - b.startBytes 128 b.timerOn = false 129 } 130 } 131 132 // ResetTimer zeros the elapsed benchmark time and memory allocation counters. 133 // It does not affect whether the timer is running. 134 func (b *B) ResetTimer() { 135 if b.timerOn { 136 runtime.ReadMemStats(&memStats) 137 b.startAllocs = memStats.Mallocs 138 b.startBytes = memStats.TotalAlloc 139 b.start = time.Now() 140 } 141 b.duration = 0 142 b.netAllocs = 0 143 b.netBytes = 0 144 } 145 146 // SetBytes records the number of bytes processed in a single operation. 147 // If this is called, the benchmark will report ns/op and MB/s. 148 func (b *B) SetBytes(n int64) { b.bytes = n } 149 150 // ReportAllocs enables malloc statistics for this benchmark. 151 // It is equivalent to setting -test.benchmem, but it only affects the 152 // benchmark function that calls ReportAllocs. 153 func (b *B) ReportAllocs() { 154 b.showAllocResult = true 155 } 156 157 func (b *B) nsPerOp() int64 { 158 if b.N <= 0 { 159 return 0 160 } 161 return b.duration.Nanoseconds() / int64(b.N) 162 } 163 164 // runN runs a single benchmark for the specified number of iterations. 165 func (b *B) runN(n int) { 166 benchmarkLock.Lock() 167 defer benchmarkLock.Unlock() 168 // Try to get a comparable environment for each run 169 // by clearing garbage from previous runs. 170 runtime.GC() 171 b.raceErrors = -race.Errors() 172 b.N = n 173 b.parallelism = 1 174 b.ResetTimer() 175 b.StartTimer() 176 b.benchFunc(b) 177 b.StopTimer() 178 b.previousN = n 179 b.previousDuration = b.duration 180 b.raceErrors += race.Errors() 181 if b.raceErrors > 0 { 182 b.Errorf("race detected during execution of benchmark") 183 } 184 } 185 186 func min(x, y int) int { 187 if x > y { 188 return y 189 } 190 return x 191 } 192 193 func max(x, y int) int { 194 if x < y { 195 return y 196 } 197 return x 198 } 199 200 // roundDown10 rounds a number down to the nearest power of 10. 201 func roundDown10(n int) int { 202 var tens = 0 203 // tens = floor(log_10(n)) 204 for n >= 10 { 205 n = n / 10 206 tens++ 207 } 208 // result = 10^tens 209 result := 1 210 for i := 0; i < tens; i++ { 211 result *= 10 212 } 213 return result 214 } 215 216 // roundUp rounds x up to a number of the form [1eX, 2eX, 3eX, 5eX]. 217 func roundUp(n int) int { 218 base := roundDown10(n) 219 switch { 220 case n <= base: 221 return base 222 case n <= (2 * base): 223 return 2 * base 224 case n <= (3 * base): 225 return 3 * base 226 case n <= (5 * base): 227 return 5 * base 228 default: 229 return 10 * base 230 } 231 } 232 233 // run1 runs the first iteration of benchFunc. It returns whether more 234 // iterations of this benchmarks should be run. 235 func (b *B) run1() bool { 236 if ctx := b.context; ctx != nil { 237 // Extend maxLen, if needed. 238 if n := len(b.name) + ctx.extLen + 1; n > ctx.maxLen { 239 ctx.maxLen = n + 8 // Add additional slack to avoid too many jumps in size. 240 } 241 } 242 go func() { 243 // Signal that we're done whether we return normally 244 // or by FailNow's runtime.Goexit. 245 defer func() { 246 b.signal <- true 247 }() 248 249 b.runN(1) 250 }() 251 <-b.signal 252 if b.failed { 253 fmt.Fprintf(b.w, "--- FAIL: %s\n%s", b.name, b.output) 254 return false 255 } 256 // Only print the output if we know we are not going to proceed. 257 // Otherwise it is printed in processBench. 258 if atomic.LoadInt32(&b.hasSub) != 0 || b.finished { 259 tag := "BENCH" 260 if b.skipped { 261 tag = "SKIP" 262 } 263 if b.chatty && (len(b.output) > 0 || b.finished) { 264 b.trimOutput() 265 fmt.Fprintf(b.w, "--- %s: %s\n%s", tag, b.name, b.output) 266 } 267 return false 268 } 269 return true 270 } 271 272 var labelsOnce sync.Once 273 274 // run executes the benchmark in a separate goroutine, including all of its 275 // subbenchmarks. b must not have subbenchmarks. 276 func (b *B) run() { 277 labelsOnce.Do(func() { 278 fmt.Fprintf(b.w, "goos: %s\n", runtime.GOOS) 279 fmt.Fprintf(b.w, "goarch: %s\n", runtime.GOARCH) 280 if b.importPath != "" { 281 fmt.Fprintf(b.w, "pkg: %s\n", b.importPath) 282 } 283 }) 284 if b.context != nil { 285 // Running go test --test.bench 286 b.context.processBench(b) // Must call doBench. 287 } else { 288 // Running func Benchmark. 289 b.doBench() 290 } 291 } 292 293 func (b *B) doBench() BenchmarkResult { 294 go b.launch() 295 <-b.signal 296 return b.result 297 } 298 299 // launch launches the benchmark function. It gradually increases the number 300 // of benchmark iterations until the benchmark runs for the requested benchtime. 301 // launch is run by the doBench function as a separate goroutine. 302 // run1 must have been called on b. 303 func (b *B) launch() { 304 // Signal that we're done whether we return normally 305 // or by FailNow's runtime.Goexit. 306 defer func() { 307 b.signal <- true 308 }() 309 310 // Run the benchmark for at least the specified amount of time. 311 if b.benchTime.n > 0 { 312 b.runN(b.benchTime.n) 313 } else { 314 d := b.benchTime.d 315 for n := 1; !b.failed && b.duration < d && n < 1e9; { 316 last := n 317 // Predict required iterations. 318 n = int(d.Nanoseconds()) 319 if nsop := b.nsPerOp(); nsop != 0 { 320 n /= int(nsop) 321 } 322 // Run more iterations than we think we'll need (1.2x). 323 // Don't grow too fast in case we had timing errors previously. 324 // Be sure to run at least one more than last time. 325 n = max(min(n+n/5, 100*last), last+1) 326 // Round up to something easy to read. 327 n = roundUp(n) 328 b.runN(n) 329 } 330 } 331 b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes} 332 } 333 334 // The results of a benchmark run. 335 type BenchmarkResult struct { 336 N int // The number of iterations. 337 T time.Duration // The total time taken. 338 Bytes int64 // Bytes processed in one iteration. 339 MemAllocs uint64 // The total number of memory allocations. 340 MemBytes uint64 // The total number of bytes allocated. 341 } 342 343 func (r BenchmarkResult) NsPerOp() int64 { 344 if r.N <= 0 { 345 return 0 346 } 347 return r.T.Nanoseconds() / int64(r.N) 348 } 349 350 func (r BenchmarkResult) mbPerSec() float64 { 351 if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 { 352 return 0 353 } 354 return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds() 355 } 356 357 // AllocsPerOp returns r.MemAllocs / r.N. 358 func (r BenchmarkResult) AllocsPerOp() int64 { 359 if r.N <= 0 { 360 return 0 361 } 362 return int64(r.MemAllocs) / int64(r.N) 363 } 364 365 // AllocedBytesPerOp returns r.MemBytes / r.N. 366 func (r BenchmarkResult) AllocedBytesPerOp() int64 { 367 if r.N <= 0 { 368 return 0 369 } 370 return int64(r.MemBytes) / int64(r.N) 371 } 372 373 func (r BenchmarkResult) String() string { 374 mbs := r.mbPerSec() 375 mb := "" 376 if mbs != 0 { 377 mb = fmt.Sprintf("\t%7.2f MB/s", mbs) 378 } 379 nsop := r.NsPerOp() 380 ns := fmt.Sprintf("%10d ns/op", nsop) 381 if r.N > 0 && nsop < 100 { 382 // The format specifiers here make sure that 383 // the ones digits line up for all three possible formats. 384 if nsop < 10 { 385 ns = fmt.Sprintf("%13.2f ns/op", float64(r.T.Nanoseconds())/float64(r.N)) 386 } else { 387 ns = fmt.Sprintf("%12.1f ns/op", float64(r.T.Nanoseconds())/float64(r.N)) 388 } 389 } 390 return fmt.Sprintf("%8d\t%s%s", r.N, ns, mb) 391 } 392 393 // MemString returns r.AllocedBytesPerOp and r.AllocsPerOp in the same format as 'go test'. 394 func (r BenchmarkResult) MemString() string { 395 return fmt.Sprintf("%8d B/op\t%8d allocs/op", 396 r.AllocedBytesPerOp(), r.AllocsPerOp()) 397 } 398 399 // benchmarkName returns full name of benchmark including procs suffix. 400 func benchmarkName(name string, n int) string { 401 if n != 1 { 402 return fmt.Sprintf("%s-%d", name, n) 403 } 404 return name 405 } 406 407 type benchContext struct { 408 match *matcher 409 410 maxLen int // The largest recorded benchmark name. 411 extLen int // Maximum extension length. 412 } 413 414 // An internal function but exported because it is cross-package; part of the implementation 415 // of the "go test" command. 416 func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) { 417 runBenchmarks("", matchString, benchmarks) 418 } 419 420 func runBenchmarks(importPath string, matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool { 421 // If no flag was specified, don't run benchmarks. 422 if len(*matchBenchmarks) == 0 { 423 return true 424 } 425 // Collect matching benchmarks and determine longest name. 426 maxprocs := 1 427 for _, procs := range cpuList { 428 if procs > maxprocs { 429 maxprocs = procs 430 } 431 } 432 ctx := &benchContext{ 433 match: newMatcher(matchString, *matchBenchmarks, "-test.bench"), 434 extLen: len(benchmarkName("", maxprocs)), 435 } 436 var bs []InternalBenchmark 437 for _, Benchmark := range benchmarks { 438 if _, matched, _ := ctx.match.fullName(nil, Benchmark.Name); matched { 439 bs = append(bs, Benchmark) 440 benchName := benchmarkName(Benchmark.Name, maxprocs) 441 if l := len(benchName) + ctx.extLen + 1; l > ctx.maxLen { 442 ctx.maxLen = l 443 } 444 } 445 } 446 main := &B{ 447 common: common{ 448 name: "Main", 449 w: os.Stdout, 450 chatty: *chatty, 451 }, 452 importPath: importPath, 453 benchFunc: func(b *B) { 454 for _, Benchmark := range bs { 455 b.Run(Benchmark.Name, Benchmark.F) 456 } 457 }, 458 benchTime: benchTime, 459 context: ctx, 460 } 461 main.runN(1) 462 return !main.failed 463 } 464 465 // processBench runs bench b for the configured CPU counts and prints the results. 466 func (ctx *benchContext) processBench(b *B) { 467 for i, procs := range cpuList { 468 for j := uint(0); j < *count; j++ { 469 runtime.GOMAXPROCS(procs) 470 benchName := benchmarkName(b.name, procs) 471 fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName) 472 // Recompute the running time for all but the first iteration. 473 if i > 0 || j > 0 { 474 b = &B{ 475 common: common{ 476 signal: make(chan bool), 477 name: b.name, 478 w: b.w, 479 chatty: b.chatty, 480 }, 481 benchFunc: b.benchFunc, 482 benchTime: b.benchTime, 483 } 484 b.run1() 485 } 486 r := b.doBench() 487 if b.failed { 488 // The output could be very long here, but probably isn't. 489 // We print it all, regardless, because we don't want to trim the reason 490 // the benchmark failed. 491 fmt.Fprintf(b.w, "--- FAIL: %s\n%s", benchName, b.output) 492 continue 493 } 494 results := r.String() 495 if *benchmarkMemory || b.showAllocResult { 496 results += "\t" + r.MemString() 497 } 498 fmt.Fprintln(b.w, results) 499 // Unlike with tests, we ignore the -chatty flag and always print output for 500 // benchmarks since the output generation time will skew the results. 501 if len(b.output) > 0 { 502 b.trimOutput() 503 fmt.Fprintf(b.w, "--- BENCH: %s\n%s", benchName, b.output) 504 } 505 if p := runtime.GOMAXPROCS(-1); p != procs { 506 fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p) 507 } 508 } 509 } 510 } 511 512 // Run benchmarks f as a subbenchmark with the given name. It reports 513 // whether there were any failures. 514 // 515 // A subbenchmark is like any other benchmark. A benchmark that calls Run at 516 // least once will not be measured itself and will be called once with N=1. 517 func (b *B) Run(name string, f func(b *B)) bool { 518 // Since b has subbenchmarks, we will no longer run it as a benchmark itself. 519 // Release the lock and acquire it on exit to ensure locks stay paired. 520 atomic.StoreInt32(&b.hasSub, 1) 521 benchmarkLock.Unlock() 522 defer benchmarkLock.Lock() 523 524 benchName, ok, partial := b.name, true, false 525 if b.context != nil { 526 benchName, ok, partial = b.context.match.fullName(&b.common, name) 527 } 528 if !ok { 529 return true 530 } 531 var pc [maxStackLen]uintptr 532 n := runtime.Callers(2, pc[:]) 533 sub := &B{ 534 common: common{ 535 signal: make(chan bool), 536 name: benchName, 537 parent: &b.common, 538 level: b.level + 1, 539 creator: pc[:n], 540 w: b.w, 541 chatty: b.chatty, 542 }, 543 importPath: b.importPath, 544 benchFunc: f, 545 benchTime: b.benchTime, 546 context: b.context, 547 } 548 if partial { 549 // Partial name match, like -bench=X/Y matching BenchmarkX. 550 // Only process sub-benchmarks, if any. 551 atomic.StoreInt32(&sub.hasSub, 1) 552 } 553 if sub.run1() { 554 sub.run() 555 } 556 b.add(sub.result) 557 return !sub.failed 558 } 559 560 // add simulates running benchmarks in sequence in a single iteration. It is 561 // used to give some meaningful results in case func Benchmark is used in 562 // combination with Run. 563 func (b *B) add(other BenchmarkResult) { 564 r := &b.result 565 // The aggregated BenchmarkResults resemble running all subbenchmarks as 566 // in sequence in a single benchmark. 567 r.N = 1 568 r.T += time.Duration(other.NsPerOp()) 569 if other.Bytes == 0 { 570 // Summing Bytes is meaningless in aggregate if not all subbenchmarks 571 // set it. 572 b.missingBytes = true 573 r.Bytes = 0 574 } 575 if !b.missingBytes { 576 r.Bytes += other.Bytes 577 } 578 r.MemAllocs += uint64(other.AllocsPerOp()) 579 r.MemBytes += uint64(other.AllocedBytesPerOp()) 580 } 581 582 // trimOutput shortens the output from a benchmark, which can be very long. 583 func (b *B) trimOutput() { 584 // The output is likely to appear multiple times because the benchmark 585 // is run multiple times, but at least it will be seen. This is not a big deal 586 // because benchmarks rarely print, but just in case, we trim it if it's too long. 587 const maxNewlines = 10 588 for nlCount, j := 0, 0; j < len(b.output); j++ { 589 if b.output[j] == '\n' { 590 nlCount++ 591 if nlCount >= maxNewlines { 592 b.output = append(b.output[:j], "\n\t... [output truncated]\n"...) 593 break 594 } 595 } 596 } 597 } 598 599 // A PB is used by RunParallel for running parallel benchmarks. 600 type PB struct { 601 globalN *uint64 // shared between all worker goroutines iteration counter 602 grain uint64 // acquire that many iterations from globalN at once 603 cache uint64 // local cache of acquired iterations 604 bN uint64 // total number of iterations to execute (b.N) 605 } 606 607 // Next reports whether there are more iterations to execute. 608 func (pb *PB) Next() bool { 609 if pb.cache == 0 { 610 n := atomic.AddUint64(pb.globalN, pb.grain) 611 if n <= pb.bN { 612 pb.cache = pb.grain 613 } else if n < pb.bN+pb.grain { 614 pb.cache = pb.bN + pb.grain - n 615 } else { 616 return false 617 } 618 } 619 pb.cache-- 620 return true 621 } 622 623 // RunParallel runs a benchmark in parallel. 624 // It creates multiple goroutines and distributes b.N iterations among them. 625 // The number of goroutines defaults to GOMAXPROCS. To increase parallelism for 626 // non-CPU-bound benchmarks, call SetParallelism before RunParallel. 627 // RunParallel is usually used with the go test -cpu flag. 628 // 629 // The body function will be run in each goroutine. It should set up any 630 // goroutine-local state and then iterate until pb.Next returns false. 631 // It should not use the StartTimer, StopTimer, or ResetTimer functions, 632 // because they have global effect. It should also not call Run. 633 func (b *B) RunParallel(body func(*PB)) { 634 if b.N == 0 { 635 return // Nothing to do when probing. 636 } 637 // Calculate grain size as number of iterations that take ~100µs. 638 // 100µs is enough to amortize the overhead and provide sufficient 639 // dynamic load balancing. 640 grain := uint64(0) 641 if b.previousN > 0 && b.previousDuration > 0 { 642 grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration) 643 } 644 if grain < 1 { 645 grain = 1 646 } 647 // We expect the inner loop and function call to take at least 10ns, 648 // so do not do more than 100µs/10ns=1e4 iterations. 649 if grain > 1e4 { 650 grain = 1e4 651 } 652 653 n := uint64(0) 654 numProcs := b.parallelism * runtime.GOMAXPROCS(0) 655 var wg sync.WaitGroup 656 wg.Add(numProcs) 657 for p := 0; p < numProcs; p++ { 658 go func() { 659 defer wg.Done() 660 pb := &PB{ 661 globalN: &n, 662 grain: grain, 663 bN: uint64(b.N), 664 } 665 body(pb) 666 }() 667 } 668 wg.Wait() 669 if n <= uint64(b.N) && !b.Failed() { 670 b.Fatal("RunParallel: body exited without pb.Next() == false") 671 } 672 } 673 674 // SetParallelism sets the number of goroutines used by RunParallel to p*GOMAXPROCS. 675 // There is usually no need to call SetParallelism for CPU-bound benchmarks. 676 // If p is less than 1, this call will have no effect. 677 func (b *B) SetParallelism(p int) { 678 if p >= 1 { 679 b.parallelism = p 680 } 681 } 682 683 // Benchmark benchmarks a single function. Useful for creating 684 // custom benchmarks that do not use the "go test" command. 685 // 686 // If f calls Run, the result will be an estimate of running all its 687 // subbenchmarks that don't call Run in sequence in a single benchmark. 688 func Benchmark(f func(b *B)) BenchmarkResult { 689 b := &B{ 690 common: common{ 691 signal: make(chan bool), 692 w: discard{}, 693 }, 694 benchFunc: f, 695 benchTime: benchTime, 696 } 697 if b.run1() { 698 b.run() 699 } 700 return b.result 701 } 702 703 type discard struct{} 704 705 func (discard) Write(b []byte) (n int, err error) { return len(b), nil }