github.com/hlts2/go@v0.0.0-20170904000733-812b34efaed8/src/testing/benchmark.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package testing 6 7 import ( 8 "flag" 9 "fmt" 10 "internal/race" 11 "os" 12 "runtime" 13 "sync" 14 "sync/atomic" 15 "time" 16 ) 17 18 var matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`") 19 var benchTime = flag.Duration("test.benchtime", 1*time.Second, "run each benchmark for duration `d`") 20 var benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks") 21 22 // Global lock to ensure only one benchmark runs at a time. 23 var benchmarkLock sync.Mutex 24 25 // Used for every benchmark for measuring memory. 26 var memStats runtime.MemStats 27 28 // An internal type but exported because it is cross-package; part of the implementation 29 // of the "go test" command. 30 type InternalBenchmark struct { 31 Name string 32 F func(b *B) 33 } 34 35 // B is a type passed to Benchmark functions to manage benchmark 36 // timing and to specify the number of iterations to run. 37 // 38 // A benchmark ends when its Benchmark function returns or calls any of the methods 39 // FailNow, Fatal, Fatalf, SkipNow, Skip, or Skipf. Those methods must be called 40 // only from the goroutine running the Benchmark function. 41 // The other reporting methods, such as the variations of Log and Error, 42 // may be called simultaneously from multiple goroutines. 43 // 44 // Like in tests, benchmark logs are accumulated during execution 45 // and dumped to standard error when done. Unlike in tests, benchmark logs 46 // are always printed, so as not to hide output whose existence may be 47 // affecting benchmark results. 48 type B struct { 49 common 50 importPath string // import path of the package containing the benchmark 51 context *benchContext 52 N int 53 previousN int // number of iterations in the previous run 54 previousDuration time.Duration // total duration of the previous run 55 benchFunc func(b *B) 56 benchTime time.Duration 57 bytes int64 58 missingBytes bool // one of the subbenchmarks does not have bytes set. 59 timerOn bool 60 showAllocResult bool 61 result BenchmarkResult 62 parallelism int // RunParallel creates parallelism*GOMAXPROCS goroutines 63 // The initial states of memStats.Mallocs and memStats.TotalAlloc. 64 startAllocs uint64 65 startBytes uint64 66 // The net total of this test after being run. 67 netAllocs uint64 68 netBytes uint64 69 } 70 71 // StartTimer starts timing a test. This function is called automatically 72 // before a benchmark starts, but it can also used to resume timing after 73 // a call to StopTimer. 74 func (b *B) StartTimer() { 75 if !b.timerOn { 76 runtime.ReadMemStats(&memStats) 77 b.startAllocs = memStats.Mallocs 78 b.startBytes = memStats.TotalAlloc 79 b.start = time.Now() 80 b.timerOn = true 81 } 82 } 83 84 // StopTimer stops timing a test. This can be used to pause the timer 85 // while performing complex initialization that you don't 86 // want to measure. 87 func (b *B) StopTimer() { 88 if b.timerOn { 89 b.duration += time.Now().Sub(b.start) 90 runtime.ReadMemStats(&memStats) 91 b.netAllocs += memStats.Mallocs - b.startAllocs 92 b.netBytes += memStats.TotalAlloc - b.startBytes 93 b.timerOn = false 94 } 95 } 96 97 // ResetTimer zeros the elapsed benchmark time and memory allocation counters. 98 // It does not affect whether the timer is running. 99 func (b *B) ResetTimer() { 100 if b.timerOn { 101 runtime.ReadMemStats(&memStats) 102 b.startAllocs = memStats.Mallocs 103 b.startBytes = memStats.TotalAlloc 104 b.start = time.Now() 105 } 106 b.duration = 0 107 b.netAllocs = 0 108 b.netBytes = 0 109 } 110 111 // SetBytes records the number of bytes processed in a single operation. 112 // If this is called, the benchmark will report ns/op and MB/s. 113 func (b *B) SetBytes(n int64) { b.bytes = n } 114 115 // ReportAllocs enables malloc statistics for this benchmark. 116 // It is equivalent to setting -test.benchmem, but it only affects the 117 // benchmark function that calls ReportAllocs. 118 func (b *B) ReportAllocs() { 119 b.showAllocResult = true 120 } 121 122 func (b *B) nsPerOp() int64 { 123 if b.N <= 0 { 124 return 0 125 } 126 return b.duration.Nanoseconds() / int64(b.N) 127 } 128 129 // runN runs a single benchmark for the specified number of iterations. 130 func (b *B) runN(n int) { 131 benchmarkLock.Lock() 132 defer benchmarkLock.Unlock() 133 // Try to get a comparable environment for each run 134 // by clearing garbage from previous runs. 135 runtime.GC() 136 b.raceErrors = -race.Errors() 137 b.N = n 138 b.parallelism = 1 139 b.ResetTimer() 140 b.StartTimer() 141 b.benchFunc(b) 142 b.StopTimer() 143 b.previousN = n 144 b.previousDuration = b.duration 145 b.raceErrors += race.Errors() 146 if b.raceErrors > 0 { 147 b.Errorf("race detected during execution of benchmark") 148 } 149 } 150 151 func min(x, y int) int { 152 if x > y { 153 return y 154 } 155 return x 156 } 157 158 func max(x, y int) int { 159 if x < y { 160 return y 161 } 162 return x 163 } 164 165 // roundDown10 rounds a number down to the nearest power of 10. 166 func roundDown10(n int) int { 167 var tens = 0 168 // tens = floor(log_10(n)) 169 for n >= 10 { 170 n = n / 10 171 tens++ 172 } 173 // result = 10^tens 174 result := 1 175 for i := 0; i < tens; i++ { 176 result *= 10 177 } 178 return result 179 } 180 181 // roundUp rounds x up to a number of the form [1eX, 2eX, 3eX, 5eX]. 182 func roundUp(n int) int { 183 base := roundDown10(n) 184 switch { 185 case n <= base: 186 return base 187 case n <= (2 * base): 188 return 2 * base 189 case n <= (3 * base): 190 return 3 * base 191 case n <= (5 * base): 192 return 5 * base 193 default: 194 return 10 * base 195 } 196 } 197 198 // run1 runs the first iteration of benchFunc. It returns whether more 199 // iterations of this benchmarks should be run. 200 func (b *B) run1() bool { 201 if ctx := b.context; ctx != nil { 202 // Extend maxLen, if needed. 203 if n := len(b.name) + ctx.extLen + 1; n > ctx.maxLen { 204 ctx.maxLen = n + 8 // Add additional slack to avoid too many jumps in size. 205 } 206 } 207 go func() { 208 // Signal that we're done whether we return normally 209 // or by FailNow's runtime.Goexit. 210 defer func() { 211 b.signal <- true 212 }() 213 214 b.runN(1) 215 }() 216 <-b.signal 217 if b.failed { 218 fmt.Fprintf(b.w, "--- FAIL: %s\n%s", b.name, b.output) 219 return false 220 } 221 // Only print the output if we know we are not going to proceed. 222 // Otherwise it is printed in processBench. 223 if atomic.LoadInt32(&b.hasSub) != 0 || b.finished { 224 tag := "BENCH" 225 if b.skipped { 226 tag = "SKIP" 227 } 228 if b.chatty && (len(b.output) > 0 || b.finished) { 229 b.trimOutput() 230 fmt.Fprintf(b.w, "--- %s: %s\n%s", tag, b.name, b.output) 231 } 232 return false 233 } 234 return true 235 } 236 237 var labelsOnce sync.Once 238 239 // run executes the benchmark in a separate goroutine, including all of its 240 // subbenchmarks. b must not have subbenchmarks. 241 func (b *B) run() { 242 labelsOnce.Do(func() { 243 fmt.Fprintf(b.w, "goos: %s\n", runtime.GOOS) 244 fmt.Fprintf(b.w, "goarch: %s\n", runtime.GOARCH) 245 if b.importPath != "" { 246 fmt.Fprintf(b.w, "pkg: %s\n", b.importPath) 247 } 248 }) 249 if b.context != nil { 250 // Running go test --test.bench 251 b.context.processBench(b) // Must call doBench. 252 } else { 253 // Running func Benchmark. 254 b.doBench() 255 } 256 } 257 258 func (b *B) doBench() BenchmarkResult { 259 go b.launch() 260 <-b.signal 261 return b.result 262 } 263 264 // launch launches the benchmark function. It gradually increases the number 265 // of benchmark iterations until the benchmark runs for the requested benchtime. 266 // launch is run by the doBench function as a separate goroutine. 267 // run1 must have been called on b. 268 func (b *B) launch() { 269 // Signal that we're done whether we return normally 270 // or by FailNow's runtime.Goexit. 271 defer func() { 272 b.signal <- true 273 }() 274 275 // Run the benchmark for at least the specified amount of time. 276 d := b.benchTime 277 for n := 1; !b.failed && b.duration < d && n < 1e9; { 278 last := n 279 // Predict required iterations. 280 n = int(d.Nanoseconds()) 281 if nsop := b.nsPerOp(); nsop != 0 { 282 n /= int(nsop) 283 } 284 // Run more iterations than we think we'll need (1.2x). 285 // Don't grow too fast in case we had timing errors previously. 286 // Be sure to run at least one more than last time. 287 n = max(min(n+n/5, 100*last), last+1) 288 // Round up to something easy to read. 289 n = roundUp(n) 290 b.runN(n) 291 } 292 b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes} 293 } 294 295 // The results of a benchmark run. 296 type BenchmarkResult struct { 297 N int // The number of iterations. 298 T time.Duration // The total time taken. 299 Bytes int64 // Bytes processed in one iteration. 300 MemAllocs uint64 // The total number of memory allocations. 301 MemBytes uint64 // The total number of bytes allocated. 302 } 303 304 func (r BenchmarkResult) NsPerOp() int64 { 305 if r.N <= 0 { 306 return 0 307 } 308 return r.T.Nanoseconds() / int64(r.N) 309 } 310 311 func (r BenchmarkResult) mbPerSec() float64 { 312 if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 { 313 return 0 314 } 315 return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds() 316 } 317 318 // AllocsPerOp returns r.MemAllocs / r.N. 319 func (r BenchmarkResult) AllocsPerOp() int64 { 320 if r.N <= 0 { 321 return 0 322 } 323 return int64(r.MemAllocs) / int64(r.N) 324 } 325 326 // AllocedBytesPerOp returns r.MemBytes / r.N. 327 func (r BenchmarkResult) AllocedBytesPerOp() int64 { 328 if r.N <= 0 { 329 return 0 330 } 331 return int64(r.MemBytes) / int64(r.N) 332 } 333 334 func (r BenchmarkResult) String() string { 335 mbs := r.mbPerSec() 336 mb := "" 337 if mbs != 0 { 338 mb = fmt.Sprintf("\t%7.2f MB/s", mbs) 339 } 340 nsop := r.NsPerOp() 341 ns := fmt.Sprintf("%10d ns/op", nsop) 342 if r.N > 0 && nsop < 100 { 343 // The format specifiers here make sure that 344 // the ones digits line up for all three possible formats. 345 if nsop < 10 { 346 ns = fmt.Sprintf("%13.2f ns/op", float64(r.T.Nanoseconds())/float64(r.N)) 347 } else { 348 ns = fmt.Sprintf("%12.1f ns/op", float64(r.T.Nanoseconds())/float64(r.N)) 349 } 350 } 351 return fmt.Sprintf("%8d\t%s%s", r.N, ns, mb) 352 } 353 354 // MemString returns r.AllocedBytesPerOp and r.AllocsPerOp in the same format as 'go test'. 355 func (r BenchmarkResult) MemString() string { 356 return fmt.Sprintf("%8d B/op\t%8d allocs/op", 357 r.AllocedBytesPerOp(), r.AllocsPerOp()) 358 } 359 360 // benchmarkName returns full name of benchmark including procs suffix. 361 func benchmarkName(name string, n int) string { 362 if n != 1 { 363 return fmt.Sprintf("%s-%d", name, n) 364 } 365 return name 366 } 367 368 type benchContext struct { 369 match *matcher 370 371 maxLen int // The largest recorded benchmark name. 372 extLen int // Maximum extension length. 373 } 374 375 // An internal function but exported because it is cross-package; part of the implementation 376 // of the "go test" command. 377 func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) { 378 runBenchmarks("", matchString, benchmarks) 379 } 380 381 func runBenchmarks(importPath string, matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool { 382 // If no flag was specified, don't run benchmarks. 383 if len(*matchBenchmarks) == 0 { 384 return true 385 } 386 // Collect matching benchmarks and determine longest name. 387 maxprocs := 1 388 for _, procs := range cpuList { 389 if procs > maxprocs { 390 maxprocs = procs 391 } 392 } 393 ctx := &benchContext{ 394 match: newMatcher(matchString, *matchBenchmarks, "-test.bench"), 395 extLen: len(benchmarkName("", maxprocs)), 396 } 397 var bs []InternalBenchmark 398 for _, Benchmark := range benchmarks { 399 if _, matched, _ := ctx.match.fullName(nil, Benchmark.Name); matched { 400 bs = append(bs, Benchmark) 401 benchName := benchmarkName(Benchmark.Name, maxprocs) 402 if l := len(benchName) + ctx.extLen + 1; l > ctx.maxLen { 403 ctx.maxLen = l 404 } 405 } 406 } 407 main := &B{ 408 common: common{ 409 name: "Main", 410 w: os.Stdout, 411 chatty: *chatty, 412 }, 413 importPath: importPath, 414 benchFunc: func(b *B) { 415 for _, Benchmark := range bs { 416 b.Run(Benchmark.Name, Benchmark.F) 417 } 418 }, 419 benchTime: *benchTime, 420 context: ctx, 421 } 422 main.runN(1) 423 return !main.failed 424 } 425 426 // processBench runs bench b for the configured CPU counts and prints the results. 427 func (ctx *benchContext) processBench(b *B) { 428 for i, procs := range cpuList { 429 for j := uint(0); j < *count; j++ { 430 runtime.GOMAXPROCS(procs) 431 benchName := benchmarkName(b.name, procs) 432 fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName) 433 // Recompute the running time for all but the first iteration. 434 if i > 0 || j > 0 { 435 b = &B{ 436 common: common{ 437 signal: make(chan bool), 438 name: b.name, 439 w: b.w, 440 chatty: b.chatty, 441 }, 442 benchFunc: b.benchFunc, 443 benchTime: b.benchTime, 444 } 445 b.run1() 446 } 447 r := b.doBench() 448 if b.failed { 449 // The output could be very long here, but probably isn't. 450 // We print it all, regardless, because we don't want to trim the reason 451 // the benchmark failed. 452 fmt.Fprintf(b.w, "--- FAIL: %s\n%s", benchName, b.output) 453 continue 454 } 455 results := r.String() 456 if *benchmarkMemory || b.showAllocResult { 457 results += "\t" + r.MemString() 458 } 459 fmt.Fprintln(b.w, results) 460 // Unlike with tests, we ignore the -chatty flag and always print output for 461 // benchmarks since the output generation time will skew the results. 462 if len(b.output) > 0 { 463 b.trimOutput() 464 fmt.Fprintf(b.w, "--- BENCH: %s\n%s", benchName, b.output) 465 } 466 if p := runtime.GOMAXPROCS(-1); p != procs { 467 fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p) 468 } 469 } 470 } 471 } 472 473 // Run benchmarks f as a subbenchmark with the given name. It reports 474 // whether there were any failures. 475 // 476 // A subbenchmark is like any other benchmark. A benchmark that calls Run at 477 // least once will not be measured itself and will be called once with N=1. 478 // 479 // Run may be called simultaneously from multiple goroutines, but all such 480 // calls must return before the outer benchmark function for b returns. 481 func (b *B) Run(name string, f func(b *B)) bool { 482 // Since b has subbenchmarks, we will no longer run it as a benchmark itself. 483 // Release the lock and acquire it on exit to ensure locks stay paired. 484 atomic.StoreInt32(&b.hasSub, 1) 485 benchmarkLock.Unlock() 486 defer benchmarkLock.Lock() 487 488 benchName, ok, partial := b.name, true, false 489 if b.context != nil { 490 benchName, ok, partial = b.context.match.fullName(&b.common, name) 491 } 492 if !ok { 493 return true 494 } 495 sub := &B{ 496 common: common{ 497 signal: make(chan bool), 498 name: benchName, 499 parent: &b.common, 500 level: b.level + 1, 501 w: b.w, 502 chatty: b.chatty, 503 }, 504 importPath: b.importPath, 505 benchFunc: f, 506 benchTime: b.benchTime, 507 context: b.context, 508 } 509 if partial { 510 // Partial name match, like -bench=X/Y matching BenchmarkX. 511 // Only process sub-benchmarks, if any. 512 atomic.StoreInt32(&sub.hasSub, 1) 513 } 514 if sub.run1() { 515 sub.run() 516 } 517 b.add(sub.result) 518 return !sub.failed 519 } 520 521 // add simulates running benchmarks in sequence in a single iteration. It is 522 // used to give some meaningful results in case func Benchmark is used in 523 // combination with Run. 524 func (b *B) add(other BenchmarkResult) { 525 r := &b.result 526 // The aggregated BenchmarkResults resemble running all subbenchmarks as 527 // in sequence in a single benchmark. 528 r.N = 1 529 r.T += time.Duration(other.NsPerOp()) 530 if other.Bytes == 0 { 531 // Summing Bytes is meaningless in aggregate if not all subbenchmarks 532 // set it. 533 b.missingBytes = true 534 r.Bytes = 0 535 } 536 if !b.missingBytes { 537 r.Bytes += other.Bytes 538 } 539 r.MemAllocs += uint64(other.AllocsPerOp()) 540 r.MemBytes += uint64(other.AllocedBytesPerOp()) 541 } 542 543 // trimOutput shortens the output from a benchmark, which can be very long. 544 func (b *B) trimOutput() { 545 // The output is likely to appear multiple times because the benchmark 546 // is run multiple times, but at least it will be seen. This is not a big deal 547 // because benchmarks rarely print, but just in case, we trim it if it's too long. 548 const maxNewlines = 10 549 for nlCount, j := 0, 0; j < len(b.output); j++ { 550 if b.output[j] == '\n' { 551 nlCount++ 552 if nlCount >= maxNewlines { 553 b.output = append(b.output[:j], "\n\t... [output truncated]\n"...) 554 break 555 } 556 } 557 } 558 } 559 560 // A PB is used by RunParallel for running parallel benchmarks. 561 type PB struct { 562 globalN *uint64 // shared between all worker goroutines iteration counter 563 grain uint64 // acquire that many iterations from globalN at once 564 cache uint64 // local cache of acquired iterations 565 bN uint64 // total number of iterations to execute (b.N) 566 } 567 568 // Next reports whether there are more iterations to execute. 569 func (pb *PB) Next() bool { 570 if pb.cache == 0 { 571 n := atomic.AddUint64(pb.globalN, pb.grain) 572 if n <= pb.bN { 573 pb.cache = pb.grain 574 } else if n < pb.bN+pb.grain { 575 pb.cache = pb.bN + pb.grain - n 576 } else { 577 return false 578 } 579 } 580 pb.cache-- 581 return true 582 } 583 584 // RunParallel runs a benchmark in parallel. 585 // It creates multiple goroutines and distributes b.N iterations among them. 586 // The number of goroutines defaults to GOMAXPROCS. To increase parallelism for 587 // non-CPU-bound benchmarks, call SetParallelism before RunParallel. 588 // RunParallel is usually used with the go test -cpu flag. 589 // 590 // The body function will be run in each goroutine. It should set up any 591 // goroutine-local state and then iterate until pb.Next returns false. 592 // It should not use the StartTimer, StopTimer, or ResetTimer functions, 593 // because they have global effect. It should also not call Run. 594 func (b *B) RunParallel(body func(*PB)) { 595 if b.N == 0 { 596 return // Nothing to do when probing. 597 } 598 // Calculate grain size as number of iterations that take ~100µs. 599 // 100µs is enough to amortize the overhead and provide sufficient 600 // dynamic load balancing. 601 grain := uint64(0) 602 if b.previousN > 0 && b.previousDuration > 0 { 603 grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration) 604 } 605 if grain < 1 { 606 grain = 1 607 } 608 // We expect the inner loop and function call to take at least 10ns, 609 // so do not do more than 100µs/10ns=1e4 iterations. 610 if grain > 1e4 { 611 grain = 1e4 612 } 613 614 n := uint64(0) 615 numProcs := b.parallelism * runtime.GOMAXPROCS(0) 616 var wg sync.WaitGroup 617 wg.Add(numProcs) 618 for p := 0; p < numProcs; p++ { 619 go func() { 620 defer wg.Done() 621 pb := &PB{ 622 globalN: &n, 623 grain: grain, 624 bN: uint64(b.N), 625 } 626 body(pb) 627 }() 628 } 629 wg.Wait() 630 if n <= uint64(b.N) && !b.Failed() { 631 b.Fatal("RunParallel: body exited without pb.Next() == false") 632 } 633 } 634 635 // SetParallelism sets the number of goroutines used by RunParallel to p*GOMAXPROCS. 636 // There is usually no need to call SetParallelism for CPU-bound benchmarks. 637 // If p is less than 1, this call will have no effect. 638 func (b *B) SetParallelism(p int) { 639 if p >= 1 { 640 b.parallelism = p 641 } 642 } 643 644 // Benchmark benchmarks a single function. Useful for creating 645 // custom benchmarks that do not use the "go test" command. 646 // 647 // If f calls Run, the result will be an estimate of running all its 648 // subbenchmarks that don't call Run in sequence in a single benchmark. 649 func Benchmark(f func(b *B)) BenchmarkResult { 650 b := &B{ 651 common: common{ 652 signal: make(chan bool), 653 w: discard{}, 654 }, 655 benchFunc: f, 656 benchTime: *benchTime, 657 } 658 if b.run1() { 659 b.run() 660 } 661 return b.result 662 } 663 664 type discard struct{} 665 666 func (discard) Write(b []byte) (n int, err error) { return len(b), nil }