github.com/MangoDowner/go-gm@v0.0.0-20180818020936-8baa2bd4408c/src/testing/benchmark.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package testing 6 7 import ( 8 "flag" 9 "fmt" 10 "internal/race" 11 "os" 12 "runtime" 13 "sync" 14 "sync/atomic" 15 "time" 16 ) 17 18 var matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`") 19 var benchTime = flag.Duration("test.benchtime", 1*time.Second, "run each benchmark for duration `d`") 20 var benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks") 21 22 // Global lock to ensure only one benchmark runs at a time. 23 var benchmarkLock sync.Mutex 24 25 // Used for every benchmark for measuring memory. 26 var memStats runtime.MemStats 27 28 // An internal type but exported because it is cross-package; part of the implementation 29 // of the "go test" command. 30 type InternalBenchmark struct { 31 Name string 32 F func(b *B) 33 } 34 35 // B is a type passed to Benchmark functions to manage benchmark 36 // timing and to specify the number of iterations to run. 37 // 38 // A benchmark ends when its Benchmark function returns or calls any of the methods 39 // FailNow, Fatal, Fatalf, SkipNow, Skip, or Skipf. Those methods must be called 40 // only from the goroutine running the Benchmark function. 41 // The other reporting methods, such as the variations of Log and Error, 42 // may be called simultaneously from multiple goroutines. 43 // 44 // Like in tests, benchmark logs are accumulated during execution 45 // and dumped to standard error when done. Unlike in tests, benchmark logs 46 // are always printed, so as not to hide output whose existence may be 47 // affecting benchmark results. 48 type B struct { 49 common 50 importPath string // import path of the package containing the benchmark 51 context *benchContext 52 N int 53 previousN int // number of iterations in the previous run 54 previousDuration time.Duration // total duration of the previous run 55 benchFunc func(b *B) 56 benchTime time.Duration 57 bytes int64 58 missingBytes bool // one of the subbenchmarks does not have bytes set. 59 timerOn bool 60 showAllocResult bool 61 result BenchmarkResult 62 parallelism int // RunParallel creates parallelism*GOMAXPROCS goroutines 63 // The initial states of memStats.Mallocs and memStats.TotalAlloc. 64 startAllocs uint64 65 startBytes uint64 66 // The net total of this test after being run. 67 netAllocs uint64 68 netBytes uint64 69 } 70 71 // StartTimer starts timing a test. This function is called automatically 72 // before a benchmark starts, but it can also used to resume timing after 73 // a call to StopTimer. 74 func (b *B) StartTimer() { 75 if !b.timerOn { 76 runtime.ReadMemStats(&memStats) 77 b.startAllocs = memStats.Mallocs 78 b.startBytes = memStats.TotalAlloc 79 b.start = time.Now() 80 b.timerOn = true 81 } 82 } 83 84 // StopTimer stops timing a test. This can be used to pause the timer 85 // while performing complex initialization that you don't 86 // want to measure. 87 func (b *B) StopTimer() { 88 if b.timerOn { 89 b.duration += time.Now().Sub(b.start) 90 runtime.ReadMemStats(&memStats) 91 b.netAllocs += memStats.Mallocs - b.startAllocs 92 b.netBytes += memStats.TotalAlloc - b.startBytes 93 b.timerOn = false 94 } 95 } 96 97 // ResetTimer zeros the elapsed benchmark time and memory allocation counters. 98 // It does not affect whether the timer is running. 99 func (b *B) ResetTimer() { 100 if b.timerOn { 101 runtime.ReadMemStats(&memStats) 102 b.startAllocs = memStats.Mallocs 103 b.startBytes = memStats.TotalAlloc 104 b.start = time.Now() 105 } 106 b.duration = 0 107 b.netAllocs = 0 108 b.netBytes = 0 109 } 110 111 // SetBytes records the number of bytes processed in a single operation. 112 // If this is called, the benchmark will report ns/op and MB/s. 113 func (b *B) SetBytes(n int64) { b.bytes = n } 114 115 // ReportAllocs enables malloc statistics for this benchmark. 116 // It is equivalent to setting -test.benchmem, but it only affects the 117 // benchmark function that calls ReportAllocs. 118 func (b *B) ReportAllocs() { 119 b.showAllocResult = true 120 } 121 122 func (b *B) nsPerOp() int64 { 123 if b.N <= 0 { 124 return 0 125 } 126 return b.duration.Nanoseconds() / int64(b.N) 127 } 128 129 // runN runs a single benchmark for the specified number of iterations. 130 func (b *B) runN(n int) { 131 benchmarkLock.Lock() 132 defer benchmarkLock.Unlock() 133 // Try to get a comparable environment for each run 134 // by clearing garbage from previous runs. 135 runtime.GC() 136 b.raceErrors = -race.Errors() 137 b.N = n 138 b.parallelism = 1 139 b.ResetTimer() 140 b.StartTimer() 141 b.benchFunc(b) 142 b.StopTimer() 143 b.previousN = n 144 b.previousDuration = b.duration 145 b.raceErrors += race.Errors() 146 if b.raceErrors > 0 { 147 b.Errorf("race detected during execution of benchmark") 148 } 149 } 150 151 func min(x, y int) int { 152 if x > y { 153 return y 154 } 155 return x 156 } 157 158 func max(x, y int) int { 159 if x < y { 160 return y 161 } 162 return x 163 } 164 165 // roundDown10 rounds a number down to the nearest power of 10. 166 func roundDown10(n int) int { 167 var tens = 0 168 // tens = floor(log_10(n)) 169 for n >= 10 { 170 n = n / 10 171 tens++ 172 } 173 // result = 10^tens 174 result := 1 175 for i := 0; i < tens; i++ { 176 result *= 10 177 } 178 return result 179 } 180 181 // roundUp rounds x up to a number of the form [1eX, 2eX, 3eX, 5eX]. 182 func roundUp(n int) int { 183 base := roundDown10(n) 184 switch { 185 case n <= base: 186 return base 187 case n <= (2 * base): 188 return 2 * base 189 case n <= (3 * base): 190 return 3 * base 191 case n <= (5 * base): 192 return 5 * base 193 default: 194 return 10 * base 195 } 196 } 197 198 // run1 runs the first iteration of benchFunc. It returns whether more 199 // iterations of this benchmarks should be run. 200 func (b *B) run1() bool { 201 if ctx := b.context; ctx != nil { 202 // Extend maxLen, if needed. 203 if n := len(b.name) + ctx.extLen + 1; n > ctx.maxLen { 204 ctx.maxLen = n + 8 // Add additional slack to avoid too many jumps in size. 205 } 206 } 207 go func() { 208 // Signal that we're done whether we return normally 209 // or by FailNow's runtime.Goexit. 210 defer func() { 211 b.signal <- true 212 }() 213 214 b.runN(1) 215 }() 216 <-b.signal 217 if b.failed { 218 fmt.Fprintf(b.w, "--- FAIL: %s\n%s", b.name, b.output) 219 return false 220 } 221 // Only print the output if we know we are not going to proceed. 222 // Otherwise it is printed in processBench. 223 if atomic.LoadInt32(&b.hasSub) != 0 || b.finished { 224 tag := "BENCH" 225 if b.skipped { 226 tag = "SKIP" 227 } 228 if b.chatty && (len(b.output) > 0 || b.finished) { 229 b.trimOutput() 230 fmt.Fprintf(b.w, "--- %s: %s\n%s", tag, b.name, b.output) 231 } 232 return false 233 } 234 return true 235 } 236 237 var labelsOnce sync.Once 238 239 // run executes the benchmark in a separate goroutine, including all of its 240 // subbenchmarks. b must not have subbenchmarks. 241 func (b *B) run() BenchmarkResult { 242 labelsOnce.Do(func() { 243 fmt.Fprintf(b.w, "goos: %s\n", runtime.GOOS) 244 fmt.Fprintf(b.w, "goarch: %s\n", runtime.GOARCH) 245 if b.importPath != "" { 246 fmt.Fprintf(b.w, "pkg: %s\n", b.importPath) 247 } 248 }) 249 if b.context != nil { 250 // Running go test --test.bench 251 b.context.processBench(b) // Must call doBench. 252 } else { 253 // Running func Benchmark. 254 b.doBench() 255 } 256 return b.result 257 } 258 259 func (b *B) doBench() BenchmarkResult { 260 go b.launch() 261 <-b.signal 262 return b.result 263 } 264 265 // launch launches the benchmark function. It gradually increases the number 266 // of benchmark iterations until the benchmark runs for the requested benchtime. 267 // launch is run by the doBench function as a separate goroutine. 268 // run1 must have been called on b. 269 func (b *B) launch() { 270 // Signal that we're done whether we return normally 271 // or by FailNow's runtime.Goexit. 272 defer func() { 273 b.signal <- true 274 }() 275 276 // Run the benchmark for at least the specified amount of time. 277 d := b.benchTime 278 for n := 1; !b.failed && b.duration < d && n < 1e9; { 279 last := n 280 // Predict required iterations. 281 n = int(d.Nanoseconds()) 282 if nsop := b.nsPerOp(); nsop != 0 { 283 n /= int(nsop) 284 } 285 // Run more iterations than we think we'll need (1.2x). 286 // Don't grow too fast in case we had timing errors previously. 287 // Be sure to run at least one more than last time. 288 n = max(min(n+n/5, 100*last), last+1) 289 // Round up to something easy to read. 290 n = roundUp(n) 291 b.runN(n) 292 } 293 b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes} 294 } 295 296 // The results of a benchmark run. 297 type BenchmarkResult struct { 298 N int // The number of iterations. 299 T time.Duration // The total time taken. 300 Bytes int64 // Bytes processed in one iteration. 301 MemAllocs uint64 // The total number of memory allocations. 302 MemBytes uint64 // The total number of bytes allocated. 303 } 304 305 func (r BenchmarkResult) NsPerOp() int64 { 306 if r.N <= 0 { 307 return 0 308 } 309 return r.T.Nanoseconds() / int64(r.N) 310 } 311 312 func (r BenchmarkResult) mbPerSec() float64 { 313 if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 { 314 return 0 315 } 316 return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds() 317 } 318 319 // AllocsPerOp returns r.MemAllocs / r.N. 320 func (r BenchmarkResult) AllocsPerOp() int64 { 321 if r.N <= 0 { 322 return 0 323 } 324 return int64(r.MemAllocs) / int64(r.N) 325 } 326 327 // AllocedBytesPerOp returns r.MemBytes / r.N. 328 func (r BenchmarkResult) AllocedBytesPerOp() int64 { 329 if r.N <= 0 { 330 return 0 331 } 332 return int64(r.MemBytes) / int64(r.N) 333 } 334 335 func (r BenchmarkResult) String() string { 336 mbs := r.mbPerSec() 337 mb := "" 338 if mbs != 0 { 339 mb = fmt.Sprintf("\t%7.2f MB/s", mbs) 340 } 341 nsop := r.NsPerOp() 342 ns := fmt.Sprintf("%10d ns/op", nsop) 343 if r.N > 0 && nsop < 100 { 344 // The format specifiers here make sure that 345 // the ones digits line up for all three possible formats. 346 if nsop < 10 { 347 ns = fmt.Sprintf("%13.2f ns/op", float64(r.T.Nanoseconds())/float64(r.N)) 348 } else { 349 ns = fmt.Sprintf("%12.1f ns/op", float64(r.T.Nanoseconds())/float64(r.N)) 350 } 351 } 352 return fmt.Sprintf("%8d\t%s%s", r.N, ns, mb) 353 } 354 355 // MemString returns r.AllocedBytesPerOp and r.AllocsPerOp in the same format as 'go test'. 356 func (r BenchmarkResult) MemString() string { 357 return fmt.Sprintf("%8d B/op\t%8d allocs/op", 358 r.AllocedBytesPerOp(), r.AllocsPerOp()) 359 } 360 361 // benchmarkName returns full name of benchmark including procs suffix. 362 func benchmarkName(name string, n int) string { 363 if n != 1 { 364 return fmt.Sprintf("%s-%d", name, n) 365 } 366 return name 367 } 368 369 type benchContext struct { 370 match *matcher 371 372 maxLen int // The largest recorded benchmark name. 373 extLen int // Maximum extension length. 374 } 375 376 // An internal function but exported because it is cross-package; part of the implementation 377 // of the "go test" command. 378 func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) { 379 runBenchmarks("", matchString, benchmarks) 380 } 381 382 func runBenchmarks(importPath string, matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool { 383 // If no flag was specified, don't run benchmarks. 384 if len(*matchBenchmarks) == 0 { 385 return true 386 } 387 // Collect matching benchmarks and determine longest name. 388 maxprocs := 1 389 for _, procs := range cpuList { 390 if procs > maxprocs { 391 maxprocs = procs 392 } 393 } 394 ctx := &benchContext{ 395 match: newMatcher(matchString, *matchBenchmarks, "-test.bench"), 396 extLen: len(benchmarkName("", maxprocs)), 397 } 398 var bs []InternalBenchmark 399 for _, Benchmark := range benchmarks { 400 if _, matched, _ := ctx.match.fullName(nil, Benchmark.Name); matched { 401 bs = append(bs, Benchmark) 402 benchName := benchmarkName(Benchmark.Name, maxprocs) 403 if l := len(benchName) + ctx.extLen + 1; l > ctx.maxLen { 404 ctx.maxLen = l 405 } 406 } 407 } 408 main := &B{ 409 common: common{ 410 name: "Main", 411 w: os.Stdout, 412 chatty: *chatty, 413 }, 414 importPath: importPath, 415 benchFunc: func(b *B) { 416 for _, Benchmark := range bs { 417 b.Run(Benchmark.Name, Benchmark.F) 418 } 419 }, 420 benchTime: *benchTime, 421 context: ctx, 422 } 423 main.runN(1) 424 return !main.failed 425 } 426 427 // processBench runs bench b for the configured CPU counts and prints the results. 428 func (ctx *benchContext) processBench(b *B) { 429 for i, procs := range cpuList { 430 runtime.GOMAXPROCS(procs) 431 benchName := benchmarkName(b.name, procs) 432 fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName) 433 // Recompute the running time for all but the first iteration. 434 if i > 0 { 435 b = &B{ 436 common: common{ 437 signal: make(chan bool), 438 name: b.name, 439 w: b.w, 440 chatty: b.chatty, 441 }, 442 benchFunc: b.benchFunc, 443 benchTime: b.benchTime, 444 } 445 b.run1() 446 } 447 r := b.doBench() 448 if b.failed { 449 // The output could be very long here, but probably isn't. 450 // We print it all, regardless, because we don't want to trim the reason 451 // the benchmark failed. 452 fmt.Fprintf(b.w, "--- FAIL: %s\n%s", benchName, b.output) 453 continue 454 } 455 results := r.String() 456 if *benchmarkMemory || b.showAllocResult { 457 results += "\t" + r.MemString() 458 } 459 fmt.Fprintln(b.w, results) 460 // Unlike with tests, we ignore the -chatty flag and always print output for 461 // benchmarks since the output generation time will skew the results. 462 if len(b.output) > 0 { 463 b.trimOutput() 464 fmt.Fprintf(b.w, "--- BENCH: %s\n%s", benchName, b.output) 465 } 466 if p := runtime.GOMAXPROCS(-1); p != procs { 467 fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p) 468 } 469 } 470 } 471 472 // Run benchmarks f as a subbenchmark with the given name. It reports 473 // whether there were any failures. 474 // 475 // A subbenchmark is like any other benchmark. A benchmark that calls Run at 476 // least once will not be measured itself and will be called once with N=1. 477 // 478 // Run may be called simultaneously from multiple goroutines, but all such 479 // calls must return before the outer benchmark function for b returns. 480 func (b *B) Run(name string, f func(b *B)) bool { 481 // Since b has subbenchmarks, we will no longer run it as a benchmark itself. 482 // Release the lock and acquire it on exit to ensure locks stay paired. 483 atomic.StoreInt32(&b.hasSub, 1) 484 benchmarkLock.Unlock() 485 defer benchmarkLock.Lock() 486 487 benchName, ok, partial := b.name, true, false 488 if b.context != nil { 489 benchName, ok, partial = b.context.match.fullName(&b.common, name) 490 } 491 if !ok { 492 return true 493 } 494 sub := &B{ 495 common: common{ 496 signal: make(chan bool), 497 name: benchName, 498 parent: &b.common, 499 level: b.level + 1, 500 w: b.w, 501 chatty: b.chatty, 502 }, 503 importPath: b.importPath, 504 benchFunc: f, 505 benchTime: b.benchTime, 506 context: b.context, 507 } 508 if partial { 509 // Partial name match, like -bench=X/Y matching BenchmarkX. 510 // Only process sub-benchmarks, if any. 511 atomic.StoreInt32(&sub.hasSub, 1) 512 } 513 if sub.run1() { 514 sub.run() 515 } 516 b.add(sub.result) 517 return !sub.failed 518 } 519 520 // add simulates running benchmarks in sequence in a single iteration. It is 521 // used to give some meaningful results in case func Benchmark is used in 522 // combination with Run. 523 func (b *B) add(other BenchmarkResult) { 524 r := &b.result 525 // The aggregated BenchmarkResults resemble running all subbenchmarks as 526 // in sequence in a single benchmark. 527 r.N = 1 528 r.T += time.Duration(other.NsPerOp()) 529 if other.Bytes == 0 { 530 // Summing Bytes is meaningless in aggregate if not all subbenchmarks 531 // set it. 532 b.missingBytes = true 533 r.Bytes = 0 534 } 535 if !b.missingBytes { 536 r.Bytes += other.Bytes 537 } 538 r.MemAllocs += uint64(other.AllocsPerOp()) 539 r.MemBytes += uint64(other.AllocedBytesPerOp()) 540 } 541 542 // trimOutput shortens the output from a benchmark, which can be very long. 543 func (b *B) trimOutput() { 544 // The output is likely to appear multiple times because the benchmark 545 // is run multiple times, but at least it will be seen. This is not a big deal 546 // because benchmarks rarely print, but just in case, we trim it if it's too long. 547 const maxNewlines = 10 548 for nlCount, j := 0, 0; j < len(b.output); j++ { 549 if b.output[j] == '\n' { 550 nlCount++ 551 if nlCount >= maxNewlines { 552 b.output = append(b.output[:j], "\n\t... [output truncated]\n"...) 553 break 554 } 555 } 556 } 557 } 558 559 // A PB is used by RunParallel for running parallel benchmarks. 560 type PB struct { 561 globalN *uint64 // shared between all worker goroutines iteration counter 562 grain uint64 // acquire that many iterations from globalN at once 563 cache uint64 // local cache of acquired iterations 564 bN uint64 // total number of iterations to execute (b.N) 565 } 566 567 // Next reports whether there are more iterations to execute. 568 func (pb *PB) Next() bool { 569 if pb.cache == 0 { 570 n := atomic.AddUint64(pb.globalN, pb.grain) 571 if n <= pb.bN { 572 pb.cache = pb.grain 573 } else if n < pb.bN+pb.grain { 574 pb.cache = pb.bN + pb.grain - n 575 } else { 576 return false 577 } 578 } 579 pb.cache-- 580 return true 581 } 582 583 // RunParallel runs a benchmark in parallel. 584 // It creates multiple goroutines and distributes b.N iterations among them. 585 // The number of goroutines defaults to GOMAXPROCS. To increase parallelism for 586 // non-CPU-bound benchmarks, call SetParallelism before RunParallel. 587 // RunParallel is usually used with the go test -cpu flag. 588 // 589 // The body function will be run in each goroutine. It should set up any 590 // goroutine-local state and then iterate until pb.Next returns false. 591 // It should not use the StartTimer, StopTimer, or ResetTimer functions, 592 // because they have global effect. It should also not call Run. 593 func (b *B) RunParallel(body func(*PB)) { 594 if b.N == 0 { 595 return // Nothing to do when probing. 596 } 597 // Calculate grain size as number of iterations that take ~100µs. 598 // 100µs is enough to amortize the overhead and provide sufficient 599 // dynamic load balancing. 600 grain := uint64(0) 601 if b.previousN > 0 && b.previousDuration > 0 { 602 grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration) 603 } 604 if grain < 1 { 605 grain = 1 606 } 607 // We expect the inner loop and function call to take at least 10ns, 608 // so do not do more than 100µs/10ns=1e4 iterations. 609 if grain > 1e4 { 610 grain = 1e4 611 } 612 613 n := uint64(0) 614 numProcs := b.parallelism * runtime.GOMAXPROCS(0) 615 var wg sync.WaitGroup 616 wg.Add(numProcs) 617 for p := 0; p < numProcs; p++ { 618 go func() { 619 defer wg.Done() 620 pb := &PB{ 621 globalN: &n, 622 grain: grain, 623 bN: uint64(b.N), 624 } 625 body(pb) 626 }() 627 } 628 wg.Wait() 629 if n <= uint64(b.N) && !b.Failed() { 630 b.Fatal("RunParallel: body exited without pb.Next() == false") 631 } 632 } 633 634 // SetParallelism sets the number of goroutines used by RunParallel to p*GOMAXPROCS. 635 // There is usually no need to call SetParallelism for CPU-bound benchmarks. 636 // If p is less than 1, this call will have no effect. 637 func (b *B) SetParallelism(p int) { 638 if p >= 1 { 639 b.parallelism = p 640 } 641 } 642 643 // Benchmark benchmarks a single function. Useful for creating 644 // custom benchmarks that do not use the "go test" command. 645 // 646 // If f calls Run, the result will be an estimate of running all its 647 // subbenchmarks that don't call Run in sequence in a single benchmark. 648 func Benchmark(f func(b *B)) BenchmarkResult { 649 b := &B{ 650 common: common{ 651 signal: make(chan bool), 652 w: discard{}, 653 }, 654 benchFunc: f, 655 benchTime: *benchTime, 656 } 657 if b.run1() { 658 b.run() 659 } 660 return b.result 661 } 662 663 type discard struct{} 664 665 func (discard) Write(b []byte) (n int, err error) { return len(b), nil }