github.com/sanprasirt/go@v0.0.0-20170607001320-a027466e4b6d/src/testing/benchmark.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package testing 6 7 import ( 8 "flag" 9 "fmt" 10 "internal/race" 11 "os" 12 "runtime" 13 "sync" 14 "sync/atomic" 15 "time" 16 ) 17 18 var matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`") 19 var benchTime = flag.Duration("test.benchtime", 1*time.Second, "run each benchmark for duration `d`") 20 var benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks") 21 22 // Global lock to ensure only one benchmark runs at a time. 23 var benchmarkLock sync.Mutex 24 25 // Used for every benchmark for measuring memory. 26 var memStats runtime.MemStats 27 28 // An internal type but exported because it is cross-package; part of the implementation 29 // of the "go test" command. 30 type InternalBenchmark struct { 31 Name string 32 F func(b *B) 33 } 34 35 // B is a type passed to Benchmark functions to manage benchmark 36 // timing and to specify the number of iterations to run. 37 // 38 // A benchmark ends when its Benchmark function returns or calls any of the methods 39 // FailNow, Fatal, Fatalf, SkipNow, Skip, or Skipf. Those methods must be called 40 // only from the goroutine running the Benchmark function. 41 // The other reporting methods, such as the variations of Log and Error, 42 // may be called simultaneously from multiple goroutines. 43 // 44 // Like in tests, benchmark logs are accumulated during execution 45 // and dumped to standard error when done. Unlike in tests, benchmark logs 46 // are always printed, so as not to hide output whose existence may be 47 // affecting benchmark results. 48 type B struct { 49 common 50 importPath string // import path of the package containing the benchmark 51 context *benchContext 52 N int 53 previousN int // number of iterations in the previous run 54 previousDuration time.Duration // total duration of the previous run 55 benchFunc func(b *B) 56 benchTime time.Duration 57 bytes int64 58 missingBytes bool // one of the subbenchmarks does not have bytes set. 59 timerOn bool 60 showAllocResult bool 61 result BenchmarkResult 62 parallelism int // RunParallel creates parallelism*GOMAXPROCS goroutines 63 // The initial states of memStats.Mallocs and memStats.TotalAlloc. 64 startAllocs uint64 65 startBytes uint64 66 // The net total of this test after being run. 67 netAllocs uint64 68 netBytes uint64 69 } 70 71 // StartTimer starts timing a test. This function is called automatically 72 // before a benchmark starts, but it can also used to resume timing after 73 // a call to StopTimer. 74 func (b *B) StartTimer() { 75 if !b.timerOn { 76 if *benchmarkMemory || b.showAllocResult { 77 runtime.ReadMemStats(&memStats) 78 b.startAllocs = memStats.Mallocs 79 b.startBytes = memStats.TotalAlloc 80 } 81 b.start = time.Now() 82 b.timerOn = true 83 } 84 } 85 86 // StopTimer stops timing a test. This can be used to pause the timer 87 // while performing complex initialization that you don't 88 // want to measure. 89 func (b *B) StopTimer() { 90 if b.timerOn { 91 b.duration += time.Now().Sub(b.start) 92 if *benchmarkMemory || b.showAllocResult { 93 runtime.ReadMemStats(&memStats) 94 b.netAllocs += memStats.Mallocs - b.startAllocs 95 b.netBytes += memStats.TotalAlloc - b.startBytes 96 } 97 b.timerOn = false 98 } 99 } 100 101 // ResetTimer zeros the elapsed benchmark time and memory allocation counters. 102 // It does not affect whether the timer is running. 103 func (b *B) ResetTimer() { 104 if b.timerOn { 105 if *benchmarkMemory || b.showAllocResult { 106 runtime.ReadMemStats(&memStats) 107 b.startAllocs = memStats.Mallocs 108 b.startBytes = memStats.TotalAlloc 109 } 110 b.start = time.Now() 111 } 112 b.duration = 0 113 b.netAllocs = 0 114 b.netBytes = 0 115 } 116 117 // SetBytes records the number of bytes processed in a single operation. 118 // If this is called, the benchmark will report ns/op and MB/s. 119 func (b *B) SetBytes(n int64) { b.bytes = n } 120 121 // ReportAllocs enables malloc statistics for this benchmark. 122 // It is equivalent to setting -test.benchmem, but it only affects the 123 // benchmark function that calls ReportAllocs. 124 func (b *B) ReportAllocs() { 125 b.showAllocResult = true 126 } 127 128 func (b *B) nsPerOp() int64 { 129 if b.N <= 0 { 130 return 0 131 } 132 return b.duration.Nanoseconds() / int64(b.N) 133 } 134 135 // runN runs a single benchmark for the specified number of iterations. 136 func (b *B) runN(n int) { 137 benchmarkLock.Lock() 138 defer benchmarkLock.Unlock() 139 // Try to get a comparable environment for each run 140 // by clearing garbage from previous runs. 141 runtime.GC() 142 b.raceErrors = -race.Errors() 143 b.N = n 144 b.parallelism = 1 145 b.ResetTimer() 146 b.StartTimer() 147 b.benchFunc(b) 148 b.StopTimer() 149 b.previousN = n 150 b.previousDuration = b.duration 151 b.raceErrors += race.Errors() 152 if b.raceErrors > 0 { 153 b.Errorf("race detected during execution of benchmark") 154 } 155 } 156 157 func min(x, y int) int { 158 if x > y { 159 return y 160 } 161 return x 162 } 163 164 func max(x, y int) int { 165 if x < y { 166 return y 167 } 168 return x 169 } 170 171 // roundDown10 rounds a number down to the nearest power of 10. 172 func roundDown10(n int) int { 173 var tens = 0 174 // tens = floor(log_10(n)) 175 for n >= 10 { 176 n = n / 10 177 tens++ 178 } 179 // result = 10^tens 180 result := 1 181 for i := 0; i < tens; i++ { 182 result *= 10 183 } 184 return result 185 } 186 187 // roundUp rounds x up to a number of the form [1eX, 2eX, 3eX, 5eX]. 188 func roundUp(n int) int { 189 base := roundDown10(n) 190 switch { 191 case n <= base: 192 return base 193 case n <= (2 * base): 194 return 2 * base 195 case n <= (3 * base): 196 return 3 * base 197 case n <= (5 * base): 198 return 5 * base 199 default: 200 return 10 * base 201 } 202 } 203 204 // run1 runs the first iteration of benchFunc. It returns whether more 205 // iterations of this benchmarks should be run. 206 func (b *B) run1() bool { 207 if ctx := b.context; ctx != nil { 208 // Extend maxLen, if needed. 209 if n := len(b.name) + ctx.extLen + 1; n > ctx.maxLen { 210 ctx.maxLen = n + 8 // Add additional slack to avoid too many jumps in size. 211 } 212 } 213 go func() { 214 // Signal that we're done whether we return normally 215 // or by FailNow's runtime.Goexit. 216 defer func() { 217 b.signal <- true 218 }() 219 220 b.runN(1) 221 }() 222 <-b.signal 223 if b.failed { 224 fmt.Fprintf(b.w, "--- FAIL: %s\n%s", b.name, b.output) 225 return false 226 } 227 // Only print the output if we know we are not going to proceed. 228 // Otherwise it is printed in processBench. 229 if atomic.LoadInt32(&b.hasSub) != 0 || b.finished { 230 tag := "BENCH" 231 if b.skipped { 232 tag = "SKIP" 233 } 234 if b.chatty && (len(b.output) > 0 || b.finished) { 235 b.trimOutput() 236 fmt.Fprintf(b.w, "--- %s: %s\n%s", tag, b.name, b.output) 237 } 238 return false 239 } 240 return true 241 } 242 243 var labelsOnce sync.Once 244 245 // run executes the benchmark in a separate goroutine, including all of its 246 // subbenchmarks. b must not have subbenchmarks. 247 func (b *B) run() BenchmarkResult { 248 labelsOnce.Do(func() { 249 fmt.Fprintf(b.w, "goos: %s\n", runtime.GOOS) 250 fmt.Fprintf(b.w, "goarch: %s\n", runtime.GOARCH) 251 if b.importPath != "" { 252 fmt.Fprintf(b.w, "pkg: %s\n", b.importPath) 253 } 254 }) 255 if b.context != nil { 256 // Running go test --test.bench 257 b.context.processBench(b) // Must call doBench. 258 } else { 259 // Running func Benchmark. 260 b.doBench() 261 } 262 return b.result 263 } 264 265 func (b *B) doBench() BenchmarkResult { 266 go b.launch() 267 <-b.signal 268 return b.result 269 } 270 271 // launch launches the benchmark function. It gradually increases the number 272 // of benchmark iterations until the benchmark runs for the requested benchtime. 273 // launch is run by the doBench function as a separate goroutine. 274 // run1 must have been called on b. 275 func (b *B) launch() { 276 // Signal that we're done whether we return normally 277 // or by FailNow's runtime.Goexit. 278 defer func() { 279 b.signal <- true 280 }() 281 282 // Run the benchmark for at least the specified amount of time. 283 d := b.benchTime 284 for n := 1; !b.failed && b.duration < d && n < 1e9; { 285 last := n 286 // Predict required iterations. 287 n = int(d.Nanoseconds()) 288 if nsop := b.nsPerOp(); nsop != 0 { 289 n /= int(nsop) 290 } 291 // Run more iterations than we think we'll need (1.2x). 292 // Don't grow too fast in case we had timing errors previously. 293 // Be sure to run at least one more than last time. 294 n = max(min(n+n/5, 100*last), last+1) 295 // Round up to something easy to read. 296 n = roundUp(n) 297 b.runN(n) 298 } 299 b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes} 300 } 301 302 // The results of a benchmark run. 303 // MemAllocs and MemBytes may be zero if memory benchmarking is not requested 304 // using B.ReportAllocs or the -benchmem command line flag. 305 type BenchmarkResult struct { 306 N int // The number of iterations. 307 T time.Duration // The total time taken. 308 Bytes int64 // Bytes processed in one iteration. 309 MemAllocs uint64 // The total number of memory allocations. 310 MemBytes uint64 // The total number of bytes allocated. 311 } 312 313 func (r BenchmarkResult) NsPerOp() int64 { 314 if r.N <= 0 { 315 return 0 316 } 317 return r.T.Nanoseconds() / int64(r.N) 318 } 319 320 func (r BenchmarkResult) mbPerSec() float64 { 321 if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 { 322 return 0 323 } 324 return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds() 325 } 326 327 // AllocsPerOp returns r.MemAllocs / r.N. 328 func (r BenchmarkResult) AllocsPerOp() int64 { 329 if r.N <= 0 { 330 return 0 331 } 332 return int64(r.MemAllocs) / int64(r.N) 333 } 334 335 // AllocedBytesPerOp returns r.MemBytes / r.N. 336 func (r BenchmarkResult) AllocedBytesPerOp() int64 { 337 if r.N <= 0 { 338 return 0 339 } 340 return int64(r.MemBytes) / int64(r.N) 341 } 342 343 func (r BenchmarkResult) String() string { 344 mbs := r.mbPerSec() 345 mb := "" 346 if mbs != 0 { 347 mb = fmt.Sprintf("\t%7.2f MB/s", mbs) 348 } 349 nsop := r.NsPerOp() 350 ns := fmt.Sprintf("%10d ns/op", nsop) 351 if r.N > 0 && nsop < 100 { 352 // The format specifiers here make sure that 353 // the ones digits line up for all three possible formats. 354 if nsop < 10 { 355 ns = fmt.Sprintf("%13.2f ns/op", float64(r.T.Nanoseconds())/float64(r.N)) 356 } else { 357 ns = fmt.Sprintf("%12.1f ns/op", float64(r.T.Nanoseconds())/float64(r.N)) 358 } 359 } 360 return fmt.Sprintf("%8d\t%s%s", r.N, ns, mb) 361 } 362 363 // MemString returns r.AllocedBytesPerOp and r.AllocsPerOp in the same format as 'go test'. 364 func (r BenchmarkResult) MemString() string { 365 return fmt.Sprintf("%8d B/op\t%8d allocs/op", 366 r.AllocedBytesPerOp(), r.AllocsPerOp()) 367 } 368 369 // benchmarkName returns full name of benchmark including procs suffix. 370 func benchmarkName(name string, n int) string { 371 if n != 1 { 372 return fmt.Sprintf("%s-%d", name, n) 373 } 374 return name 375 } 376 377 type benchContext struct { 378 match *matcher 379 380 maxLen int // The largest recorded benchmark name. 381 extLen int // Maximum extension length. 382 } 383 384 // An internal function but exported because it is cross-package; part of the implementation 385 // of the "go test" command. 386 func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) { 387 runBenchmarks("", matchString, benchmarks) 388 } 389 390 func runBenchmarks(importPath string, matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool { 391 // If no flag was specified, don't run benchmarks. 392 if len(*matchBenchmarks) == 0 { 393 return true 394 } 395 // Collect matching benchmarks and determine longest name. 396 maxprocs := 1 397 for _, procs := range cpuList { 398 if procs > maxprocs { 399 maxprocs = procs 400 } 401 } 402 ctx := &benchContext{ 403 match: newMatcher(matchString, *matchBenchmarks, "-test.bench"), 404 extLen: len(benchmarkName("", maxprocs)), 405 } 406 var bs []InternalBenchmark 407 for _, Benchmark := range benchmarks { 408 if _, matched := ctx.match.fullName(nil, Benchmark.Name); matched { 409 bs = append(bs, Benchmark) 410 benchName := benchmarkName(Benchmark.Name, maxprocs) 411 if l := len(benchName) + ctx.extLen + 1; l > ctx.maxLen { 412 ctx.maxLen = l 413 } 414 } 415 } 416 main := &B{ 417 common: common{ 418 name: "Main", 419 w: os.Stdout, 420 chatty: *chatty, 421 }, 422 importPath: importPath, 423 benchFunc: func(b *B) { 424 for _, Benchmark := range bs { 425 b.Run(Benchmark.Name, Benchmark.F) 426 } 427 }, 428 benchTime: *benchTime, 429 context: ctx, 430 } 431 main.runN(1) 432 return !main.failed 433 } 434 435 // processBench runs bench b for the configured CPU counts and prints the results. 436 func (ctx *benchContext) processBench(b *B) { 437 for i, procs := range cpuList { 438 runtime.GOMAXPROCS(procs) 439 benchName := benchmarkName(b.name, procs) 440 fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName) 441 // Recompute the running time for all but the first iteration. 442 if i > 0 { 443 b = &B{ 444 common: common{ 445 signal: make(chan bool), 446 name: b.name, 447 w: b.w, 448 chatty: b.chatty, 449 }, 450 benchFunc: b.benchFunc, 451 benchTime: b.benchTime, 452 } 453 b.run1() 454 } 455 r := b.doBench() 456 if b.failed { 457 // The output could be very long here, but probably isn't. 458 // We print it all, regardless, because we don't want to trim the reason 459 // the benchmark failed. 460 fmt.Fprintf(b.w, "--- FAIL: %s\n%s", benchName, b.output) 461 continue 462 } 463 results := r.String() 464 if *benchmarkMemory || b.showAllocResult { 465 results += "\t" + r.MemString() 466 } 467 fmt.Fprintln(b.w, results) 468 // Unlike with tests, we ignore the -chatty flag and always print output for 469 // benchmarks since the output generation time will skew the results. 470 if len(b.output) > 0 { 471 b.trimOutput() 472 fmt.Fprintf(b.w, "--- BENCH: %s\n%s", benchName, b.output) 473 } 474 if p := runtime.GOMAXPROCS(-1); p != procs { 475 fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p) 476 } 477 } 478 } 479 480 // Run benchmarks f as a subbenchmark with the given name. It reports 481 // whether there were any failures. 482 // 483 // A subbenchmark is like any other benchmark. A benchmark that calls Run at 484 // least once will not be measured itself and will be called once with N=1. 485 // 486 // Run may be called simultaneously from multiple goroutines, but all such 487 // calls must happen before the outer benchmark function for b returns. 488 func (b *B) Run(name string, f func(b *B)) bool { 489 // Since b has subbenchmarks, we will no longer run it as a benchmark itself. 490 // Release the lock and acquire it on exit to ensure locks stay paired. 491 atomic.StoreInt32(&b.hasSub, 1) 492 benchmarkLock.Unlock() 493 defer benchmarkLock.Lock() 494 495 benchName, ok := b.name, true 496 if b.context != nil { 497 benchName, ok = b.context.match.fullName(&b.common, name) 498 } 499 if !ok { 500 return true 501 } 502 sub := &B{ 503 common: common{ 504 signal: make(chan bool), 505 name: benchName, 506 parent: &b.common, 507 level: b.level + 1, 508 w: b.w, 509 chatty: b.chatty, 510 }, 511 importPath: b.importPath, 512 benchFunc: f, 513 benchTime: b.benchTime, 514 context: b.context, 515 } 516 if sub.run1() { 517 sub.run() 518 } 519 b.add(sub.result) 520 return !sub.failed 521 } 522 523 // add simulates running benchmarks in sequence in a single iteration. It is 524 // used to give some meaningful results in case func Benchmark is used in 525 // combination with Run. 526 func (b *B) add(other BenchmarkResult) { 527 r := &b.result 528 // The aggregated BenchmarkResults resemble running all subbenchmarks as 529 // in sequence in a single benchmark. 530 r.N = 1 531 r.T += time.Duration(other.NsPerOp()) 532 if other.Bytes == 0 { 533 // Summing Bytes is meaningless in aggregate if not all subbenchmarks 534 // set it. 535 b.missingBytes = true 536 r.Bytes = 0 537 } 538 if !b.missingBytes { 539 r.Bytes += other.Bytes 540 } 541 r.MemAllocs += uint64(other.AllocsPerOp()) 542 r.MemBytes += uint64(other.AllocedBytesPerOp()) 543 } 544 545 // trimOutput shortens the output from a benchmark, which can be very long. 546 func (b *B) trimOutput() { 547 // The output is likely to appear multiple times because the benchmark 548 // is run multiple times, but at least it will be seen. This is not a big deal 549 // because benchmarks rarely print, but just in case, we trim it if it's too long. 550 const maxNewlines = 10 551 for nlCount, j := 0, 0; j < len(b.output); j++ { 552 if b.output[j] == '\n' { 553 nlCount++ 554 if nlCount >= maxNewlines { 555 b.output = append(b.output[:j], "\n\t... [output truncated]\n"...) 556 break 557 } 558 } 559 } 560 } 561 562 // A PB is used by RunParallel for running parallel benchmarks. 563 type PB struct { 564 globalN *uint64 // shared between all worker goroutines iteration counter 565 grain uint64 // acquire that many iterations from globalN at once 566 cache uint64 // local cache of acquired iterations 567 bN uint64 // total number of iterations to execute (b.N) 568 } 569 570 // Next reports whether there are more iterations to execute. 571 func (pb *PB) Next() bool { 572 if pb.cache == 0 { 573 n := atomic.AddUint64(pb.globalN, pb.grain) 574 if n <= pb.bN { 575 pb.cache = pb.grain 576 } else if n < pb.bN+pb.grain { 577 pb.cache = pb.bN + pb.grain - n 578 } else { 579 return false 580 } 581 } 582 pb.cache-- 583 return true 584 } 585 586 // RunParallel runs a benchmark in parallel. 587 // It creates multiple goroutines and distributes b.N iterations among them. 588 // The number of goroutines defaults to GOMAXPROCS. To increase parallelism for 589 // non-CPU-bound benchmarks, call SetParallelism before RunParallel. 590 // RunParallel is usually used with the go test -cpu flag. 591 // 592 // The body function will be run in each goroutine. It should set up any 593 // goroutine-local state and then iterate until pb.Next returns false. 594 // It should not use the StartTimer, StopTimer, or ResetTimer functions, 595 // because they have global effect. It should also not call Run. 596 func (b *B) RunParallel(body func(*PB)) { 597 if b.N == 0 { 598 return // Nothing to do when probing. 599 } 600 // Calculate grain size as number of iterations that take ~100µs. 601 // 100µs is enough to amortize the overhead and provide sufficient 602 // dynamic load balancing. 603 grain := uint64(0) 604 if b.previousN > 0 && b.previousDuration > 0 { 605 grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration) 606 } 607 if grain < 1 { 608 grain = 1 609 } 610 // We expect the inner loop and function call to take at least 10ns, 611 // so do not do more than 100µs/10ns=1e4 iterations. 612 if grain > 1e4 { 613 grain = 1e4 614 } 615 616 n := uint64(0) 617 numProcs := b.parallelism * runtime.GOMAXPROCS(0) 618 var wg sync.WaitGroup 619 wg.Add(numProcs) 620 for p := 0; p < numProcs; p++ { 621 go func() { 622 defer wg.Done() 623 pb := &PB{ 624 globalN: &n, 625 grain: grain, 626 bN: uint64(b.N), 627 } 628 body(pb) 629 }() 630 } 631 wg.Wait() 632 if n <= uint64(b.N) && !b.Failed() { 633 b.Fatal("RunParallel: body exited without pb.Next() == false") 634 } 635 } 636 637 // SetParallelism sets the number of goroutines used by RunParallel to p*GOMAXPROCS. 638 // There is usually no need to call SetParallelism for CPU-bound benchmarks. 639 // If p is less than 1, this call will have no effect. 640 func (b *B) SetParallelism(p int) { 641 if p >= 1 { 642 b.parallelism = p 643 } 644 } 645 646 // Benchmark benchmarks a single function. Useful for creating 647 // custom benchmarks that do not use the "go test" command. 648 // 649 // If f calls Run, the result will be an estimate of running all its 650 // subbenchmarks that don't call Run in sequence in a single benchmark. 651 func Benchmark(f func(b *B)) BenchmarkResult { 652 b := &B{ 653 common: common{ 654 signal: make(chan bool), 655 w: discard{}, 656 }, 657 benchFunc: f, 658 benchTime: *benchTime, 659 } 660 if b.run1() { 661 b.run() 662 } 663 return b.result 664 } 665 666 type discard struct{} 667 668 func (discard) Write(b []byte) (n int, err error) { return len(b), nil }