// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package testing

import (
	"flag"
	"fmt"
	"os"
	"runtime"
	"sync"
	"sync/atomic"
	"time"
)

// matchBenchmarks holds the -test.bench pattern. runBenchmarks runs nothing
// when it is empty.
var matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`")

// benchTime holds the -test.benchtime duration (one second by default); it is
// copied into each B as the target running time.
var benchTime = flag.Duration("test.benchtime", 1*time.Second, "run each benchmark for duration `d`")

// benchmarkMemory holds the -test.benchmem flag; when set, processBench appends
// allocation statistics to every benchmark's result line.
var benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks")

// Global lock to ensure only one benchmark runs at a time.
// runN holds it while the benchmark function executes; Run releases it
// for the duration of a subbenchmark and reacquires it on exit.
var benchmarkLock sync.Mutex

// Used for every benchmark for measuring memory.
// It is the scratch destination of runtime.ReadMemStats in the timer methods.
var memStats runtime.MemStats

// InternalBenchmark is an internal type but exported because it is
// cross-package; it is part of the implementation of the "go test" command.
type InternalBenchmark struct {
	Name string     // benchmark name matched against -test.bench
	F    func(b *B) // the benchmark function
}

// B is a type passed to Benchmark functions to manage benchmark
// timing and to specify the number of iterations to run.
//
// A benchmark ends when its Benchmark function returns or calls any of the methods
// FailNow, Fatal, Fatalf, SkipNow, Skip, or Skipf. Those methods must be called
// only from the goroutine running the Benchmark function.
// The other reporting methods, such as the variations of Log and Error,
// may be called simultaneously from multiple goroutines.
//
// Like in tests, benchmark logs are accumulated during execution
// and dumped to the benchmark's output writer (standard output when run
// by "go test") when done. Unlike in tests, benchmark logs
// are always printed, so as not to hide output whose existence may be
// affecting benchmark results.
47 type B struct { 48 common 49 context *benchContext 50 N int 51 previousN int // number of iterations in the previous run 52 previousDuration time.Duration // total duration of the previous run 53 benchFunc func(b *B) 54 benchTime time.Duration 55 bytes int64 56 missingBytes bool // one of the subbenchmarks does not have bytes set. 57 timerOn bool 58 showAllocResult bool 59 result BenchmarkResult 60 parallelism int // RunParallel creates parallelism*GOMAXPROCS goroutines 61 // The initial states of memStats.Mallocs and memStats.TotalAlloc. 62 startAllocs uint64 63 startBytes uint64 64 // The net total of this test after being run. 65 netAllocs uint64 66 netBytes uint64 67 } 68 69 // StartTimer starts timing a test. This function is called automatically 70 // before a benchmark starts, but it can also used to resume timing after 71 // a call to StopTimer. 72 func (b *B) StartTimer() { 73 if !b.timerOn { 74 runtime.ReadMemStats(&memStats) 75 b.startAllocs = memStats.Mallocs 76 b.startBytes = memStats.TotalAlloc 77 b.start = time.Now() 78 b.timerOn = true 79 } 80 } 81 82 // StopTimer stops timing a test. This can be used to pause the timer 83 // while performing complex initialization that you don't 84 // want to measure. 85 func (b *B) StopTimer() { 86 if b.timerOn { 87 b.duration += time.Now().Sub(b.start) 88 runtime.ReadMemStats(&memStats) 89 b.netAllocs += memStats.Mallocs - b.startAllocs 90 b.netBytes += memStats.TotalAlloc - b.startBytes 91 b.timerOn = false 92 } 93 } 94 95 // ResetTimer zeros the elapsed benchmark time and memory allocation counters. 96 // It does not affect whether the timer is running. 97 func (b *B) ResetTimer() { 98 if b.timerOn { 99 runtime.ReadMemStats(&memStats) 100 b.startAllocs = memStats.Mallocs 101 b.startBytes = memStats.TotalAlloc 102 b.start = time.Now() 103 } 104 b.duration = 0 105 b.netAllocs = 0 106 b.netBytes = 0 107 } 108 109 // SetBytes records the number of bytes processed in a single operation. 
110 // If this is called, the benchmark will report ns/op and MB/s. 111 func (b *B) SetBytes(n int64) { b.bytes = n } 112 113 // ReportAllocs enables malloc statistics for this benchmark. 114 // It is equivalent to setting -test.benchmem, but it only affects the 115 // benchmark function that calls ReportAllocs. 116 func (b *B) ReportAllocs() { 117 b.showAllocResult = true 118 } 119 120 func (b *B) nsPerOp() int64 { 121 if b.N <= 0 { 122 return 0 123 } 124 return b.duration.Nanoseconds() / int64(b.N) 125 } 126 127 // runN runs a single benchmark for the specified number of iterations. 128 func (b *B) runN(n int) { 129 benchmarkLock.Lock() 130 defer benchmarkLock.Unlock() 131 // Try to get a comparable environment for each run 132 // by clearing garbage from previous runs. 133 runtime.GC() 134 b.N = n 135 b.parallelism = 1 136 b.ResetTimer() 137 b.StartTimer() 138 b.benchFunc(b) 139 b.StopTimer() 140 b.previousN = n 141 b.previousDuration = b.duration 142 } 143 144 func min(x, y int) int { 145 if x > y { 146 return y 147 } 148 return x 149 } 150 151 func max(x, y int) int { 152 if x < y { 153 return y 154 } 155 return x 156 } 157 158 // roundDown10 rounds a number down to the nearest power of 10. 159 func roundDown10(n int) int { 160 var tens = 0 161 // tens = floor(log_10(n)) 162 for n >= 10 { 163 n = n / 10 164 tens++ 165 } 166 // result = 10^tens 167 result := 1 168 for i := 0; i < tens; i++ { 169 result *= 10 170 } 171 return result 172 } 173 174 // roundUp rounds x up to a number of the form [1eX, 2eX, 3eX, 5eX]. 175 func roundUp(n int) int { 176 base := roundDown10(n) 177 switch { 178 case n <= base: 179 return base 180 case n <= (2 * base): 181 return 2 * base 182 case n <= (3 * base): 183 return 3 * base 184 case n <= (5 * base): 185 return 5 * base 186 default: 187 return 10 * base 188 } 189 } 190 191 // run1 runs the first iteration of benchFunc. It returns whether more 192 // iterations of this benchmarks should be run. 
func (b *B) run1() bool {
	if ctx := b.context; ctx != nil {
		// Extend maxLen, if needed.
		if n := len(b.name) + ctx.extLen + 1; n > ctx.maxLen {
			ctx.maxLen = n + 8 // Add additional slack to avoid too many jumps in size.
		}
	}
	// Run the probe in its own goroutine and wait for it on b.signal.
	go func() {
		// Signal that we're done whether we return normally
		// or by FailNow's runtime.Goexit.
		defer func() {
			b.signal <- true
		}()

		b.runN(1)
	}()
	<-b.signal
	if b.failed {
		fmt.Fprintf(b.w, "--- FAIL: %s\n%s", b.name, b.output)
		return false
	}
	// Only print the output if we know we are not going to proceed.
	// Otherwise it is printed in processBench.
	if b.hasSub || b.finished {
		tag := "BENCH"
		if b.skipped {
			tag = "SKIP"
		}
		if b.chatty && (len(b.output) > 0 || b.finished) {
			b.trimOutput()
			fmt.Fprintf(b.w, "--- %s: %s\n%s", tag, b.name, b.output)
		}
		return false
	}
	return true
}

// run executes the benchmark in a separate goroutine, including all of its
// subbenchmarks. b must not have subbenchmarks.
//
// It returns b.result as left by doBench (called directly, or through
// processBench when a benchContext is attached).
func (b *B) run() BenchmarkResult {
	if b.context != nil {
		// Running go test --test.bench
		b.context.processBench(b) // Must call doBench.
	} else {
		// Running func Benchmark.
		b.doBench()
	}
	return b.result
}

// doBench starts launch in a new goroutine, blocks until it signals
// completion on b.signal, and returns the recorded result.
func (b *B) doBench() BenchmarkResult {
	go b.launch()
	<-b.signal
	return b.result
}

// launch launches the benchmark function. It gradually increases the number
// of benchmark iterations until the benchmark runs for the requested benchtime.
// launch is run by the doBench function as a separate goroutine.
// run1 must have been called on b.
func (b *B) launch() {
	// Signal that we're done whether we return normally
	// or by FailNow's runtime.Goexit.
	defer func() {
		b.signal <- true
	}()

	// Run the benchmark for at least the specified amount of time.
	// The loop also stops on failure or once n reaches 1e9 iterations.
	d := b.benchTime
	for n := 1; !b.failed && b.duration < d && n < 1e9; {
		last := n
		// Predict required iterations.
		n = int(d.Nanoseconds())
		if nsop := b.nsPerOp(); nsop != 0 {
			n /= int(nsop)
		}
		// Run more iterations than we think we'll need (1.2x).
		// Don't grow too fast in case we had timing errors previously.
		// Be sure to run at least one more than last time.
		n = max(min(n+n/5, 100*last), last+1)
		// Round up to something easy to read.
		n = roundUp(n)
		b.runN(n)
	}
	b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes}
}

// The results of a benchmark run.
type BenchmarkResult struct {
	N         int           // The number of iterations.
	T         time.Duration // The total time taken.
	Bytes     int64         // Bytes processed in one iteration.
	MemAllocs uint64        // The total number of memory allocations.
	MemBytes  uint64        // The total number of bytes allocated.
}

// NsPerOp returns the total time in nanoseconds divided by the number of
// iterations (integer division), or 0 when N is not positive.
func (r BenchmarkResult) NsPerOp() int64 {
	if r.N <= 0 {
		return 0
	}
	return r.T.Nanoseconds() / int64(r.N)
}

// mbPerSec returns the throughput in units of 1e6 bytes per second, computed
// as Bytes*N/1e6 divided by T in seconds. It returns 0 when Bytes, T, or N
// is not positive.
func (r BenchmarkResult) mbPerSec() float64 {
	if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 {
		return 0
	}
	return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds()
}

// AllocsPerOp returns MemAllocs divided by N (integer division),
// or 0 when N is not positive.
func (r BenchmarkResult) AllocsPerOp() int64 {
	if r.N <= 0 {
		return 0
	}
	return int64(r.MemAllocs) / int64(r.N)
}

// AllocedBytesPerOp returns MemBytes divided by N (integer division),
// or 0 when N is not positive.
func (r BenchmarkResult) AllocedBytesPerOp() int64 {
	if r.N <= 0 {
		return 0
	}
	return int64(r.MemBytes) / int64(r.N)
}

// String formats the result as the iteration count, a tab, and the ns/op
// figure, followed by a tab and the MB/s figure when mbPerSec is non-zero.
// Values below 100 ns/op are printed with fractional digits.
func (r BenchmarkResult) String() string {
	mbs := r.mbPerSec()
	mb := ""
	if mbs != 0 {
		mb = fmt.Sprintf("\t%7.2f MB/s", mbs)
	}
	nsop := r.NsPerOp()
	ns := fmt.Sprintf("%10d ns/op", nsop)
	if r.N > 0 && nsop < 100 {
		// The format specifiers here make sure that
		// the ones digits line up for all three possible formats.
		if nsop < 10 {
			ns = fmt.Sprintf("%13.2f ns/op", float64(r.T.Nanoseconds())/float64(r.N))
		} else {
			ns = fmt.Sprintf("%12.1f ns/op", float64(r.T.Nanoseconds())/float64(r.N))
		}
	}
	return fmt.Sprintf("%8d\t%s%s", r.N, ns, mb)
}

// MemString formats the per-operation allocation statistics as
// "<bytes> B/op\t<allocs> allocs/op".
func (r BenchmarkResult) MemString() string {
	return fmt.Sprintf("%8d B/op\t%8d allocs/op",
		r.AllocedBytesPerOp(), r.AllocsPerOp())
}

// benchmarkName returns full name of benchmark including procs suffix.
// The "-n" suffix is omitted when n is 1.
func benchmarkName(name string, n int) string {
	if n != 1 {
		return fmt.Sprintf("%s-%d", name, n)
	}
	return name
}

// benchContext carries the state shared by all benchmarks started from one
// runBenchmarks call: the name matcher and the column width for result lines.
type benchContext struct {
	match *matcher

	maxLen int // The largest recorded benchmark name.
	extLen int // Maximum extension length.
}

// RunBenchmarks is an internal function but exported because it is
// cross-package; it is part of the implementation of the "go test" command.
func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) {
	runBenchmarks(matchString, benchmarks)
}

// runBenchmarks runs the benchmarks whose names match the -test.bench flag
// as subbenchmarks of a synthetic "Main" benchmark writing to os.Stdout.
// It returns true when the flag is empty (nothing is run) and otherwise
// reports whether the main benchmark did not fail.
func runBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool {
	// If no flag was specified, don't run benchmarks.
	if len(*matchBenchmarks) == 0 {
		return true
	}
	// Collect matching benchmarks and determine longest name.
	maxprocs := 1
	for _, procs := range cpuList {
		if procs > maxprocs {
			maxprocs = procs
		}
	}
	ctx := &benchContext{
		match:  newMatcher(matchString, *matchBenchmarks, "-test.bench"),
		extLen: len(benchmarkName("", maxprocs)),
	}
	var bs []InternalBenchmark
	for _, Benchmark := range benchmarks {
		if _, matched := ctx.match.fullName(nil, Benchmark.Name); matched {
			bs = append(bs, Benchmark)
			benchName := benchmarkName(Benchmark.Name, maxprocs)
			if l := len(benchName) + ctx.extLen + 1; l > ctx.maxLen {
				ctx.maxLen = l
			}
		}
	}
	// The matched benchmarks are driven through Run from a single pass
	// (N=1) of this top-level benchmark.
	main := &B{
		common: common{
			name:   "Main",
			w:      os.Stdout,
			chatty: *chatty,
		},
		benchFunc: func(b *B) {
			for _, Benchmark := range bs {
				b.Run(Benchmark.Name, Benchmark.F)
			}
		},
		benchTime: *benchTime,
		context:   ctx,
	}
	main.runN(1)
	return !main.failed
}

// processBench runs bench b for the configured CPU counts and prints the results.
func (ctx *benchContext) processBench(b *B) {
	for i, procs := range cpuList {
		runtime.GOMAXPROCS(procs)
		benchName := benchmarkName(b.name, procs)
		// Print the left-aligned name column before the benchmark runs.
		fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName)
		// Recompute the running time for all but the first iteration.
		// The first entry reuses the b already probed by the caller's run1;
		// later entries start from a fresh B and probe it again.
		if i > 0 {
			b = &B{
				common: common{
					signal: make(chan bool),
					name:   b.name,
					w:      b.w,
					chatty: b.chatty,
				},
				benchFunc: b.benchFunc,
				benchTime: b.benchTime,
			}
			b.run1()
		}
		r := b.doBench()
		if b.failed {
			// The output could be very long here, but probably isn't.
			// We print it all, regardless, because we don't want to trim the reason
			// the benchmark failed.
			fmt.Fprintf(b.w, "--- FAIL: %s\n%s", benchName, b.output)
			continue
		}
		results := r.String()
		if *benchmarkMemory || b.showAllocResult {
			results += "\t" + r.MemString()
		}
		fmt.Fprintln(b.w, results)
		// Unlike with tests, we ignore the -chatty flag and always print output for
		// benchmarks since the output generation time will skew the results.
		if len(b.output) > 0 {
			b.trimOutput()
			fmt.Fprintf(b.w, "--- BENCH: %s\n%s", benchName, b.output)
		}
		// Warn when the benchmark changed GOMAXPROCS and did not restore it.
		if p := runtime.GOMAXPROCS(-1); p != procs {
			fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p)
		}
	}
}

// Run benchmarks f as a subbenchmark with the given name. It reports
// whether there were any failures.
//
// A subbenchmark is like any other benchmark. A benchmark that calls Run at
// least once will not be measured itself and will be called once with N=1.
func (b *B) Run(name string, f func(b *B)) bool {
	// Since b has subbenchmarks, we will no longer run it as a benchmark itself.
	// Release the lock and acquire it on exit to ensure locks stay paired.
	b.hasSub = true
	benchmarkLock.Unlock()
	defer benchmarkLock.Lock()

	benchName, ok := b.name, true
	if b.context != nil {
		benchName, ok = b.context.match.fullName(&b.common, name)
	}
	// A name the matcher rejects is skipped and counted as a success.
	if !ok {
		return true
	}
	sub := &B{
		common: common{
			signal: make(chan bool),
			name:   benchName,
			parent: &b.common,
			level:  b.level + 1,
			w:      b.w,
			chatty: b.chatty,
		},
		benchFunc: f,
		benchTime: b.benchTime,
		context:   b.context,
	}
	// Probe with a single iteration; do the full run only if run1 asks for it.
	if sub.run1() {
		sub.run()
	}
	b.add(sub.result)
	return !sub.failed
}

// add simulates running benchmarks in sequence in a single iteration. It is
// used to give some meaningful results in case func Benchmark is used in
// combination with Run.
494 func (b *B) add(other BenchmarkResult) { 495 r := &b.result 496 // The aggregated BenchmarkResults resemble running all subbenchmarks as 497 // in sequence in a single benchmark. 498 r.N = 1 499 r.T += time.Duration(other.NsPerOp()) 500 if other.Bytes == 0 { 501 // Summing Bytes is meaningless in aggregate if not all subbenchmarks 502 // set it. 503 b.missingBytes = true 504 r.Bytes = 0 505 } 506 if !b.missingBytes { 507 r.Bytes += other.Bytes 508 } 509 r.MemAllocs += uint64(other.AllocsPerOp()) 510 r.MemBytes += uint64(other.AllocedBytesPerOp()) 511 } 512 513 // trimOutput shortens the output from a benchmark, which can be very long. 514 func (b *B) trimOutput() { 515 // The output is likely to appear multiple times because the benchmark 516 // is run multiple times, but at least it will be seen. This is not a big deal 517 // because benchmarks rarely print, but just in case, we trim it if it's too long. 518 const maxNewlines = 10 519 for nlCount, j := 0, 0; j < len(b.output); j++ { 520 if b.output[j] == '\n' { 521 nlCount++ 522 if nlCount >= maxNewlines { 523 b.output = append(b.output[:j], "\n\t... [output truncated]\n"...) 524 break 525 } 526 } 527 } 528 } 529 530 // A PB is used by RunParallel for running parallel benchmarks. 531 type PB struct { 532 globalN *uint64 // shared between all worker goroutines iteration counter 533 grain uint64 // acquire that many iterations from globalN at once 534 cache uint64 // local cache of acquired iterations 535 bN uint64 // total number of iterations to execute (b.N) 536 } 537 538 // Next reports whether there are more iterations to execute. 539 func (pb *PB) Next() bool { 540 if pb.cache == 0 { 541 n := atomic.AddUint64(pb.globalN, pb.grain) 542 if n <= pb.bN { 543 pb.cache = pb.grain 544 } else if n < pb.bN+pb.grain { 545 pb.cache = pb.bN + pb.grain - n 546 } else { 547 return false 548 } 549 } 550 pb.cache-- 551 return true 552 } 553 554 // RunParallel runs a benchmark in parallel. 
555 // It creates multiple goroutines and distributes b.N iterations among them. 556 // The number of goroutines defaults to GOMAXPROCS. To increase parallelism for 557 // non-CPU-bound benchmarks, call SetParallelism before RunParallel. 558 // RunParallel is usually used with the go test -cpu flag. 559 // 560 // The body function will be run in each goroutine. It should set up any 561 // goroutine-local state and then iterate until pb.Next returns false. 562 // It should not use the StartTimer, StopTimer, or ResetTimer functions, 563 // because they have global effect. It should also not call Run. 564 func (b *B) RunParallel(body func(*PB)) { 565 if b.N == 0 { 566 return // Nothing to do when probing. 567 } 568 // Calculate grain size as number of iterations that take ~100µs. 569 // 100µs is enough to amortize the overhead and provide sufficient 570 // dynamic load balancing. 571 grain := uint64(0) 572 if b.previousN > 0 && b.previousDuration > 0 { 573 grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration) 574 } 575 if grain < 1 { 576 grain = 1 577 } 578 // We expect the inner loop and function call to take at least 10ns, 579 // so do not do more than 100µs/10ns=1e4 iterations. 580 if grain > 1e4 { 581 grain = 1e4 582 } 583 584 n := uint64(0) 585 numProcs := b.parallelism * runtime.GOMAXPROCS(0) 586 var wg sync.WaitGroup 587 wg.Add(numProcs) 588 for p := 0; p < numProcs; p++ { 589 go func() { 590 defer wg.Done() 591 pb := &PB{ 592 globalN: &n, 593 grain: grain, 594 bN: uint64(b.N), 595 } 596 body(pb) 597 }() 598 } 599 wg.Wait() 600 if n <= uint64(b.N) && !b.Failed() { 601 b.Fatal("RunParallel: body exited without pb.Next() == false") 602 } 603 } 604 605 // SetParallelism sets the number of goroutines used by RunParallel to p*GOMAXPROCS. 606 // There is usually no need to call SetParallelism for CPU-bound benchmarks. 607 // If p is less than 1, this call will have no effect. 
608 func (b *B) SetParallelism(p int) { 609 if p >= 1 { 610 b.parallelism = p 611 } 612 } 613 614 // Benchmark benchmarks a single function. Useful for creating 615 // custom benchmarks that do not use the "go test" command. 616 // 617 // If f calls Run, the result will be an estimate of running all its 618 // subbenchmarks that don't call Run in sequence in a single benchmark. 619 func Benchmark(f func(b *B)) BenchmarkResult { 620 b := &B{ 621 common: common{ 622 signal: make(chan bool), 623 w: discard{}, 624 }, 625 benchFunc: f, 626 benchTime: *benchTime, 627 } 628 if !b.run1() { 629 return BenchmarkResult{} 630 } 631 return b.run() 632 } 633 634 type discard struct{} 635 636 func (discard) Write(b []byte) (n int, err error) { return len(b), nil }