github.com/fjballest/golang@v0.0.0-20151209143359-e4c5fe594ca8/src/testing/benchmark.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package testing 6 7 import ( 8 "flag" 9 "fmt" 10 "os" 11 "runtime" 12 "sync" 13 "sync/atomic" 14 "time" 15 ) 16 17 var matchBenchmarks = flag.String("test.bench", "", "regular expression to select benchmarks to run") 18 var benchTime = flag.Duration("test.benchtime", 1*time.Second, "approximate run time for each benchmark") 19 var benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks") 20 21 // Global lock to ensure only one benchmark runs at a time. 22 var benchmarkLock sync.Mutex 23 24 // Used for every benchmark for measuring memory. 25 var memStats runtime.MemStats 26 27 // An internal type but exported because it is cross-package; part of the implementation 28 // of the "go test" command. 29 type InternalBenchmark struct { 30 Name string 31 F func(b *B) 32 } 33 34 // B is a type passed to Benchmark functions to manage benchmark 35 // timing and to specify the number of iterations to run. 36 // 37 // A benchmark ends when its Benchmark function returns or calls any of the methods 38 // FailNow, Fatal, Fatalf, SkipNow, Skip, or Skipf. Those methods must be called 39 // only from the goroutine running the Benchmark function. 40 // The other reporting methods, such as the variations of Log and Error, 41 // may be called simultaneously from multiple goroutines. 42 // 43 // Like in tests, benchmark logs are accumulated during execution 44 // and dumped to standard error when done. Unlike in tests, benchmark logs 45 // are always printed, so as not to hide output whose existence may be 46 // affecting benchmark results. 47 type B struct { 48 common 49 N int 50 previousN int // number of iterations in the previous run 51 previousDuration time.Duration // total duration of the previous run 52 benchmark InternalBenchmark 53 bytes int64 54 timerOn bool 55 showAllocResult bool 56 result BenchmarkResult 57 parallelism int // RunParallel creates parallelism*GOMAXPROCS goroutines 58 // The initial states of memStats.Mallocs and memStats.TotalAlloc. 59 startAllocs uint64 60 startBytes uint64 61 // The net total of this test after being run. 62 netAllocs uint64 63 netBytes uint64 64 } 65 66 // StartTimer starts timing a test. This function is called automatically 67 // before a benchmark starts, but it can also used to resume timing after 68 // a call to StopTimer. 69 func (b *B) StartTimer() { 70 if !b.timerOn { 71 runtime.ReadMemStats(&memStats) 72 b.startAllocs = memStats.Mallocs 73 b.startBytes = memStats.TotalAlloc 74 b.start = time.Now() 75 b.timerOn = true 76 } 77 } 78 79 // StopTimer stops timing a test. This can be used to pause the timer 80 // while performing complex initialization that you don't 81 // want to measure. 82 func (b *B) StopTimer() { 83 if b.timerOn { 84 b.duration += time.Now().Sub(b.start) 85 runtime.ReadMemStats(&memStats) 86 b.netAllocs += memStats.Mallocs - b.startAllocs 87 b.netBytes += memStats.TotalAlloc - b.startBytes 88 b.timerOn = false 89 } 90 } 91 92 // ResetTimer zeros the elapsed benchmark time and memory allocation counters. 93 // It does not affect whether the timer is running. 94 func (b *B) ResetTimer() { 95 if b.timerOn { 96 runtime.ReadMemStats(&memStats) 97 b.startAllocs = memStats.Mallocs 98 b.startBytes = memStats.TotalAlloc 99 b.start = time.Now() 100 } 101 b.duration = 0 102 b.netAllocs = 0 103 b.netBytes = 0 104 } 105 106 // SetBytes records the number of bytes processed in a single operation. 107 // If this is called, the benchmark will report ns/op and MB/s. 108 func (b *B) SetBytes(n int64) { b.bytes = n } 109 110 // ReportAllocs enables malloc statistics for this benchmark. 111 // It is equivalent to setting -test.benchmem, but it only affects the 112 // benchmark function that calls ReportAllocs. 113 func (b *B) ReportAllocs() { 114 b.showAllocResult = true 115 } 116 117 func (b *B) nsPerOp() int64 { 118 if b.N <= 0 { 119 return 0 120 } 121 return b.duration.Nanoseconds() / int64(b.N) 122 } 123 124 // runN runs a single benchmark for the specified number of iterations. 125 func (b *B) runN(n int) { 126 benchmarkLock.Lock() 127 defer benchmarkLock.Unlock() 128 // Try to get a comparable environment for each run 129 // by clearing garbage from previous runs. 130 runtime.GC() 131 b.N = n 132 b.parallelism = 1 133 b.ResetTimer() 134 b.StartTimer() 135 b.benchmark.F(b) 136 b.StopTimer() 137 b.previousN = n 138 b.previousDuration = b.duration 139 } 140 141 func min(x, y int) int { 142 if x > y { 143 return y 144 } 145 return x 146 } 147 148 func max(x, y int) int { 149 if x < y { 150 return y 151 } 152 return x 153 } 154 155 // roundDown10 rounds a number down to the nearest power of 10. 156 func roundDown10(n int) int { 157 var tens = 0 158 // tens = floor(log_10(n)) 159 for n >= 10 { 160 n = n / 10 161 tens++ 162 } 163 // result = 10^tens 164 result := 1 165 for i := 0; i < tens; i++ { 166 result *= 10 167 } 168 return result 169 } 170 171 // roundUp rounds x up to a number of the form [1eX, 2eX, 3eX, 5eX]. 172 func roundUp(n int) int { 173 base := roundDown10(n) 174 switch { 175 case n <= base: 176 return base 177 case n <= (2 * base): 178 return 2 * base 179 case n <= (3 * base): 180 return 3 * base 181 case n <= (5 * base): 182 return 5 * base 183 default: 184 return 10 * base 185 } 186 } 187 188 // run times the benchmark function in a separate goroutine. 189 func (b *B) run() BenchmarkResult { 190 go b.launch() 191 <-b.signal 192 return b.result 193 } 194 195 // launch launches the benchmark function. It gradually increases the number 196 // of benchmark iterations until the benchmark runs for the requested benchtime. 197 // It prints timing information in this form 198 // testing.BenchmarkHello 100000 19 ns/op 199 // launch is run by the run function as a separate goroutine. 200 func (b *B) launch() { 201 // Run the benchmark for a single iteration in case it's expensive. 202 n := 1 203 204 // Signal that we're done whether we return normally 205 // or by FailNow's runtime.Goexit. 206 defer func() { 207 b.signal <- b 208 }() 209 210 b.runN(n) 211 // Run the benchmark for at least the specified amount of time. 212 d := *benchTime 213 for !b.failed && b.duration < d && n < 1e9 { 214 last := n 215 // Predict required iterations. 216 if b.nsPerOp() == 0 { 217 n = 1e9 218 } else { 219 n = int(d.Nanoseconds() / b.nsPerOp()) 220 } 221 // Run more iterations than we think we'll need (1.2x). 222 // Don't grow too fast in case we had timing errors previously. 223 // Be sure to run at least one more than last time. 224 n = max(min(n+n/5, 100*last), last+1) 225 // Round up to something easy to read. 226 n = roundUp(n) 227 b.runN(n) 228 } 229 b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes} 230 } 231 232 // The results of a benchmark run. 233 type BenchmarkResult struct { 234 N int // The number of iterations. 235 T time.Duration // The total time taken. 236 Bytes int64 // Bytes processed in one iteration. 237 MemAllocs uint64 // The total number of memory allocations. 238 MemBytes uint64 // The total number of bytes allocated. 239 } 240 241 func (r BenchmarkResult) NsPerOp() int64 { 242 if r.N <= 0 { 243 return 0 244 } 245 return r.T.Nanoseconds() / int64(r.N) 246 } 247 248 func (r BenchmarkResult) mbPerSec() float64 { 249 if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 { 250 return 0 251 } 252 return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds() 253 } 254 255 func (r BenchmarkResult) AllocsPerOp() int64 { 256 if r.N <= 0 { 257 return 0 258 } 259 return int64(r.MemAllocs) / int64(r.N) 260 } 261 262 func (r BenchmarkResult) AllocedBytesPerOp() int64 { 263 if r.N <= 0 { 264 return 0 265 } 266 return int64(r.MemBytes) / int64(r.N) 267 } 268 269 func (r BenchmarkResult) String() string { 270 mbs := r.mbPerSec() 271 mb := "" 272 if mbs != 0 { 273 mb = fmt.Sprintf("\t%7.2f MB/s", mbs) 274 } 275 nsop := r.NsPerOp() 276 ns := fmt.Sprintf("%10d ns/op", nsop) 277 if r.N > 0 && nsop < 100 { 278 // The format specifiers here make sure that 279 // the ones digits line up for all three possible formats. 280 if nsop < 10 { 281 ns = fmt.Sprintf("%13.2f ns/op", float64(r.T.Nanoseconds())/float64(r.N)) 282 } else { 283 ns = fmt.Sprintf("%12.1f ns/op", float64(r.T.Nanoseconds())/float64(r.N)) 284 } 285 } 286 return fmt.Sprintf("%8d\t%s%s", r.N, ns, mb) 287 } 288 289 func (r BenchmarkResult) MemString() string { 290 return fmt.Sprintf("%8d B/op\t%8d allocs/op", 291 r.AllocedBytesPerOp(), r.AllocsPerOp()) 292 } 293 294 // benchmarkName returns full name of benchmark including procs suffix. 295 func benchmarkName(name string, n int) string { 296 if n != 1 { 297 return fmt.Sprintf("%s-%d", name, n) 298 } 299 return name 300 } 301 302 // An internal function but exported because it is cross-package; part of the implementation 303 // of the "go test" command. 304 func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) { 305 // If no flag was specified, don't run benchmarks. 306 if len(*matchBenchmarks) == 0 { 307 return 308 } 309 // Collect matching benchmarks and determine longest name. 310 maxprocs := 1 311 for _, procs := range cpuList { 312 if procs > maxprocs { 313 maxprocs = procs 314 } 315 } 316 maxlen := 0 317 var bs []InternalBenchmark 318 for _, Benchmark := range benchmarks { 319 matched, err := matchString(*matchBenchmarks, Benchmark.Name) 320 if err != nil { 321 fmt.Fprintf(os.Stderr, "testing: invalid regexp for -test.bench: %s\n", err) 322 os.Exit(1) 323 } 324 if matched { 325 bs = append(bs, Benchmark) 326 benchName := benchmarkName(Benchmark.Name, maxprocs) 327 if l := len(benchName); l > maxlen { 328 maxlen = l 329 } 330 } 331 } 332 for _, Benchmark := range bs { 333 for _, procs := range cpuList { 334 runtime.GOMAXPROCS(procs) 335 b := &B{ 336 common: common{ 337 signal: make(chan interface{}), 338 }, 339 benchmark: Benchmark, 340 } 341 benchName := benchmarkName(Benchmark.Name, procs) 342 fmt.Printf("%-*s\t", maxlen, benchName) 343 r := b.run() 344 if b.failed { 345 // The output could be very long here, but probably isn't. 346 // We print it all, regardless, because we don't want to trim the reason 347 // the benchmark failed. 348 fmt.Printf("--- FAIL: %s\n%s", benchName, b.output) 349 continue 350 } 351 results := r.String() 352 if *benchmarkMemory || b.showAllocResult { 353 results += "\t" + r.MemString() 354 } 355 fmt.Println(results) 356 // Unlike with tests, we ignore the -chatty flag and always print output for 357 // benchmarks since the output generation time will skew the results. 358 if len(b.output) > 0 { 359 b.trimOutput() 360 fmt.Printf("--- BENCH: %s\n%s", benchName, b.output) 361 } 362 if p := runtime.GOMAXPROCS(-1); p != procs { 363 fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p) 364 } 365 } 366 } 367 } 368 369 // trimOutput shortens the output from a benchmark, which can be very long. 370 func (b *B) trimOutput() { 371 // The output is likely to appear multiple times because the benchmark 372 // is run multiple times, but at least it will be seen. This is not a big deal 373 // because benchmarks rarely print, but just in case, we trim it if it's too long. 374 const maxNewlines = 10 375 for nlCount, j := 0, 0; j < len(b.output); j++ { 376 if b.output[j] == '\n' { 377 nlCount++ 378 if nlCount >= maxNewlines { 379 b.output = append(b.output[:j], "\n\t... [output truncated]\n"...) 380 break 381 } 382 } 383 } 384 } 385 386 // A PB is used by RunParallel for running parallel benchmarks. 387 type PB struct { 388 globalN *uint64 // shared between all worker goroutines iteration counter 389 grain uint64 // acquire that many iterations from globalN at once 390 cache uint64 // local cache of acquired iterations 391 bN uint64 // total number of iterations to execute (b.N) 392 } 393 394 // Next reports whether there are more iterations to execute. 395 func (pb *PB) Next() bool { 396 if pb.cache == 0 { 397 n := atomic.AddUint64(pb.globalN, pb.grain) 398 if n <= pb.bN { 399 pb.cache = pb.grain 400 } else if n < pb.bN+pb.grain { 401 pb.cache = pb.bN + pb.grain - n 402 } else { 403 return false 404 } 405 } 406 pb.cache-- 407 return true 408 } 409 410 // RunParallel runs a benchmark in parallel. 411 // It creates multiple goroutines and distributes b.N iterations among them. 412 // The number of goroutines defaults to GOMAXPROCS. To increase parallelism for 413 // non-CPU-bound benchmarks, call SetParallelism before RunParallel. 414 // RunParallel is usually used with the go test -cpu flag. 415 // 416 // The body function will be run in each goroutine. It should set up any 417 // goroutine-local state and then iterate until pb.Next returns false. 418 // It should not use the StartTimer, StopTimer, or ResetTimer functions, 419 // because they have global effect. 420 func (b *B) RunParallel(body func(*PB)) { 421 // Calculate grain size as number of iterations that take ~100µs. 422 // 100µs is enough to amortize the overhead and provide sufficient 423 // dynamic load balancing. 424 grain := uint64(0) 425 if b.previousN > 0 && b.previousDuration > 0 { 426 grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration) 427 } 428 if grain < 1 { 429 grain = 1 430 } 431 // We expect the inner loop and function call to take at least 10ns, 432 // so do not do more than 100µs/10ns=1e4 iterations. 433 if grain > 1e4 { 434 grain = 1e4 435 } 436 437 n := uint64(0) 438 numProcs := b.parallelism * runtime.GOMAXPROCS(0) 439 var wg sync.WaitGroup 440 wg.Add(numProcs) 441 for p := 0; p < numProcs; p++ { 442 go func() { 443 defer wg.Done() 444 pb := &PB{ 445 globalN: &n, 446 grain: grain, 447 bN: uint64(b.N), 448 } 449 body(pb) 450 }() 451 } 452 wg.Wait() 453 if n <= uint64(b.N) && !b.Failed() { 454 b.Fatal("RunParallel: body exited without pb.Next() == false") 455 } 456 } 457 458 // SetParallelism sets the number of goroutines used by RunParallel to p*GOMAXPROCS. 459 // There is usually no need to call SetParallelism for CPU-bound benchmarks. 460 // If p is less than 1, this call will have no effect. 461 func (b *B) SetParallelism(p int) { 462 if p >= 1 { 463 b.parallelism = p 464 } 465 } 466 467 // Benchmark benchmarks a single function. Useful for creating 468 // custom benchmarks that do not use the "go test" command. 469 func Benchmark(f func(b *B)) BenchmarkResult { 470 b := &B{ 471 common: common{ 472 signal: make(chan interface{}), 473 }, 474 benchmark: InternalBenchmark{"", f}, 475 } 476 return b.run() 477 }