google.golang.org/grpc@v1.62.1/benchmark/stats/stats.go

/*
 *
 * Copyright 2017 gRPC authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

// Package stats tracks the statistics associated with benchmark runs.
package stats

import (
	"bytes"
	"fmt"
	"log"
	"math"
	"runtime"
	"sort"
	"strconv"
	"sync"
	"time"

	"google.golang.org/grpc"
)

// FeatureIndex is an enum for features that usually differ across individual
// benchmark runs in a single execution. These are usually configured by the
// user through command line flags.
type FeatureIndex int

// FeatureIndex enum values corresponding to individually settable features.
const (
	EnableTraceIndex FeatureIndex = iota
	ReadLatenciesIndex
	ReadKbpsIndex
	ReadMTUIndex
	MaxConcurrentCallsIndex
	ReqSizeBytesIndex
	RespSizeBytesIndex
	ReqPayloadCurveIndex
	RespPayloadCurveIndex
	CompModesIndex
	EnableChannelzIndex
	EnablePreloaderIndex
	ClientReadBufferSize
	ClientWriteBufferSize
	ServerReadBufferSize
	ServerWriteBufferSize
	SleepBetweenRPCs
	RecvBufferPool
	SharedWriteBuffer

	// MaxFeatureIndex is a placeholder to indicate the total number of feature
	// indices we have. Any new feature indices should be added above this.
	MaxFeatureIndex
)
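// Illustrative sketch, not part of the upstream file: callers typically size
// per-feature bookkeeping slices off MaxFeatureIndex and index them with the
// constants above, e.g. when marking which features are shared across runs:
//
//	shared := make([]bool, stats.MaxFeatureIndex)
//	shared[stats.ReadLatenciesIndex] = true // latency is identical in every run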
// Features represent configured options for a specific benchmark run. This is
// usually constructed from command line arguments passed by the caller. See
// benchmark/benchmain/main.go for defined command line flags. This is also
// part of the BenchResults struct which is serialized and written to a file.
type Features struct {
	// NetworkMode is the network mode used for this benchmark run. Could be
	// one of Local, LAN, WAN or Longhaul.
	NetworkMode string
	// UseBufConn indicates whether an in-memory connection was used for this
	// benchmark run instead of system network I/O.
	UseBufConn bool
	// EnableKeepalive indicates if keepalives were enabled on the connections
	// used in this benchmark run.
	EnableKeepalive bool
	// BenchTime indicates the duration of the benchmark run.
	BenchTime time.Duration
	// Connections configures the number of gRPC connections between client
	// and server.
	Connections int

	// Features defined above are usually the same for all benchmark runs in a
	// particular invocation, while the features defined below could vary from
	// run to run based on the configured command line. These features have a
	// corresponding FeatureIndex value which is used for a variety of reasons.

	// EnableTrace indicates if tracing was enabled.
	EnableTrace bool
	// Latency is the simulated one-way network latency used.
	Latency time.Duration
	// Kbps is the simulated network throughput used.
	Kbps int
	// MTU is the simulated network MTU used.
	MTU int
	// MaxConcurrentCalls is the number of concurrent RPCs made during this
	// benchmark run.
	MaxConcurrentCalls int
	// ReqSizeBytes is the request size in bytes used in this benchmark run.
	// Unused if ReqPayloadCurve is non-nil.
	ReqSizeBytes int
	// RespSizeBytes is the response size in bytes used in this benchmark run.
	// Unused if RespPayloadCurve is non-nil.
	RespSizeBytes int
	// ReqPayloadCurve is a histogram representing the shape a random
	// distribution of request payloads should take.
	ReqPayloadCurve *PayloadCurve
	// RespPayloadCurve is a histogram representing the shape a random
	// distribution of response payloads should take.
	RespPayloadCurve *PayloadCurve
	// ModeCompressor represents the compressor mode used.
	ModeCompressor string
	// EnableChannelz indicates if channelz was turned on.
	EnableChannelz bool
	// EnablePreloader indicates if preloading was turned on.
	EnablePreloader bool
	// ClientReadBufferSize is the size of the client read buffer in bytes. If
	// negative, use the default buffer size.
	ClientReadBufferSize int
	// ClientWriteBufferSize is the size of the client write buffer in bytes.
	// If negative, use the default buffer size.
	ClientWriteBufferSize int
	// ServerReadBufferSize is the size of the server read buffer in bytes. If
	// negative, use the default buffer size.
	ServerReadBufferSize int
	// ServerWriteBufferSize is the size of the server write buffer in bytes.
	// If negative, use the default buffer size.
	ServerWriteBufferSize int
	// SleepBetweenRPCs configures an optional delay between RPCs.
	SleepBetweenRPCs time.Duration
	// RecvBufferPool represents the shared recv buffer pool used.
	RecvBufferPool string
	// SharedWriteBuffer configures whether both client and server share a
	// per-connection write buffer.
	SharedWriteBuffer bool
}

// String returns all the feature values as a string.
func (f Features) String() string {
	var reqPayloadString, respPayloadString string
	if f.ReqPayloadCurve != nil {
		reqPayloadString = fmt.Sprintf("reqPayloadCurve_%s", f.ReqPayloadCurve.ShortHash())
	} else {
		reqPayloadString = fmt.Sprintf("reqSize_%vB", f.ReqSizeBytes)
	}
	if f.RespPayloadCurve != nil {
		respPayloadString = fmt.Sprintf("respPayloadCurve_%s", f.RespPayloadCurve.ShortHash())
	} else {
		respPayloadString = fmt.Sprintf("respSize_%vB", f.RespSizeBytes)
	}
	return fmt.Sprintf("networkMode_%v-bufConn_%v-keepalive_%v-benchTime_%v-"+
		"trace_%v-latency_%v-kbps_%v-MTU_%v-maxConcurrentCalls_%v-%s-%s-"+
		"compressor_%v-channelz_%v-preloader_%v-clientReadBufferSize_%v-"+
		"clientWriteBufferSize_%v-serverReadBufferSize_%v-serverWriteBufferSize_%v-"+
		"sleepBetweenRPCs_%v-connections_%v-recvBufferPool_%v-sharedWriteBuffer_%v",
		f.NetworkMode, f.UseBufConn, f.EnableKeepalive, f.BenchTime, f.EnableTrace,
		f.Latency, f.Kbps, f.MTU, f.MaxConcurrentCalls, reqPayloadString,
		respPayloadString, f.ModeCompressor, f.EnableChannelz, f.EnablePreloader,
		f.ClientReadBufferSize, f.ClientWriteBufferSize, f.ServerReadBufferSize,
		f.ServerWriteBufferSize, f.SleepBetweenRPCs, f.Connections,
		f.RecvBufferPool, f.SharedWriteBuffer)
}
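// Illustrative sketch, not part of the upstream file: String produces the
// dash-separated run name under which results are reported. For example, a
// run with NetworkMode "Local", BenchTime 10s, MaxConcurrentCalls 1, and
// fixed 1024-byte payloads yields a name beginning:
//
//	networkMode_Local-bufConn_false-keepalive_false-benchTime_10s-trace_false-
//	latency_0s-kbps_0-MTU_0-maxConcurrentCalls_1-reqSize_1024B-respSize_1024B-...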
// SharedFeatures returns the shared features as a pretty printable string.
// 'wantFeatures' is a bitmask of wanted features, indexed by FeatureIndex.
func (f Features) SharedFeatures(wantFeatures []bool) string {
	var b bytes.Buffer
	if f.NetworkMode != "" {
		b.WriteString(fmt.Sprintf("Network: %v\n", f.NetworkMode))
	}
	if f.UseBufConn {
		b.WriteString(fmt.Sprintf("UseBufConn: %v\n", f.UseBufConn))
	}
	if f.EnableKeepalive {
		b.WriteString(fmt.Sprintf("EnableKeepalive: %v\n", f.EnableKeepalive))
	}
	b.WriteString(fmt.Sprintf("BenchTime: %v\n", f.BenchTime))
	f.partialString(&b, wantFeatures, ": ", "\n")
	return b.String()
}

// PrintableName returns a one line name which includes the features specified
// by 'wantFeatures' which is a bitmask of wanted features, indexed by
// FeatureIndex.
func (f Features) PrintableName(wantFeatures []bool) string {
	var b bytes.Buffer
	f.partialString(&b, wantFeatures, "_", "-")
	return b.String()
}

// partialString writes features specified by 'wantFeatures' to the provided
// bytes.Buffer.
func (f Features) partialString(b *bytes.Buffer, wantFeatures []bool, sep, delim string) {
	for i, sf := range wantFeatures {
		if sf {
			switch FeatureIndex(i) {
			case EnableTraceIndex:
				b.WriteString(fmt.Sprintf("Trace%v%v%v", sep, f.EnableTrace, delim))
			case ReadLatenciesIndex:
				b.WriteString(fmt.Sprintf("Latency%v%v%v", sep, f.Latency, delim))
			case ReadKbpsIndex:
				b.WriteString(fmt.Sprintf("Kbps%v%v%v", sep, f.Kbps, delim))
			case ReadMTUIndex:
				b.WriteString(fmt.Sprintf("MTU%v%v%v", sep, f.MTU, delim))
			case MaxConcurrentCallsIndex:
				b.WriteString(fmt.Sprintf("Callers%v%v%v", sep, f.MaxConcurrentCalls, delim))
			case ReqSizeBytesIndex:
				b.WriteString(fmt.Sprintf("ReqSize%v%vB%v", sep, f.ReqSizeBytes, delim))
			case RespSizeBytesIndex:
				b.WriteString(fmt.Sprintf("RespSize%v%vB%v", sep, f.RespSizeBytes, delim))
			case ReqPayloadCurveIndex:
				if f.ReqPayloadCurve != nil {
					b.WriteString(fmt.Sprintf("ReqPayloadCurve%vSHA-256:%v%v", sep, f.ReqPayloadCurve.Hash(), delim))
				}
			case RespPayloadCurveIndex:
				if f.RespPayloadCurve != nil {
					b.WriteString(fmt.Sprintf("RespPayloadCurve%vSHA-256:%v%v", sep, f.RespPayloadCurve.Hash(), delim))
				}
			case CompModesIndex:
				b.WriteString(fmt.Sprintf("Compressor%v%v%v", sep, f.ModeCompressor, delim))
			case EnableChannelzIndex:
				b.WriteString(fmt.Sprintf("Channelz%v%v%v", sep, f.EnableChannelz, delim))
			case EnablePreloaderIndex:
				b.WriteString(fmt.Sprintf("Preloader%v%v%v", sep, f.EnablePreloader, delim))
			case ClientReadBufferSize:
				b.WriteString(fmt.Sprintf("ClientReadBufferSize%v%v%v", sep, f.ClientReadBufferSize, delim))
			case ClientWriteBufferSize:
				b.WriteString(fmt.Sprintf("ClientWriteBufferSize%v%v%v", sep, f.ClientWriteBufferSize, delim))
			case ServerReadBufferSize:
				b.WriteString(fmt.Sprintf("ServerReadBufferSize%v%v%v", sep, f.ServerReadBufferSize, delim))
			case ServerWriteBufferSize:
				b.WriteString(fmt.Sprintf("ServerWriteBufferSize%v%v%v", sep, f.ServerWriteBufferSize, delim))
			case SleepBetweenRPCs:
				b.WriteString(fmt.Sprintf("SleepBetweenRPCs%v%v%v", sep, f.SleepBetweenRPCs, delim))
			case RecvBufferPool:
				b.WriteString(fmt.Sprintf("RecvBufferPool%v%v%v", sep, f.RecvBufferPool, delim))
			case SharedWriteBuffer:
				b.WriteString(fmt.Sprintf("SharedWriteBuffer%v%v%v", sep, f.SharedWriteBuffer, delim))
			default:
				log.Fatalf("Unknown feature index %v. MaxFeatureIndex is %v", i, MaxFeatureIndex)
			}
		}
	}
}
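// Illustrative sketch, not part of the upstream file: a caller that varies
// only the number of concurrent calls across runs can label each run with
// just that feature:
//
//	want := make([]bool, stats.MaxFeatureIndex)
//	want[stats.MaxConcurrentCallsIndex] = true
//	name := f.PrintableName(want) // e.g. "Callers_8-"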
// BenchResults records features and results of a benchmark run. A collection
// of these structs is usually serialized and written to a file after a
// benchmark execution, and could later be read for pretty-printing or
// comparison with other benchmark results.
type BenchResults struct {
	// GoVersion is the version of the compiler the benchmark was compiled with.
	GoVersion string
	// GrpcVersion is the gRPC version being benchmarked.
	GrpcVersion string
	// RunMode is the workload mode for this benchmark run. This could be unary,
	// stream or unconstrained.
	RunMode string
	// Features represents the configured feature options for this run.
	Features Features
	// SharedFeatures represents the features which were shared across all
	// benchmark runs during one execution. It is a slice indexed by
	// 'FeatureIndex' and a value of true indicates that the associated
	// feature is shared across all runs.
	SharedFeatures []bool
	// Data contains the statistical data of interest from the benchmark run.
	Data RunData
}

// RunData contains statistical data of interest from a benchmark run.
type RunData struct {
	// TotalOps is the number of operations executed during this benchmark run.
	// Only makes sense for unary and streaming workloads.
	TotalOps uint64
	// SendOps is the number of send operations executed during this benchmark
	// run. Only makes sense for unconstrained workloads.
	SendOps uint64
	// RecvOps is the number of receive operations executed during this benchmark
	// run. Only makes sense for unconstrained workloads.
	RecvOps uint64
	// AllocedBytes is the average memory allocation in bytes per operation.
	AllocedBytes float64
	// Allocs is the average number of memory allocations per operation.
	Allocs float64
	// ReqT is the average request throughput associated with this run.
	ReqT float64
	// RespT is the average response throughput associated with this run.
	RespT float64

	// We store different latencies associated with each run. These latencies are
	// only computed for unary and stream workloads as they are not very useful
	// for unconstrained workloads.

	// Fiftieth is the 50th percentile latency.
	Fiftieth time.Duration
	// Ninetieth is the 90th percentile latency.
	Ninetieth time.Duration
	// NinetyNinth is the 99th percentile latency.
	NinetyNinth time.Duration
	// Average is the average latency.
	Average time.Duration
}

type durationSlice []time.Duration

func (a durationSlice) Len() int           { return len(a) }
func (a durationSlice) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a durationSlice) Less(i, j int) bool { return a[i] < a[j] }

// Stats is a helper for gathering statistics about individual benchmark runs.
type Stats struct {
	mu         sync.Mutex
	numBuckets int
	hw         *histWrapper
	results    []BenchResults
	startMS    runtime.MemStats
	stopMS     runtime.MemStats
}

type histWrapper struct {
	unit      time.Duration
	histogram *Histogram
	durations durationSlice
}

// NewStats creates a new Stats instance. If numBuckets is not positive, the
// default value (16) will be used.
func NewStats(numBuckets int) *Stats {
	if numBuckets <= 0 {
		numBuckets = 16
	}
	// Use one more bucket for the last unbounded bucket.
	s := &Stats{numBuckets: numBuckets + 1}
	s.hw = &histWrapper{}
	return s
}
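// Illustrative sketch, not part of the upstream file: a typical driver takes
// a Stats instance through StartRun / AddDuration / EndRun and then collects
// the accumulated results (doRPC and count are hypothetical):
//
//	s := stats.NewStats(0) // non-positive, so the default 16 buckets are used
//	s.StartRun("unary", features, sharedFeatures)
//	for i := uint64(0); i < count; i++ {
//		start := time.Now()
//		doRPC() // hypothetical unary call being benchmarked
//		s.AddDuration(time.Since(start))
//	}
//	s.EndRun(count) // computes latency/throughput stats and prints them
//	results := s.GetResults()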
// StartRun is to be invoked to indicate the start of a new benchmark run.
func (s *Stats) StartRun(mode string, f Features, sf []bool) {
	s.mu.Lock()
	defer s.mu.Unlock()

	runtime.ReadMemStats(&s.startMS)
	s.results = append(s.results, BenchResults{
		GoVersion:      runtime.Version(),
		GrpcVersion:    grpc.Version,
		RunMode:        mode,
		Features:       f,
		SharedFeatures: sf,
	})
}

// EndRun is to be invoked to indicate the end of the ongoing benchmark run. It
// computes the resulting stats and dumps them to stdout.
func (s *Stats) EndRun(count uint64) {
	s.mu.Lock()
	defer s.mu.Unlock()

	runtime.ReadMemStats(&s.stopMS)
	r := &s.results[len(s.results)-1]
	r.Data = RunData{
		TotalOps:     count,
		AllocedBytes: float64(s.stopMS.TotalAlloc-s.startMS.TotalAlloc) / float64(count),
		Allocs:       float64(s.stopMS.Mallocs-s.startMS.Mallocs) / float64(count),
		ReqT:         float64(count) * float64(r.Features.ReqSizeBytes) * 8 / r.Features.BenchTime.Seconds(),
		RespT:        float64(count) * float64(r.Features.RespSizeBytes) * 8 / r.Features.BenchTime.Seconds(),
	}
	s.computeLatencies(r)
	s.dump(r)
	s.hw = &histWrapper{}
}

// EndUnconstrainedRun is similar to EndRun, but is to be used for
// unconstrained workloads.
func (s *Stats) EndUnconstrainedRun(req uint64, resp uint64) {
	s.mu.Lock()
	defer s.mu.Unlock()

	runtime.ReadMemStats(&s.stopMS)
	r := &s.results[len(s.results)-1]
	r.Data = RunData{
		SendOps:      req,
		RecvOps:      resp,
		AllocedBytes: float64(s.stopMS.TotalAlloc-s.startMS.TotalAlloc) / float64((req+resp)/2),
		Allocs:       float64(s.stopMS.Mallocs-s.startMS.Mallocs) / float64((req+resp)/2),
		ReqT:         float64(req) * float64(r.Features.ReqSizeBytes) * 8 / r.Features.BenchTime.Seconds(),
		RespT:        float64(resp) * float64(r.Features.RespSizeBytes) * 8 / r.Features.BenchTime.Seconds(),
	}
	s.computeLatencies(r)
	s.dump(r)
	s.hw = &histWrapper{}
}

// AddDuration adds an elapsed duration per operation to the stats. This is
// used by unary and stream modes where request and response stats are equal.
func (s *Stats) AddDuration(d time.Duration) {
	s.mu.Lock()
	defer s.mu.Unlock()

	s.hw.durations = append(s.hw.durations, d)
}

// GetResults returns the results from all benchmark runs.
func (s *Stats) GetResults() []BenchResults {
	s.mu.Lock()
	defer s.mu.Unlock()

	return s.results
}
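// Added commentary, not in the upstream file: computeLatencies below sizes
// its histogram so that the lower bound of the last bucket lands on the
// largest observed duration. With BaseBucketSize = 1 and MinValue = min,
// that lower bound is min + (1+g)^(numBuckets-2), so solving
//
//	(1+g)^(numBuckets-2) = max - min
//	g = (max-min)^(1/(numBuckets-2)) - 1
//
// gives the GrowthFactor expression used in the HistogramOptions. For
// example, with max-min = 1e6 ns and 17 buckets, g = 1e6^(1/15) - 1 ≈ 1.51,
// i.e. each bucket is roughly 2.51x as wide as the previous one.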
// computeLatencies computes percentile latencies based on durations stored in
// the stats object and updates the corresponding fields in the result object.
func (s *Stats) computeLatencies(result *BenchResults) {
	if len(s.hw.durations) == 0 {
		return
	}
	sort.Sort(s.hw.durations)
	minDuration := int64(s.hw.durations[0])
	maxDuration := int64(s.hw.durations[len(s.hw.durations)-1])

	// Use the largest unit that can represent the minimum time duration.
	s.hw.unit = time.Nanosecond
	for _, u := range []time.Duration{time.Microsecond, time.Millisecond, time.Second} {
		if minDuration <= int64(u) {
			break
		}
		s.hw.unit = u
	}

	numBuckets := s.numBuckets
	if n := int(maxDuration - minDuration + 1); n < numBuckets {
		numBuckets = n
	}
	s.hw.histogram = NewHistogram(HistogramOptions{
		NumBuckets: numBuckets,
		// max-min (the lower bound of the last bucket) = (1 + growthFactor)^(numBuckets-2) * baseBucketSize.
		GrowthFactor:   math.Pow(float64(maxDuration-minDuration), 1/float64(numBuckets-2)) - 1,
		BaseBucketSize: 1.0,
		MinValue:       minDuration,
	})
	for _, d := range s.hw.durations {
		s.hw.histogram.Add(int64(d))
	}
	result.Data.Fiftieth = s.hw.durations[max(s.hw.histogram.Count*int64(50)/100-1, 0)]
	result.Data.Ninetieth = s.hw.durations[max(s.hw.histogram.Count*int64(90)/100-1, 0)]
	result.Data.NinetyNinth = s.hw.durations[max(s.hw.histogram.Count*int64(99)/100-1, 0)]
	result.Data.Average = time.Duration(float64(s.hw.histogram.Sum) / float64(s.hw.histogram.Count))
}

// dump prints the formatted results of a benchmark run to stdout.
func (s *Stats) dump(result *BenchResults) {
	var b bytes.Buffer

	// Go and gRPC version information.
	b.WriteString(fmt.Sprintf("%s/grpc%s\n", result.GoVersion, result.GrpcVersion))

	// This prints the run mode and all features of the bench on a line.
	b.WriteString(fmt.Sprintf("%s-%s:\n", result.RunMode, result.Features.String()))

	unit := s.hw.unit
	tUnit := fmt.Sprintf("%v", unit)[1:] // stores one of s, ms, μs, ns

	if l := result.Data.Fiftieth; l != 0 {
		b.WriteString(fmt.Sprintf("50_Latency: %s%s\t", strconv.FormatFloat(float64(l)/float64(unit), 'f', 4, 64), tUnit))
	}
	if l := result.Data.Ninetieth; l != 0 {
		b.WriteString(fmt.Sprintf("90_Latency: %s%s\t", strconv.FormatFloat(float64(l)/float64(unit), 'f', 4, 64), tUnit))
	}
	if l := result.Data.NinetyNinth; l != 0 {
		b.WriteString(fmt.Sprintf("99_Latency: %s%s\t", strconv.FormatFloat(float64(l)/float64(unit), 'f', 4, 64), tUnit))
	}
	if l := result.Data.Average; l != 0 {
		b.WriteString(fmt.Sprintf("Avg_Latency: %s%s\t", strconv.FormatFloat(float64(l)/float64(unit), 'f', 4, 64), tUnit))
	}
	b.WriteString(fmt.Sprintf("Bytes/op: %v\t", result.Data.AllocedBytes))
	b.WriteString(fmt.Sprintf("Allocs/op: %v\t\n", result.Data.Allocs))

	// This prints the histogram stats for the latency.
	if s.hw.histogram == nil {
		b.WriteString("Histogram (empty)\n")
	} else {
		b.WriteString(fmt.Sprintf("Histogram (unit: %s)\n", tUnit))
		s.hw.histogram.PrintWithUnit(&b, float64(unit))
	}

	// Print throughput data.
	req := result.Data.SendOps
	if req == 0 {
		req = result.Data.TotalOps
	}
	resp := result.Data.RecvOps
	if resp == 0 {
		resp = result.Data.TotalOps
	}
	b.WriteString(fmt.Sprintf("Number of requests: %v\tRequest throughput: %v bit/s\n", req, result.Data.ReqT))
	b.WriteString(fmt.Sprintf("Number of responses: %v\tResponse throughput: %v bit/s\n", resp, result.Data.RespT))
	fmt.Println(b.String())
}

func max(a, b int64) int64 {
	if a > b {
		return a
	}
	return b
}
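// Added commentary, not in the upstream file: dump derives its unit suffix by
// formatting the chosen time.Duration and slicing off the leading "1", e.g.
// fmt.Sprintf("%v", time.Millisecond) is "1ms", so tUnit becomes "ms". With
// illustrative numbers (10000 unary ops, 1024-byte payloads, 10s BenchTime,
// so ReqT = 10000*1024*8/10 = 8.192e+06 bit/s), the per-run summary printed
// to stdout is shaped like:
//
//	go1.21.5/grpc1.62.1
//	unary-networkMode_Local-...:
//	50_Latency: 1.2340ms	90_Latency: 2.5000ms	99_Latency: 4.0000ms	Avg_Latency: 1.5000ms	Bytes/op: 1024	Allocs/op: 12
//	Histogram (unit: ms)
//	...
//	Number of requests: 10000	Request throughput: 8.192e+06 bit/s
//	Number of responses: 10000	Response throughput: 8.192e+06 bit/s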