github.com/jgbaldwinbrown/perf@v0.1.1/benchfmt/reader.go (about) 1 // Copyright 2022 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package benchfmt 6 7 import ( 8 "bufio" 9 "bytes" 10 "fmt" 11 "io" 12 "math" 13 "unicode" 14 "unicode/utf8" 15 16 "golang.org/x/perf/benchfmt/internal/bytesconv" 17 "golang.org/x/perf/benchunit" 18 ) 19 20 // A Reader reads the Go benchmark format. 21 // 22 // Its API is modeled on bufio.Scanner. To minimize allocation, a 23 // Reader retains ownership of everything it creates; a caller should 24 // copy anything it needs to retain. 25 // 26 // To construct a new Reader, either call NewReader, or call Reset on 27 // a zeroed Reader. 28 type Reader struct { 29 s *bufio.Scanner 30 err error // current I/O error 31 32 // q is the queue of records to return before processing the next 33 // input line. qPos is the index of the current record in q. We 34 // track the index explicitly rather than slicing q so that we can 35 // reuse the q slice when we reach the end. 36 q []Record 37 qPos int 38 39 result Result 40 units UnitMetadataMap 41 42 interns map[string]string 43 } 44 45 // A SyntaxError represents a syntax error on a particular line of a 46 // benchmark results file. 47 type SyntaxError struct { 48 FileName string 49 Line int 50 Msg string 51 } 52 53 func (e *SyntaxError) Pos() (fileName string, line int) { 54 return e.FileName, e.Line 55 } 56 57 func (s *SyntaxError) Error() string { 58 return fmt.Sprintf("%s:%d: %s", s.FileName, s.Line, s.Msg) 59 } 60 61 var noResult = &SyntaxError{"", 0, "Reader.Scan has not been called"} 62 63 // NewReader constructs a reader to parse the Go benchmark format from r. 64 // fileName is used in error messages; it is purely diagnostic. 65 func NewReader(r io.Reader, fileName string) *Reader { 66 reader := new(Reader) 67 reader.Reset(r, fileName) 68 return reader 69 } 70 71 // newSyntaxError returns a *SyntaxError at the Reader's current position. 72 func (r *Reader) newSyntaxError(msg string) *SyntaxError { 73 return &SyntaxError{r.result.fileName, r.result.line, msg} 74 } 75 76 // Reset resets the reader to begin reading from a new input. 77 // It also resets all accumulated configuration values. 78 // It does NOT reset unit metadata because it carries across files. 79 // 80 // initConfig is an alternating sequence of keys and values. 81 // Reset will install these as the initial internal configuration 82 // before any results are read from the input file. 83 func (r *Reader) Reset(ior io.Reader, fileName string, initConfig ...string) { 84 r.s = bufio.NewScanner(ior) 85 if fileName == "" { 86 fileName = "<unknown>" 87 } 88 r.err = nil 89 if r.interns == nil { 90 r.interns = make(map[string]string) 91 } 92 if r.units == nil { 93 r.units = make(map[UnitMetadataKey]*UnitMetadata) 94 } 95 96 // Wipe the queue in case the user hasn't consumed everything from 97 // this file. 98 r.qPos = 0 99 r.q = r.q[:0] 100 101 // Wipe the Result. 102 r.result.Config = r.result.Config[:0] 103 r.result.Name = r.result.Name[:0] 104 r.result.Iters = 0 105 r.result.Values = r.result.Values[:0] 106 for k := range r.result.configPos { 107 delete(r.result.configPos, k) 108 } 109 r.result.fileName = fileName 110 r.result.line = 0 111 112 // Set up initial configuration. 113 if len(initConfig)%2 != 0 { 114 panic("len(initConfig) must be a multiple of 2") 115 } 116 for i := 0; i < len(initConfig); i += 2 { 117 r.result.SetConfig(initConfig[i], initConfig[i+1]) 118 } 119 } 120 121 var ( 122 benchmarkPrefix = []byte("Benchmark") 123 unitPrefix = []byte("Unit") 124 ) 125 126 // Scan advances the reader to the next result and reports whether a 127 // result was read. 128 // The caller should use the Result method to get the result. 129 // If Scan reaches EOF or an I/O error occurs, it returns false, 130 // in which case the caller should use the Err method to check for errors. 131 func (r *Reader) Scan() bool { 132 if r.err != nil { 133 return false 134 } 135 136 // If there's anything in the queue from an earlier line, just pop 137 // the queue and return without consuming any more input. 138 if r.qPos+1 < len(r.q) { 139 r.qPos++ 140 return true 141 } 142 // Otherwise, we've drained the queue and need to parse more input 143 // to refill it. Reset it to 0 so we can reuse the space. 144 r.qPos = 0 145 r.q = r.q[:0] 146 147 // Process lines until we add something to the queue or hit EOF. 148 for len(r.q) == 0 && r.s.Scan() { 149 r.result.line++ 150 // We do everything in byte buffers to avoid allocation. 151 line := r.s.Bytes() 152 // Most lines are benchmark lines, and we can check 153 // for that very quickly, so start with that. 154 if bytes.HasPrefix(line, benchmarkPrefix) { 155 // At this point we commit to this being a 156 // benchmark line. If it's malformed, we treat 157 // that as an error. 158 if err := r.parseBenchmarkLine(line); err != nil { 159 r.q = append(r.q, err) 160 } else { 161 r.q = append(r.q, &r.result) 162 } 163 continue 164 } 165 if len(line) > 0 && line[0] == 'U' { 166 if nLine, ok := r.isUnitLine(line); ok { 167 // Parse unit metadata line. This queues up its own 168 // records and errors. 169 r.parseUnitLine(nLine) 170 continue 171 } 172 } 173 if key, val, ok := parseKeyValueLine(line); ok { 174 // Intern key, since there tend to be few 175 // unique keys. 176 keyStr := r.intern(key) 177 if len(val) == 0 { 178 r.result.deleteConfig(keyStr) 179 } else { 180 cfg := r.result.ensureConfig(keyStr, true) 181 cfg.Value = append(cfg.Value[:0], val...) 182 } 183 continue 184 } 185 // Ignore the line. 186 } 187 188 if len(r.q) > 0 { 189 // We queued something up to return. 190 return true 191 } 192 193 // We hit EOF. Check for IO errors. 194 if err := r.s.Err(); err != nil { 195 r.err = fmt.Errorf("%s:%d: %w", r.result.fileName, r.result.line, err) 196 return false 197 } 198 r.err = nil 199 return false 200 } 201 202 // parseKeyValueLine attempts to parse line as a key: val pair, 203 // with ok reporting whether the line could be parsed. 204 func parseKeyValueLine(line []byte) (key, val []byte, ok bool) { 205 for i := 0; i < len(line); { 206 r, n := utf8.DecodeRune(line[i:]) 207 // key begins with a lower case character ... 208 if i == 0 && !unicode.IsLower(r) { 209 return 210 } 211 // and contains no space characters nor upper case 212 // characters. 213 if unicode.IsSpace(r) || unicode.IsUpper(r) { 214 return 215 } 216 if i > 0 && r == ':' { 217 key, val = line[:i], line[i+1:] 218 break 219 } 220 221 i += n 222 } 223 if len(key) == 0 { 224 return 225 } 226 // Value can be omitted entirely, in which case the colon must 227 // still be present, but need not be followed by a space. 228 if len(val) == 0 { 229 ok = true 230 return 231 } 232 // One or more ASCII space or tab characters separate "key:" 233 // from "value." 234 for len(val) > 0 && (val[0] == ' ' || val[0] == '\t') { 235 val = val[1:] 236 ok = true 237 } 238 return 239 } 240 241 // parseBenchmarkLine parses line as a benchmark result and updates r.result. 242 // The caller must have already checked that line begins with "Benchmark". 243 func (r *Reader) parseBenchmarkLine(line []byte) *SyntaxError { 244 var f []byte 245 var err error 246 247 // Skip "Benchmark" 248 line = line[len("Benchmark"):] 249 250 // Read the name. 251 r.result.Name, line = splitField(line) 252 253 // Read the iteration count. 254 f, line = splitField(line) 255 if len(f) == 0 { 256 return r.newSyntaxError("missing iteration count") 257 } 258 r.result.Iters, err = bytesconv.Atoi(f) 259 switch err := err.(type) { 260 case nil: 261 // ok 262 case *bytesconv.NumError: 263 return r.newSyntaxError("parsing iteration count: " + err.Err.Error()) 264 default: 265 return r.newSyntaxError(err.Error()) 266 } 267 268 // Read value/unit pairs. 269 r.result.Values = r.result.Values[:0] 270 for { 271 f, line = splitField(line) 272 if len(f) == 0 { 273 if len(r.result.Values) > 0 { 274 break 275 } 276 return r.newSyntaxError("missing measurements") 277 } 278 val, err := atof(f) 279 switch err := err.(type) { 280 case nil: 281 // ok 282 case *bytesconv.NumError: 283 return r.newSyntaxError("parsing measurement: " + err.Err.Error()) 284 default: 285 return r.newSyntaxError(err.Error()) 286 } 287 f, line = splitField(line) 288 if len(f) == 0 { 289 return r.newSyntaxError("missing units") 290 } 291 unit := r.intern(f) 292 293 // Tidy the value. 294 tidyVal, tidyUnit := benchunit.Tidy(val, unit) 295 var v Value 296 if tidyVal == val { 297 v = Value{Value: val, Unit: unit} 298 } else { 299 v = Value{Value: tidyVal, Unit: tidyUnit, OrigValue: val, OrigUnit: unit} 300 } 301 302 r.result.Values = append(r.result.Values, v) 303 } 304 305 return nil 306 } 307 308 // isUnitLine tests whether line is a unit metadata line. If it is, it 309 // returns the line after the "Unit" literal and true. 310 func (r *Reader) isUnitLine(line []byte) (rest []byte, ok bool) { 311 var f []byte 312 // Is this a unit metadata line? 313 f, line = splitField(line) 314 if bytes.Equal(f, unitPrefix) { 315 return line, true 316 } 317 return nil, false 318 } 319 320 // parseUnitLine parses line as a unit metadata line, starting 321 // after "Unit". It updates r.q. 322 // If there are syntax errors on the line, it will attempt to parse 323 // what it can and return a non-nil error. 324 func (r *Reader) parseUnitLine(line []byte) { 325 var f []byte 326 // isUnitLine already consumed the literal "Unit". 327 // Consume the next field, which is the unit. 328 f, line = splitField(line) 329 if len(f) == 0 { 330 r.q = append(r.q, r.newSyntaxError("missing unit")) 331 return 332 } 333 unit := r.intern(f) 334 335 // The metadata map is indexed by tidied units because we want to 336 // support lookups by tidy units and there's no way to "untidy" a 337 // unit. 338 _, tidyUnit := benchunit.Tidy(1, unit) 339 340 // Consume key=value pairs. 341 for { 342 f, line = splitField(line) 343 if len(f) == 0 { 344 break 345 } 346 eq := bytes.IndexByte(f, '=') 347 if eq <= 0 { 348 r.q = append(r.q, r.newSyntaxError("expected key=value")) 349 continue 350 } 351 key := UnitMetadataKey{tidyUnit, r.intern(f[:eq])} 352 value := r.intern(f[eq+1:]) 353 354 if have, ok := r.units[key]; ok { 355 if have.Value == value { 356 // We already have this unit metadata. Ignore. 357 continue 358 } 359 // Report incompatible unit metadata. 360 r.q = append(r.q, r.newSyntaxError(fmt.Sprintf("metadata %s of unit %s already set to %s", key.Key, unit, have.Value))) 361 continue 362 } 363 364 metadata := &UnitMetadata{key, unit, value, r.result.fileName, r.result.line} 365 r.units[key] = metadata 366 r.q = append(r.q, metadata) 367 } 368 } 369 370 func (r *Reader) intern(x []byte) string { 371 const maxIntern = 1024 372 if s, ok := r.interns[string(x)]; ok { 373 return s 374 } 375 if len(r.interns) >= maxIntern { 376 // Evict a random item from the interns table. 377 // Map iteration order is unspecified, but both 378 // the gc and libgo runtimes both provide random 379 // iteration order. The choice of item to evict doesn't 380 // affect correctness, so we do the simple thing. 381 for k := range r.interns { 382 delete(r.interns, k) 383 break 384 } 385 } 386 s := string(x) 387 r.interns[s] = s 388 return s 389 } 390 391 // A Record is a single record read from a benchmark file. It may be a 392 // *Result or a *SyntaxError. 393 type Record interface { 394 // Pos returns the position of this record as a file name and a 395 // 1-based line number within that file. If this record was not read 396 // from a file, it returns "", 0. 397 Pos() (fileName string, line int) 398 } 399 400 var _ Record = (*Result)(nil) 401 var _ Record = (*SyntaxError)(nil) 402 var _ Record = (*UnitMetadata)(nil) 403 404 // Result returns the record that was just read by Scan. This is either 405 // a *Result, a *UnitMetadata, or a *SyntaxError indicating a parse error. 406 // It may return more types in the future. 407 // 408 // Parse errors are non-fatal, so the caller can continue to call 409 // Scan. 410 // 411 // If this returns a *Result, the caller should not retain the Result, 412 // as it will be overwritten by the next call to Scan. 413 func (r *Reader) Result() Record { 414 if r.qPos >= len(r.q) { 415 // This should only happen if Scan has never been called. 416 return noResult 417 } 418 return r.q[r.qPos] 419 } 420 421 // Err returns the first non-EOF I/O error that was encountered by the 422 // Reader. 423 func (r *Reader) Err() error { 424 return r.err 425 } 426 427 // Units returns the accumulated unit metadata. 428 // 429 // Callers that want to consume the entire stream of benchmark results 430 // and then process units can use this instead of monitoring 431 // *UnitMetadata Records. 432 func (r *Reader) Units() UnitMetadataMap { 433 return r.units 434 } 435 436 // Parsing helpers. 437 // 438 // These are designed to leverage common fast paths. The ASCII fast 439 // path is especially important, and more than doubles the performance 440 // of the parser. 441 442 // atof is a wrapper for bytesconv.ParseFloat that optimizes for 443 // numbers that are usually integers. 444 func atof(x []byte) (float64, error) { 445 // Try parsing as an integer. 446 var val int64 447 for _, ch := range x { 448 digit := ch - '0' 449 if digit >= 10 { 450 goto fail 451 } 452 if val > (math.MaxInt64-10)/10 { 453 goto fail // avoid int64 overflow 454 } 455 val = (val * 10) + int64(digit) 456 } 457 return float64(val), nil 458 459 fail: 460 // The fast path failed. Parse it as a float. 461 return bytesconv.ParseFloat(x, 64) 462 } 463 464 const isSpace uint64 = 1<<'\t' | 1<<'\n' | 1<<'\v' | 1<<'\f' | 1<<'\r' | 1<<' ' 465 466 // splitField consumes and returns non-whitespace in x as field, 467 // consumes whitespace following the field, and then returns the 468 // remaining bytes of x. 469 func splitField(x []byte) (field, rest []byte) { 470 // Collect non-whitespace into field. 471 var i int 472 for i = 0; i < len(x); { 473 if x[i] < utf8.RuneSelf { 474 // Fast path for ASCII 475 if (isSpace>>x[i])&1 != 0 { 476 rest = x[i+1:] 477 break 478 479 } 480 i++ 481 } else { 482 // Slow path for Unicode 483 r, n := utf8.DecodeRune(x[i:]) 484 if unicode.IsSpace(r) { 485 rest = x[i+n:] 486 break 487 } 488 i += n 489 } 490 } 491 field = x[:i] 492 493 // Strip whitespace from rest. 494 for len(rest) > 0 { 495 if rest[0] < utf8.RuneSelf { 496 if (isSpace>>rest[0])&1 == 0 { 497 break 498 } 499 rest = rest[1:] 500 } else { 501 r, n := utf8.DecodeRune(rest) 502 if !unicode.IsSpace(r) { 503 break 504 } 505 rest = rest[n:] 506 } 507 } 508 return 509 }