github.com/jgbaldwinbrown/perf@v0.1.1/benchfmt/reader.go

github.com/jgbaldwinbrown/perf@v0.1.1/benchfmt/reader.go (about)

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package benchfmt
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"fmt"
    11  	"io"
    12  	"math"
    13  	"unicode"
    14  	"unicode/utf8"
    15  
    16  	"golang.org/x/perf/benchfmt/internal/bytesconv"
    17  	"golang.org/x/perf/benchunit"
    18  )
    19  
// A Reader reads the Go benchmark format.
//
// Its API is modeled on bufio.Scanner. To minimize allocation, a
// Reader retains ownership of everything it creates; a caller should
// copy anything it needs to retain.
//
// To construct a new Reader, either call NewReader, or call Reset on
// a zeroed Reader.
type Reader struct {
	// s scans the current input one line at a time. Reset replaces it
	// with a fresh scanner over the new input.
	s   *bufio.Scanner
	err error // current I/O error

	// q is the queue of records to return before processing the next
	// input line. qPos is the index of the current record in q. We
	// track the index explicitly rather than slicing q so that we can
	// reuse the q slice when we reach the end.
	q    []Record
	qPos int

	// result is the one Result that every benchmark line is parsed
	// into; Scan queues a pointer to it, so its contents are replaced
	// by each subsequent benchmark line. It also holds the current
	// file name, line number, and file configuration.
	result Result
	// units is the unit metadata accumulated so far. Reset allocates it
	// if needed but never clears it.
	units  UnitMetadataMap

	// interns is the bounded table of strings used by intern to avoid
	// allocating a new string for repeated keys and units.
	interns map[string]string
}
    44  
    45  // A SyntaxError represents a syntax error on a particular line of a
    46  // benchmark results file.
    47  type SyntaxError struct {
    48  	FileName string
    49  	Line     int
    50  	Msg      string
    51  }
    52  
    53  func (e *SyntaxError) Pos() (fileName string, line int) {
    54  	return e.FileName, e.Line
    55  }
    56  
    57  func (s *SyntaxError) Error() string {
    58  	return fmt.Sprintf("%s:%d: %s", s.FileName, s.Line, s.Msg)
    59  }
    60  
// noResult is the record that Reader.Result returns when there is no
// current record in the queue, for example before the first call to
// Scan.
var noResult = &SyntaxError{"", 0, "Reader.Scan has not been called"}
    62  
    63  // NewReader constructs a reader to parse the Go benchmark format from r.
    64  // fileName is used in error messages; it is purely diagnostic.
    65  func NewReader(r io.Reader, fileName string) *Reader {
    66  	reader := new(Reader)
    67  	reader.Reset(r, fileName)
    68  	return reader
    69  }
    70  
    71  // newSyntaxError returns a *SyntaxError at the Reader's current position.
    72  func (r *Reader) newSyntaxError(msg string) *SyntaxError {
    73  	return &SyntaxError{r.result.fileName, r.result.line, msg}
    74  }
    75  
    76  // Reset resets the reader to begin reading from a new input.
    77  // It also resets all accumulated configuration values.
    78  // It does NOT reset unit metadata because it carries across files.
    79  //
    80  // initConfig is an alternating sequence of keys and values.
    81  // Reset will install these as the initial internal configuration
    82  // before any results are read from the input file.
    83  func (r *Reader) Reset(ior io.Reader, fileName string, initConfig ...string) {
    84  	r.s = bufio.NewScanner(ior)
    85  	if fileName == "" {
    86  		fileName = "<unknown>"
    87  	}
    88  	r.err = nil
    89  	if r.interns == nil {
    90  		r.interns = make(map[string]string)
    91  	}
    92  	if r.units == nil {
    93  		r.units = make(map[UnitMetadataKey]*UnitMetadata)
    94  	}
    95  
    96  	// Wipe the queue in case the user hasn't consumed everything from
    97  	// this file.
    98  	r.qPos = 0
    99  	r.q = r.q[:0]
   100  
   101  	// Wipe the Result.
   102  	r.result.Config = r.result.Config[:0]
   103  	r.result.Name = r.result.Name[:0]
   104  	r.result.Iters = 0
   105  	r.result.Values = r.result.Values[:0]
   106  	for k := range r.result.configPos {
   107  		delete(r.result.configPos, k)
   108  	}
   109  	r.result.fileName = fileName
   110  	r.result.line = 0
   111  
   112  	// Set up initial configuration.
   113  	if len(initConfig)%2 != 0 {
   114  		panic("len(initConfig) must be a multiple of 2")
   115  	}
   116  	for i := 0; i < len(initConfig); i += 2 {
   117  		r.result.SetConfig(initConfig[i], initConfig[i+1])
   118  	}
   119  }
   120  
// Literal prefixes used to classify input lines: Scan treats lines
// starting with benchmarkPrefix as benchmark results, and isUnitLine
// compares a line's first field against unitPrefix.
var (
	benchmarkPrefix = []byte("Benchmark")
	unitPrefix      = []byte("Unit")
)
   125  
   126  // Scan advances the reader to the next result and reports whether a
   127  // result was read.
   128  // The caller should use the Result method to get the result.
   129  // If Scan reaches EOF or an I/O error occurs, it returns false,
   130  // in which case the caller should use the Err method to check for errors.
   131  func (r *Reader) Scan() bool {
   132  	if r.err != nil {
   133  		return false
   134  	}
   135  
   136  	// If there's anything in the queue from an earlier line, just pop
   137  	// the queue and return without consuming any more input.
   138  	if r.qPos+1 < len(r.q) {
   139  		r.qPos++
   140  		return true
   141  	}
   142  	// Otherwise, we've drained the queue and need to parse more input
   143  	// to refill it. Reset it to 0 so we can reuse the space.
   144  	r.qPos = 0
   145  	r.q = r.q[:0]
   146  
   147  	// Process lines until we add something to the queue or hit EOF.
   148  	for len(r.q) == 0 && r.s.Scan() {
   149  		r.result.line++
   150  		// We do everything in byte buffers to avoid allocation.
   151  		line := r.s.Bytes()
   152  		// Most lines are benchmark lines, and we can check
   153  		// for that very quickly, so start with that.
   154  		if bytes.HasPrefix(line, benchmarkPrefix) {
   155  			// At this point we commit to this being a
   156  			// benchmark line. If it's malformed, we treat
   157  			// that as an error.
   158  			if err := r.parseBenchmarkLine(line); err != nil {
   159  				r.q = append(r.q, err)
   160  			} else {
   161  				r.q = append(r.q, &r.result)
   162  			}
   163  			continue
   164  		}
   165  		if len(line) > 0 && line[0] == 'U' {
   166  			if nLine, ok := r.isUnitLine(line); ok {
   167  				// Parse unit metadata line. This queues up its own
   168  				// records and errors.
   169  				r.parseUnitLine(nLine)
   170  				continue
   171  			}
   172  		}
   173  		if key, val, ok := parseKeyValueLine(line); ok {
   174  			// Intern key, since there tend to be few
   175  			// unique keys.
   176  			keyStr := r.intern(key)
   177  			if len(val) == 0 {
   178  				r.result.deleteConfig(keyStr)
   179  			} else {
   180  				cfg := r.result.ensureConfig(keyStr, true)
   181  				cfg.Value = append(cfg.Value[:0], val...)
   182  			}
   183  			continue
   184  		}
   185  		// Ignore the line.
   186  	}
   187  
   188  	if len(r.q) > 0 {
   189  		// We queued something up to return.
   190  		return true
   191  	}
   192  
   193  	// We hit EOF. Check for IO errors.
   194  	if err := r.s.Err(); err != nil {
   195  		r.err = fmt.Errorf("%s:%d: %w", r.result.fileName, r.result.line, err)
   196  		return false
   197  	}
   198  	r.err = nil
   199  	return false
   200  }
   201  
   202  // parseKeyValueLine attempts to parse line as a key: val pair,
   203  // with ok reporting whether the line could be parsed.
   204  func parseKeyValueLine(line []byte) (key, val []byte, ok bool) {
   205  	for i := 0; i < len(line); {
   206  		r, n := utf8.DecodeRune(line[i:])
   207  		// key begins with a lower case character ...
   208  		if i == 0 && !unicode.IsLower(r) {
   209  			return
   210  		}
   211  		// and contains no space characters nor upper case
   212  		// characters.
   213  		if unicode.IsSpace(r) || unicode.IsUpper(r) {
   214  			return
   215  		}
   216  		if i > 0 && r == ':' {
   217  			key, val = line[:i], line[i+1:]
   218  			break
   219  		}
   220  
   221  		i += n
   222  	}
   223  	if len(key) == 0 {
   224  		return
   225  	}
   226  	// Value can be omitted entirely, in which case the colon must
   227  	// still be present, but need not be followed by a space.
   228  	if len(val) == 0 {
   229  		ok = true
   230  		return
   231  	}
   232  	// One or more ASCII space or tab characters separate "key:"
   233  	// from "value."
   234  	for len(val) > 0 && (val[0] == ' ' || val[0] == '\t') {
   235  		val = val[1:]
   236  		ok = true
   237  	}
   238  	return
   239  }
   240  
   241  // parseBenchmarkLine parses line as a benchmark result and updates r.result.
   242  // The caller must have already checked that line begins with "Benchmark".
   243  func (r *Reader) parseBenchmarkLine(line []byte) *SyntaxError {
   244  	var f []byte
   245  	var err error
   246  
   247  	// Skip "Benchmark"
   248  	line = line[len("Benchmark"):]
   249  
   250  	// Read the name.
   251  	r.result.Name, line = splitField(line)
   252  
   253  	// Read the iteration count.
   254  	f, line = splitField(line)
   255  	if len(f) == 0 {
   256  		return r.newSyntaxError("missing iteration count")
   257  	}
   258  	r.result.Iters, err = bytesconv.Atoi(f)
   259  	switch err := err.(type) {
   260  	case nil:
   261  		// ok
   262  	case *bytesconv.NumError:
   263  		return r.newSyntaxError("parsing iteration count: " + err.Err.Error())
   264  	default:
   265  		return r.newSyntaxError(err.Error())
   266  	}
   267  
   268  	// Read value/unit pairs.
   269  	r.result.Values = r.result.Values[:0]
   270  	for {
   271  		f, line = splitField(line)
   272  		if len(f) == 0 {
   273  			if len(r.result.Values) > 0 {
   274  				break
   275  			}
   276  			return r.newSyntaxError("missing measurements")
   277  		}
   278  		val, err := atof(f)
   279  		switch err := err.(type) {
   280  		case nil:
   281  			// ok
   282  		case *bytesconv.NumError:
   283  			return r.newSyntaxError("parsing measurement: " + err.Err.Error())
   284  		default:
   285  			return r.newSyntaxError(err.Error())
   286  		}
   287  		f, line = splitField(line)
   288  		if len(f) == 0 {
   289  			return r.newSyntaxError("missing units")
   290  		}
   291  		unit := r.intern(f)
   292  
   293  		// Tidy the value.
   294  		tidyVal, tidyUnit := benchunit.Tidy(val, unit)
   295  		var v Value
   296  		if tidyVal == val {
   297  			v = Value{Value: val, Unit: unit}
   298  		} else {
   299  			v = Value{Value: tidyVal, Unit: tidyUnit, OrigValue: val, OrigUnit: unit}
   300  		}
   301  
   302  		r.result.Values = append(r.result.Values, v)
   303  	}
   304  
   305  	return nil
   306  }
   307  
   308  // isUnitLine tests whether line is a unit metadata line. If it is, it
   309  // returns the line after the "Unit" literal and true.
   310  func (r *Reader) isUnitLine(line []byte) (rest []byte, ok bool) {
   311  	var f []byte
   312  	// Is this a unit metadata line?
   313  	f, line = splitField(line)
   314  	if bytes.Equal(f, unitPrefix) {
   315  		return line, true
   316  	}
   317  	return nil, false
   318  }
   319  
   320  // parseUnitLine parses line as a unit metadata line, starting
   321  // after "Unit". It updates r.q.
   322  // If there are syntax errors on the line, it will attempt to parse
   323  // what it can and return a non-nil error.
   324  func (r *Reader) parseUnitLine(line []byte) {
   325  	var f []byte
   326  	// isUnitLine already consumed the literal "Unit".
   327  	// Consume the next field, which is the unit.
   328  	f, line = splitField(line)
   329  	if len(f) == 0 {
   330  		r.q = append(r.q, r.newSyntaxError("missing unit"))
   331  		return
   332  	}
   333  	unit := r.intern(f)
   334  
   335  	// The metadata map is indexed by tidied units because we want to
   336  	// support lookups by tidy units and there's no way to "untidy" a
   337  	// unit.
   338  	_, tidyUnit := benchunit.Tidy(1, unit)
   339  
   340  	// Consume key=value pairs.
   341  	for {
   342  		f, line = splitField(line)
   343  		if len(f) == 0 {
   344  			break
   345  		}
   346  		eq := bytes.IndexByte(f, '=')
   347  		if eq <= 0 {
   348  			r.q = append(r.q, r.newSyntaxError("expected key=value"))
   349  			continue
   350  		}
   351  		key := UnitMetadataKey{tidyUnit, r.intern(f[:eq])}
   352  		value := r.intern(f[eq+1:])
   353  
   354  		if have, ok := r.units[key]; ok {
   355  			if have.Value == value {
   356  				// We already have this unit metadata. Ignore.
   357  				continue
   358  			}
   359  			// Report incompatible unit metadata.
   360  			r.q = append(r.q, r.newSyntaxError(fmt.Sprintf("metadata %s of unit %s already set to %s", key.Key, unit, have.Value)))
   361  			continue
   362  		}
   363  
   364  		metadata := &UnitMetadata{key, unit, value, r.result.fileName, r.result.line}
   365  		r.units[key] = metadata
   366  		r.q = append(r.q, metadata)
   367  	}
   368  }
   369  
   370  func (r *Reader) intern(x []byte) string {
   371  	const maxIntern = 1024
   372  	if s, ok := r.interns[string(x)]; ok {
   373  		return s
   374  	}
   375  	if len(r.interns) >= maxIntern {
   376  		// Evict a random item from the interns table.
   377  		// Map iteration order is unspecified, but both
   378  		// the gc and libgo runtimes both provide random
   379  		// iteration order. The choice of item to evict doesn't
   380  		// affect correctness, so we do the simple thing.
   381  		for k := range r.interns {
   382  			delete(r.interns, k)
   383  			break
   384  		}
   385  	}
   386  	s := string(x)
   387  	r.interns[s] = s
   388  	return s
   389  }
   390  
// A Record is a single record read from a benchmark file. It may be a
// *Result, a *UnitMetadata, or a *SyntaxError.
type Record interface {
	// Pos returns the position of this record as a file name and a
	// 1-based line number within that file. If this record was not read
	// from a file, it returns "", 0.
	Pos() (fileName string, line int)
}
   399  
// Compile-time checks that every record type implements Record.
var _ Record = (*Result)(nil)
var _ Record = (*SyntaxError)(nil)
var _ Record = (*UnitMetadata)(nil)
   403  
   404  // Result returns the record that was just read by Scan. This is either
   405  // a *Result, a *UnitMetadata, or a *SyntaxError indicating a parse error.
   406  // It may return more types in the future.
   407  //
   408  // Parse errors are non-fatal, so the caller can continue to call
   409  // Scan.
   410  //
   411  // If this returns a *Result, the caller should not retain the Result,
   412  // as it will be overwritten by the next call to Scan.
   413  func (r *Reader) Result() Record {
   414  	if r.qPos >= len(r.q) {
   415  		// This should only happen if Scan has never been called.
   416  		return noResult
   417  	}
   418  	return r.q[r.qPos]
   419  }
   420  
// Err returns the first non-EOF I/O error that was encountered by the
// Reader. It returns nil if no such error has occurred, including
// when Scan stopped at a clean end of input. Syntax errors are not
// reported here; Scan queues them as *SyntaxError records, which are
// retrieved with Result.
func (r *Reader) Err() error {
	return r.err
}
   426  
// Units returns the accumulated unit metadata.
//
// Callers that want to consume the entire stream of benchmark results
// and then process units can use this instead of monitoring
// *UnitMetadata Records.
//
// The returned map is the Reader's own map, not a copy.
func (r *Reader) Units() UnitMetadataMap {
	return r.units
}
   435  
   436  // Parsing helpers.
   437  //
   438  // These are designed to leverage common fast paths. The ASCII fast
   439  // path is especially important, and more than doubles the performance
   440  // of the parser.
   441  
   442  // atof is a wrapper for bytesconv.ParseFloat that optimizes for
   443  // numbers that are usually integers.
   444  func atof(x []byte) (float64, error) {
   445  	// Try parsing as an integer.
   446  	var val int64
   447  	for _, ch := range x {
   448  		digit := ch - '0'
   449  		if digit >= 10 {
   450  			goto fail
   451  		}
   452  		if val > (math.MaxInt64-10)/10 {
   453  			goto fail // avoid int64 overflow
   454  		}
   455  		val = (val * 10) + int64(digit)
   456  	}
   457  	return float64(val), nil
   458  
   459  fail:
   460  	// The fast path failed. Parse it as a float.
   461  	return bytesconv.ParseFloat(x, 64)
   462  }
   463  
   464  const isSpace uint64 = 1<<'\t' | 1<<'\n' | 1<<'\v' | 1<<'\f' | 1<<'\r' | 1<<' '
   465  
   466  // splitField consumes and returns non-whitespace in x as field,
   467  // consumes whitespace following the field, and then returns the
   468  // remaining bytes of x.
   469  func splitField(x []byte) (field, rest []byte) {
   470  	// Collect non-whitespace into field.
   471  	var i int
   472  	for i = 0; i < len(x); {
   473  		if x[i] < utf8.RuneSelf {
   474  			// Fast path for ASCII
   475  			if (isSpace>>x[i])&1 != 0 {
   476  				rest = x[i+1:]
   477  				break
   478  
   479  			}
   480  			i++
   481  		} else {
   482  			// Slow path for Unicode
   483  			r, n := utf8.DecodeRune(x[i:])
   484  			if unicode.IsSpace(r) {
   485  				rest = x[i+n:]
   486  				break
   487  			}
   488  			i += n
   489  		}
   490  	}
   491  	field = x[:i]
   492  
   493  	// Strip whitespace from rest.
   494  	for len(rest) > 0 {
   495  		if rest[0] < utf8.RuneSelf {
   496  			if (isSpace>>rest[0])&1 == 0 {
   497  				break
   498  			}
   499  			rest = rest[1:]
   500  		} else {
   501  			r, n := utf8.DecodeRune(rest)
   502  			if !unicode.IsSpace(r) {
   503  				break
   504  			}
   505  			rest = rest[n:]
   506  		}
   507  	}
   508  	return
   509  }