github.com/jgbaldwinbrown/perf@v0.1.1/benchproc/projection.go (about)

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package benchproc
     6  
     7  import (
     8  	"fmt"
     9  	"hash/maphash"
    10  	"strings"
    11  	"sync"
    12  
    13  	"golang.org/x/perf/benchfmt"
    14  	"golang.org/x/perf/benchproc/internal/parse"
    15  )
    16  
    17  // TODO: If we support comparison operators in filter expressions,
    18  // does it make sense to unify the orders understood by projections
    19  // with the comparison orders supported in filters? One danger is that
    20  // the default order for projections is observation order, but if you
    21  // filter on key<val, you probably want that to be numeric by default
    22  // (it's not clear you ever want a comparison on observation order).
    23  
// A ProjectionParser parses one or more related projection expressions.
//
// Projections parsed by the same ProjectionParser share its exclusion
// state: keys named explicitly in one projection are left out of the
// ".config" and ".fullname" group keys of the others.
type ProjectionParser struct {
	configKeys   map[string]bool // Specific .config keys (excluded from .config)
	fullnameKeys []string        // Specific sub-name keys (excluded from .fullname)
	haveConfig   bool            // .config was projected
	haveFullname bool            // .fullname was projected

	// Fields below here are constructed when the first Result is
	// processed.

	// fullExtractor is the ".fullname" extractor. It is built from
	// fullnameKeys the first time a ".fullname" projection function
	// runs, so that keys from every parsed projection are excluded.
	fullExtractor extractor
}
    36  
    37  // Parse parses a single projection expression, such as ".name,/size".
    38  // A projection expression describes how to extract fields of a
    39  // benchfmt.Result into a Key and how to order the resulting Keys. See
    40  // "go doc golang.org/x/perf/benchproc/syntax" for a description of
    41  // projection syntax.
    42  //
    43  // A projection expression may also imply a filter, for example if
    44  // there's a fixed order like "/size@(1MiB)". Parse will add any filters
    45  // to "filter".
    46  //
    47  // If an application calls Parse multiple times on the same
    48  // ProjectionParser, these form a mutually-exclusive group of
    49  // projections in which specific keys in any projection are excluded
    50  // from group keys in any other projection. The group keys are
    51  // ".config" and ".fullname". For example, given two projections
    52  // ".config" and "commit,date", the specific file configuration keys
    53  // "commit" and "date" are excluded from the group key ".config".
    54  // The result is the same regardless of the order these expressions
    55  // are parsed in.
    56  func (p *ProjectionParser) Parse(projection string, filter *Filter) (*Projection, error) {
    57  	if p.configKeys == nil {
    58  		p.configKeys = make(map[string]bool)
    59  	}
    60  
    61  	proj := newProjection()
    62  
    63  	// Parse the projection.
    64  	parts, err := parse.ParseProjection(projection)
    65  	if err != nil {
    66  		return nil, err
    67  	}
    68  	var filterParts []filterFn
    69  	for _, part := range parts {
    70  		f, err := p.makeProjection(proj, projection, part)
    71  		if err != nil {
    72  			return nil, err
    73  		}
    74  		if f != nil {
    75  			filterParts = append(filterParts, f)
    76  		}
    77  	}
    78  	// Now that we've ensured the projection is valid, add any
    79  	// filter parts to the filter.
    80  	if len(filterParts) > 0 {
    81  		if filter == nil {
    82  			panic(fmt.Sprintf("projection expression %s contains a filter, but Parse was passed a nil *Filter", projection))
    83  		}
    84  		filterParts = append(filterParts, filter.match)
    85  		filter.match = filterOp(parse.OpAnd, filterParts)
    86  	}
    87  
    88  	return proj, nil
    89  }
    90  
    91  // ParseWithUnit is like Parse, but the returned Projection has an
    92  // additional field called ".unit" that extracts the unit of each
    93  // individual benchfmt.Value in a benchfmt.Result. It returns the
    94  // Projection and the ".unit" Field.
    95  //
    96  // Typically, callers need to break out individual benchmark values on
    97  // some dimension of a set of Projections. Adding a .unit field makes
    98  // this easy.
    99  //
   100  // Callers should use the ProjectValues method of the returned
   101  // Projection rather than the Project method to project each value
   102  // rather than the whole benchfmt.Result.
   103  func (p *ProjectionParser) ParseWithUnit(projection string, filter *Filter) (*Projection, *Field, error) {
   104  	proj, err := p.Parse(projection, filter)
   105  	if err != nil {
   106  		return nil, nil, err
   107  	}
   108  	field := proj.addField(proj.root, ".unit")
   109  	field.order = make(map[string]int)
   110  	field.cmp = func(a, b string) int {
   111  		return field.order[a] - field.order[b]
   112  	}
   113  	proj.unitField = field
   114  	return proj, field, nil
   115  }
   116  
   117  // Residue returns a projection for any field not yet projected by any
   118  // projection parsed by p. The resulting Projection does not have a
   119  // meaningful order.
   120  //
   121  // For example, following calls to p.Parse("goos") and
   122  // p.Parse(".fullname"), Reside would return a Projection with fields
   123  // for all file configuration fields except goos.
   124  //
   125  // The intended use of this is to report when a user may have
   126  // over-aggregated results. Specifically, track the residues of all of
   127  // the benchfmt.Results that are aggregated together (e.g., into a
   128  // single table cell). If there's more than one distinct residue, report
   129  // that those results differed in some field. Typically this is used
   130  // with NonSingularFields to report exactly which fields differ.
   131  func (p *ProjectionParser) Residue() *Projection {
   132  	s := newProjection()
   133  
   134  	// The .config and .fullname groups together cover the
   135  	// projection space. If they haven't already been specified,
   136  	// then these groups (with any specific keys excluded) exactly
   137  	// form the remainder.
   138  	if !p.haveConfig {
   139  		p.makeProjection(s, "", parse.Field{Key: ".config", Order: "first"})
   140  	}
   141  	if !p.haveFullname {
   142  		p.makeProjection(s, "", parse.Field{Key: ".fullname", Order: "first"})
   143  	}
   144  
   145  	return s
   146  }
   147  
   148  func (p *ProjectionParser) makeProjection(s *Projection, q string, proj parse.Field) (filterFn, error) {
   149  	// Construct the order function.
   150  	var initField func(field *Field)
   151  	var filter filterFn
   152  	makeFilter := func(ext extractor) {}
   153  	if proj.Order == "fixed" {
   154  		fixedMap := make(map[string]int, len(proj.Fixed))
   155  		for i, s := range proj.Fixed {
   156  			fixedMap[s] = i
   157  		}
   158  		initField = func(field *Field) {
   159  			field.cmp = func(a, b string) int {
   160  				return fixedMap[a] - fixedMap[b]
   161  			}
   162  		}
   163  		makeFilter = func(ext extractor) {
   164  			filter = func(res *benchfmt.Result) (mask, bool) {
   165  				_, ok := fixedMap[string(ext(res))]
   166  				return nil, ok
   167  			}
   168  		}
   169  	} else if proj.Order == "first" {
   170  		initField = func(field *Field) {
   171  			field.order = make(map[string]int)
   172  			field.cmp = func(a, b string) int {
   173  				return field.order[a] - field.order[b]
   174  			}
   175  		}
   176  	} else if cmp, ok := builtinOrders[proj.Order]; ok {
   177  		initField = func(field *Field) {
   178  			field.cmp = cmp
   179  		}
   180  	} else {
   181  		return nil, &parse.SyntaxError{q, proj.OrderOff, fmt.Sprintf("unknown order %q", proj.Order)}
   182  	}
   183  
   184  	var project func(*benchfmt.Result, *[]string)
   185  	switch proj.Key {
   186  	case ".config":
   187  		// File configuration, excluding any more
   188  		// specific file keys.
   189  		if proj.Order == "fixed" {
   190  			// Fixed orders don't make sense for a whole tuple.
   191  			return nil, &parse.SyntaxError{q, proj.OrderOff, fmt.Sprintf("fixed order not allowed for .config")}
   192  		}
   193  
   194  		p.haveConfig = true
   195  		group := s.addGroup(s.root, ".config")
   196  		seen := make(map[string]*Field)
   197  		project = func(r *benchfmt.Result, row *[]string) {
   198  			for _, cfg := range r.Config {
   199  				if !cfg.File {
   200  					continue
   201  				}
   202  
   203  				// Have we already seen this key? If so, use its already
   204  				// assigned field index.
   205  				field, ok := seen[cfg.Key]
   206  				if !ok {
   207  					// This closure doesn't get called until we've
   208  					// parsed all projections, so p.configKeys is fully
   209  					// populated from all parsed projections.
   210  					if p.configKeys[cfg.Key] {
   211  						// This key was explicitly specified in another
   212  						// projection, so omit it from .config.
   213  						continue
   214  					}
   215  					// Create a new field for this new key.
   216  					field = s.addField(group, cfg.Key)
   217  					initField(field)
   218  					seen[cfg.Key] = field
   219  				}
   220  
   221  				(*row)[field.idx] = s.intern(cfg.Value)
   222  			}
   223  		}
   224  
   225  	case ".fullname":
   226  		// Full benchmark name, including name config.
   227  		// We want to exclude any more specific keys,
   228  		// including keys from later projections, so
   229  		// we delay constructing the extractor until
   230  		// we process the first Result.
   231  		p.haveFullname = true
   232  		field := s.addField(s.root, ".fullname")
   233  		initField(field)
   234  		makeFilter(extractFull)
   235  
   236  		project = func(r *benchfmt.Result, row *[]string) {
   237  			if p.fullExtractor == nil {
   238  				p.fullExtractor = newExtractorFullName(p.fullnameKeys)
   239  			}
   240  			val := p.fullExtractor(r)
   241  			(*row)[field.idx] = s.intern(val)
   242  		}
   243  
   244  	case ".unit":
   245  		return nil, &parse.SyntaxError{q, proj.KeyOff, ".unit is only allowed in filters"}
   246  
   247  	default:
   248  		// This is a specific sub-name or file key. Add it
   249  		// to the excludes.
   250  		if proj.Key == ".name" || strings.HasPrefix(proj.Key, "/") {
   251  			p.fullnameKeys = append(p.fullnameKeys, proj.Key)
   252  		} else {
   253  			p.configKeys[proj.Key] = true
   254  		}
   255  		ext, err := newExtractor(proj.Key)
   256  		if err != nil {
   257  			return nil, &parse.SyntaxError{q, proj.KeyOff, err.Error()}
   258  		}
   259  		field := s.addField(s.root, proj.Key)
   260  		initField(field)
   261  		makeFilter(ext)
   262  		project = func(r *benchfmt.Result, row *[]string) {
   263  			val := ext(r)
   264  			(*row)[field.idx] = s.intern(val)
   265  		}
   266  	}
   267  	s.project = append(s.project, project)
   268  	return filter, nil
   269  }
   270  
// A Projection extracts some subset of the fields of a benchfmt.Result
// into a Key.
//
// A Projection also implies a sort order over Keys that is
// lexicographic over the fields of the Projection. The sort order of
// each individual field is specified by the projection expression and
// defaults to the order in which values of that field were first
// observed.
type Projection struct {
	// root is the root of the Field tree. Its idx is -1 and its Sub
	// holds the top-level fields returned by Fields.
	root *Field
	// nFields is the number of value-carrying fields added so far;
	// it is also the index given to the next field added.
	nFields int

	// unitField, if non-nil, is the ".unit" field used to project
	// the values of a benchmark result.
	unitField *Field

	// project is a set of functions that project a Result into
	// row.
	//
	// These take a pointer to row because these functions may
	// grow the set of fields, so the row slice may grow.
	project []func(r *benchfmt.Result, row *[]string)

	// row is the buffer used to construct a projection. It has one
	// slot per value-carrying field.
	row []string

	// flatCache is a cache of the flattened sort fields in tuple
	// comparison order. It is reset, together with flatCacheOnce,
	// whenever a field is added after the cache was built.
	flatCache     []*Field
	flatCacheOnce *sync.Once

	// interns is used to intern the []byte to string conversion. It's
	// keyed by string because we can't key a map on []byte, but the
	// compiler elides the string allocation in interns[string(x)], so
	// lookups are still cheap. These strings are always referenced in
	// keys, so this doesn't cause any over-retention.
	interns map[string]string

	// keys are the interned Keys of this Projection, bucketed by the
	// hash of their row.
	keys map[uint64][]*keyNode
}
   312  
   313  func newProjection() *Projection {
   314  	var p Projection
   315  	p.root = &Field{idx: -1}
   316  	p.flatCacheOnce = new(sync.Once)
   317  	p.interns = make(map[string]string)
   318  	p.keys = make(map[uint64][]*keyNode)
   319  	return &p
   320  }
   321  
   322  func (p *Projection) addField(group *Field, name string) *Field {
   323  	if group.idx != -1 {
   324  		panic("field's parent is not a group")
   325  	}
   326  
   327  	// Assign this field an index.
   328  	field := &Field{Name: name, proj: p, idx: p.nFields}
   329  	p.nFields++
   330  	group.Sub = append(group.Sub, field)
   331  	// Clear the flat cache.
   332  	if p.flatCache != nil {
   333  		p.flatCache = nil
   334  		p.flatCacheOnce = new(sync.Once)
   335  	}
   336  	// Add to the row buffer.
   337  	p.row = append(p.row, "")
   338  	return field
   339  }
   340  
   341  func (p *Projection) addGroup(group *Field, name string) *Field {
   342  	field := &Field{Name: name, IsTuple: true, proj: p, idx: -1}
   343  	group.Sub = append(group.Sub, field)
   344  	return field
   345  }
   346  
   347  // Fields returns the fields of p. These correspond exactly to the
   348  // fields in the Projection's projection expression.
   349  //
   350  // The caller must not modify the returned slice.
   351  func (p *Projection) Fields() []*Field {
   352  	return p.root.Sub
   353  }
   354  
   355  // FlattenedFields is like Fields, but expands tuple Fields
   356  // (specifically, ".config") into their sub-Fields. This is also the
   357  // sequence of Fields used for sorting Keys returned from this
   358  // Projection.
   359  //
   360  // The caller must not modify the returned slice.
   361  func (p *Projection) FlattenedFields() []*Field {
   362  	// This can reasonably be called in parallel after all results have
   363  	// been projected, so we make sure it's thread-safe.
   364  	p.flatCacheOnce.Do(func() {
   365  		p.flatCache = []*Field{}
   366  		var walk func(f *Field)
   367  		walk = func(f *Field) {
   368  			if f.idx != -1 {
   369  				p.flatCache = append(p.flatCache, f)
   370  				return
   371  			}
   372  			for _, sub := range f.Sub {
   373  				walk(sub)
   374  			}
   375  		}
   376  		walk(p.root)
   377  	})
   378  	return p.flatCache
   379  }
   380  
// A Field is a single field of a Projection.
//
// For example, in the projection ".name,/gomaxprocs", ".name" and
// "/gomaxprocs" are both Fields.
//
// A Field may be a group field with sub-Fields.
type Field struct {
	// Name is the name of this Field, such as ".name" or a file
	// configuration key.
	Name string

	// IsTuple indicates that this Field is a tuple that does not itself
	// have a string value.
	IsTuple bool

	// Sub is the sequence of sub-Fields for a group field.
	Sub []*Field

	// proj is the Projection this Field belongs to.
	proj *Projection

	// idx gives the index of this field's values in a keyNode.
	//
	// Indexes are assigned sequentially as new sub-Fields are added to
	// group Fields. This allows the set of Fields to grow without
	// invalidating existing Keys.
	//
	// idx is -1 for Fields that are not directly stored in a keyNode,
	// such as the root Field and ".config".
	idx int

	// cmp is the comparison function for values of this field. It
	// returns <0 if a < b, >0 if a > b, or 0 if a == b or a and b
	// are unorderable.
	cmp func(a, b string) int

	// order, if non-nil, records the observation order of this
	// field: each value maps to the number of distinct values that
	// had been recorded before it.
	order map[string]int
}

// String returns the name of Field f.
func (f Field) String() string {
	return f.Name
}
   423  
// keySeed is the maphash seed used by internRow to hash rows. It is
// created once per process and shared by every Projection.
var keySeed = maphash.MakeSeed()
   425  
   426  // Project extracts fields from benchmark Result r according to
   427  // Projection s and returns them as a Key.
   428  //
   429  // Two Keys produced by Project will be == if and only if their
   430  // projected fields have the same values. Notably, this means Keys can
   431  // be used as Go map keys, which is useful for grouping benchmark
   432  // results.
   433  //
   434  // Calling Project may add new sub-Fields to group Fields in this
   435  // Projection. For example, if the Projection has a ".config" field and
   436  // r has a never-before-seen file configuration key, this will add a new
   437  // sub-Field to the ".config" Field.
   438  //
   439  // If this Projection includes a .units field, it will be left as "" in
   440  // the resulting Key. The caller should use ProjectValues instead.
   441  func (p *Projection) Project(r *benchfmt.Result) Key {
   442  	p.populateRow(r)
   443  	return p.internRow()
   444  }
   445  
   446  // ProjectValues is like Project, but for each benchmark value of
   447  // r.Values individually. The returned slice corresponds to the
   448  // r.Values slice.
   449  //
   450  // If this Projection includes a .unit field, it will differ between
   451  // these Keys. If not, then all of the Keys will be identical
   452  // because the benchmark values vary only on .unit.
   453  func (p *Projection) ProjectValues(r *benchfmt.Result) []Key {
   454  	p.populateRow(r)
   455  	out := make([]Key, len(r.Values))
   456  	if p.unitField == nil {
   457  		// There's no .unit, so the Keys will all be the same.
   458  		key := p.internRow()
   459  		for i := range out {
   460  			out[i] = key
   461  		}
   462  		return out
   463  	}
   464  	// Vary the .unit field.
   465  	for i, val := range r.Values {
   466  		p.row[p.unitField.idx] = val.Unit
   467  		out[i] = p.internRow()
   468  	}
   469  	return out
   470  }
   471  
   472  func (p *Projection) populateRow(r *benchfmt.Result) {
   473  	// Clear the row buffer.
   474  	for i := range p.row {
   475  		p.row[i] = ""
   476  	}
   477  
   478  	// Run the projection functions to fill in row.
   479  	for _, proj := range p.project {
   480  		// proj may add fields and grow row.
   481  		proj(r, &p.row)
   482  	}
   483  }
   484  
   485  func (p *Projection) internRow() Key {
   486  	// Hash the row. This must be invariant to unused trailing fields: the
   487  	// field set can grow, and if those new fields are later cleared,
   488  	// we want Keys from before the growth to equal Keys from after the growth.
   489  	row := p.row
   490  	for len(row) > 0 && row[len(row)-1] == "" {
   491  		row = row[:len(row)-1]
   492  	}
   493  	var h maphash.Hash
   494  	h.SetSeed(keySeed)
   495  	for _, val := range row {
   496  		h.WriteString(val)
   497  	}
   498  	hash := h.Sum64()
   499  
   500  	// Check if we already have this key.
   501  	keys := p.keys[hash]
   502  	for _, key := range keys {
   503  		if key.equalRow(row) {
   504  			return Key{key}
   505  		}
   506  	}
   507  
   508  	// Update observation orders.
   509  	for _, field := range p.Fields() {
   510  		if field.order == nil {
   511  			// Not tracking observation order for this field.
   512  			continue
   513  		}
   514  		var val string
   515  		if field.idx < len(row) {
   516  			val = row[field.idx]
   517  		}
   518  		if _, ok := field.order[val]; !ok {
   519  			field.order[val] = len(field.order)
   520  		}
   521  	}
   522  
   523  	// Save the key.
   524  	key := &keyNode{p, append([]string(nil), row...)}
   525  	p.keys[hash] = append(p.keys[hash], key)
   526  	return Key{key}
   527  }
   528  
   529  func (p *Projection) intern(b []byte) string {
   530  	if str, ok := p.interns[string(b)]; ok {
   531  		return str
   532  	}
   533  	str := string(b)
   534  	p.interns[str] = str
   535  	return str
   536  }