github.com/jgbaldwinbrown/perf@v0.1.1/benchproc/projection.go (about) 1 // Copyright 2022 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package benchproc 6 7 import ( 8 "fmt" 9 "hash/maphash" 10 "strings" 11 "sync" 12 13 "golang.org/x/perf/benchfmt" 14 "golang.org/x/perf/benchproc/internal/parse" 15 ) 16 17 // TODO: If we support comparison operators in filter expressions, 18 // does it make sense to unify the orders understood by projections 19 // with the comparison orders supported in filters? One danger is that 20 // the default order for projections is observation order, but if you 21 // filter on key<val, you probably want that to be numeric by default 22 // (it's not clear you ever want a comparison on observation order). 23 24 // A ProjectionParser parses one or more related projection expressions. 25 type ProjectionParser struct { 26 configKeys map[string]bool // Specific .config keys (excluded from .config) 27 fullnameKeys []string // Specific sub-name keys (excluded from .fullname) 28 haveConfig bool // .config was projected 29 haveFullname bool // .fullname was projected 30 31 // Fields below here are constructed when the first Result is 32 // processed. 33 34 fullExtractor extractor 35 } 36 37 // Parse parses a single projection expression, such as ".name,/size". 38 // A projection expression describes how to extract fields of a 39 // benchfmt.Result into a Key and how to order the resulting Keys. See 40 // "go doc golang.org/x/perf/benchproc/syntax" for a description of 41 // projection syntax. 42 // 43 // A projection expression may also imply a filter, for example if 44 // there's a fixed order like "/size@(1MiB)". Parse will add any filters 45 // to "filter". 46 // 47 // If an application calls Parse multiple times on the same 48 // ProjectionParser, these form a mutually-exclusive group of 49 // projections in which specific keys in any projection are excluded 50 // from group keys in any other projection. The group keys are 51 // ".config" and ".fullname". For example, given two projections 52 // ".config" and "commit,date", the specific file configuration keys 53 // "commit" and "date" are excluded from the group key ".config". 54 // The result is the same regardless of the order these expressions 55 // are parsed in. 56 func (p *ProjectionParser) Parse(projection string, filter *Filter) (*Projection, error) { 57 if p.configKeys == nil { 58 p.configKeys = make(map[string]bool) 59 } 60 61 proj := newProjection() 62 63 // Parse the projection. 64 parts, err := parse.ParseProjection(projection) 65 if err != nil { 66 return nil, err 67 } 68 var filterParts []filterFn 69 for _, part := range parts { 70 f, err := p.makeProjection(proj, projection, part) 71 if err != nil { 72 return nil, err 73 } 74 if f != nil { 75 filterParts = append(filterParts, f) 76 } 77 } 78 // Now that we've ensured the projection is valid, add any 79 // filter parts to the filter. 80 if len(filterParts) > 0 { 81 if filter == nil { 82 panic(fmt.Sprintf("projection expression %s contains a filter, but Parse was passed a nil *Filter", projection)) 83 } 84 filterParts = append(filterParts, filter.match) 85 filter.match = filterOp(parse.OpAnd, filterParts) 86 } 87 88 return proj, nil 89 } 90 91 // ParseWithUnit is like Parse, but the returned Projection has an 92 // additional field called ".unit" that extracts the unit of each 93 // individual benchfmt.Value in a benchfmt.Result. It returns the 94 // Projection and the ".unit" Field. 95 // 96 // Typically, callers need to break out individual benchmark values on 97 // some dimension of a set of Projections. Adding a .unit field makes 98 // this easy. 99 // 100 // Callers should use the ProjectValues method of the returned 101 // Projection rather than the Project method to project each value 102 // rather than the whole benchfmt.Result. 103 func (p *ProjectionParser) ParseWithUnit(projection string, filter *Filter) (*Projection, *Field, error) { 104 proj, err := p.Parse(projection, filter) 105 if err != nil { 106 return nil, nil, err 107 } 108 field := proj.addField(proj.root, ".unit") 109 field.order = make(map[string]int) 110 field.cmp = func(a, b string) int { 111 return field.order[a] - field.order[b] 112 } 113 proj.unitField = field 114 return proj, field, nil 115 } 116 117 // Residue returns a projection for any field not yet projected by any 118 // projection parsed by p. The resulting Projection does not have a 119 // meaningful order. 120 // 121 // For example, following calls to p.Parse("goos") and 122 // p.Parse(".fullname"), Reside would return a Projection with fields 123 // for all file configuration fields except goos. 124 // 125 // The intended use of this is to report when a user may have 126 // over-aggregated results. Specifically, track the residues of all of 127 // the benchfmt.Results that are aggregated together (e.g., into a 128 // single table cell). If there's more than one distinct residue, report 129 // that those results differed in some field. Typically this is used 130 // with NonSingularFields to report exactly which fields differ. 131 func (p *ProjectionParser) Residue() *Projection { 132 s := newProjection() 133 134 // The .config and .fullname groups together cover the 135 // projection space. If they haven't already been specified, 136 // then these groups (with any specific keys excluded) exactly 137 // form the remainder. 138 if !p.haveConfig { 139 p.makeProjection(s, "", parse.Field{Key: ".config", Order: "first"}) 140 } 141 if !p.haveFullname { 142 p.makeProjection(s, "", parse.Field{Key: ".fullname", Order: "first"}) 143 } 144 145 return s 146 } 147 148 func (p *ProjectionParser) makeProjection(s *Projection, q string, proj parse.Field) (filterFn, error) { 149 // Construct the order function. 150 var initField func(field *Field) 151 var filter filterFn 152 makeFilter := func(ext extractor) {} 153 if proj.Order == "fixed" { 154 fixedMap := make(map[string]int, len(proj.Fixed)) 155 for i, s := range proj.Fixed { 156 fixedMap[s] = i 157 } 158 initField = func(field *Field) { 159 field.cmp = func(a, b string) int { 160 return fixedMap[a] - fixedMap[b] 161 } 162 } 163 makeFilter = func(ext extractor) { 164 filter = func(res *benchfmt.Result) (mask, bool) { 165 _, ok := fixedMap[string(ext(res))] 166 return nil, ok 167 } 168 } 169 } else if proj.Order == "first" { 170 initField = func(field *Field) { 171 field.order = make(map[string]int) 172 field.cmp = func(a, b string) int { 173 return field.order[a] - field.order[b] 174 } 175 } 176 } else if cmp, ok := builtinOrders[proj.Order]; ok { 177 initField = func(field *Field) { 178 field.cmp = cmp 179 } 180 } else { 181 return nil, &parse.SyntaxError{q, proj.OrderOff, fmt.Sprintf("unknown order %q", proj.Order)} 182 } 183 184 var project func(*benchfmt.Result, *[]string) 185 switch proj.Key { 186 case ".config": 187 // File configuration, excluding any more 188 // specific file keys. 189 if proj.Order == "fixed" { 190 // Fixed orders don't make sense for a whole tuple. 191 return nil, &parse.SyntaxError{q, proj.OrderOff, fmt.Sprintf("fixed order not allowed for .config")} 192 } 193 194 p.haveConfig = true 195 group := s.addGroup(s.root, ".config") 196 seen := make(map[string]*Field) 197 project = func(r *benchfmt.Result, row *[]string) { 198 for _, cfg := range r.Config { 199 if !cfg.File { 200 continue 201 } 202 203 // Have we already seen this key? If so, use its already 204 // assigned field index. 205 field, ok := seen[cfg.Key] 206 if !ok { 207 // This closure doesn't get called until we've 208 // parsed all projections, so p.configKeys is fully 209 // populated from all parsed projections. 210 if p.configKeys[cfg.Key] { 211 // This key was explicitly specified in another 212 // projection, so omit it from .config. 213 continue 214 } 215 // Create a new field for this new key. 216 field = s.addField(group, cfg.Key) 217 initField(field) 218 seen[cfg.Key] = field 219 } 220 221 (*row)[field.idx] = s.intern(cfg.Value) 222 } 223 } 224 225 case ".fullname": 226 // Full benchmark name, including name config. 227 // We want to exclude any more specific keys, 228 // including keys from later projections, so 229 // we delay constructing the extractor until 230 // we process the first Result. 231 p.haveFullname = true 232 field := s.addField(s.root, ".fullname") 233 initField(field) 234 makeFilter(extractFull) 235 236 project = func(r *benchfmt.Result, row *[]string) { 237 if p.fullExtractor == nil { 238 p.fullExtractor = newExtractorFullName(p.fullnameKeys) 239 } 240 val := p.fullExtractor(r) 241 (*row)[field.idx] = s.intern(val) 242 } 243 244 case ".unit": 245 return nil, &parse.SyntaxError{q, proj.KeyOff, ".unit is only allowed in filters"} 246 247 default: 248 // This is a specific sub-name or file key. Add it 249 // to the excludes. 250 if proj.Key == ".name" || strings.HasPrefix(proj.Key, "/") { 251 p.fullnameKeys = append(p.fullnameKeys, proj.Key) 252 } else { 253 p.configKeys[proj.Key] = true 254 } 255 ext, err := newExtractor(proj.Key) 256 if err != nil { 257 return nil, &parse.SyntaxError{q, proj.KeyOff, err.Error()} 258 } 259 field := s.addField(s.root, proj.Key) 260 initField(field) 261 makeFilter(ext) 262 project = func(r *benchfmt.Result, row *[]string) { 263 val := ext(r) 264 (*row)[field.idx] = s.intern(val) 265 } 266 } 267 s.project = append(s.project, project) 268 return filter, nil 269 } 270 271 // A Projection extracts some subset of the fields of a benchfmt.Result 272 // into a Key. 273 // 274 // A Projection also implies a sort order over Keys that is 275 // lexicographic over the fields of the Projection. The sort order of 276 // each individual field is specified by the projection expression and 277 // defaults to the order in which values of that field were first 278 // observed. 279 type Projection struct { 280 root *Field 281 nFields int 282 283 // unitField, if non-nil, is the ".unit" field used to project 284 // the values of a benchmark result. 285 unitField *Field 286 287 // project is a set of functions that project a Result into 288 // row. 289 // 290 // These take a pointer to row because these functions may 291 // grow the set of fields, so the row slice may grow. 292 project []func(r *benchfmt.Result, row *[]string) 293 294 // row is the buffer used to construct a projection. 295 row []string 296 297 // flatCache is a cache of the flattened sort fields in tuple 298 // comparison order. 299 flatCache []*Field 300 flatCacheOnce *sync.Once 301 302 // interns is used to intern the []byte to string conversion. It's 303 // keyed by string because we can't key a map on []byte, but the 304 // compiler elides the string allocation in interns[string(x)], so 305 // lookups are still cheap. These strings are always referenced in 306 // keys, so this doesn't cause any over-retention. 307 interns map[string]string 308 309 // keys are the interned Keys of this Projection. 310 keys map[uint64][]*keyNode 311 } 312 313 func newProjection() *Projection { 314 var p Projection 315 p.root = &Field{idx: -1} 316 p.flatCacheOnce = new(sync.Once) 317 p.interns = make(map[string]string) 318 p.keys = make(map[uint64][]*keyNode) 319 return &p 320 } 321 322 func (p *Projection) addField(group *Field, name string) *Field { 323 if group.idx != -1 { 324 panic("field's parent is not a group") 325 } 326 327 // Assign this field an index. 328 field := &Field{Name: name, proj: p, idx: p.nFields} 329 p.nFields++ 330 group.Sub = append(group.Sub, field) 331 // Clear the flat cache. 332 if p.flatCache != nil { 333 p.flatCache = nil 334 p.flatCacheOnce = new(sync.Once) 335 } 336 // Add to the row buffer. 337 p.row = append(p.row, "") 338 return field 339 } 340 341 func (p *Projection) addGroup(group *Field, name string) *Field { 342 field := &Field{Name: name, IsTuple: true, proj: p, idx: -1} 343 group.Sub = append(group.Sub, field) 344 return field 345 } 346 347 // Fields returns the fields of p. These correspond exactly to the 348 // fields in the Projection's projection expression. 349 // 350 // The caller must not modify the returned slice. 351 func (p *Projection) Fields() []*Field { 352 return p.root.Sub 353 } 354 355 // FlattenedFields is like Fields, but expands tuple Fields 356 // (specifically, ".config") into their sub-Fields. This is also the 357 // sequence of Fields used for sorting Keys returned from this 358 // Projection. 359 // 360 // The caller must not modify the returned slice. 361 func (p *Projection) FlattenedFields() []*Field { 362 // This can reasonably be called in parallel after all results have 363 // been projected, so we make sure it's thread-safe. 364 p.flatCacheOnce.Do(func() { 365 p.flatCache = []*Field{} 366 var walk func(f *Field) 367 walk = func(f *Field) { 368 if f.idx != -1 { 369 p.flatCache = append(p.flatCache, f) 370 return 371 } 372 for _, sub := range f.Sub { 373 walk(sub) 374 } 375 } 376 walk(p.root) 377 }) 378 return p.flatCache 379 } 380 381 // A Field is a single field of a Projection. 382 // 383 // For example, in the projection ".name,/gomaxprocs", ".name" and 384 // "/gomaxprocs" are both Fields. 385 // 386 // A Field may be a group field with sub-Fields. 387 type Field struct { 388 Name string 389 390 // IsTuple indicates that this Field is a tuple that does not itself 391 // have a string value. 392 IsTuple bool 393 394 // Sub is the sequence of sub-Fields for a group field. 395 Sub []*Field 396 397 proj *Projection 398 399 // idx gives the index of this field's values in a keyNode. 400 // 401 // Indexes are assigned sequentially as new sub-Fields are added to 402 // group Fields. This allows the set of Fields to grow without 403 // invalidating existing Keys. 404 // 405 // idx is -1 for Fields that are not directly stored in a keyNode, 406 // such as the root Field and ".config". 407 idx int 408 409 // cmp is the comparison function for values of this field. It 410 // returns <0 if a < b, >0 if a > b, or 0 if a == b or a and b 411 // are unorderable. 412 cmp func(a, b string) int 413 414 // order, if non-nil, records the observation order of this 415 // field. 416 order map[string]int 417 } 418 419 // String returns the name of Field f. 420 func (f Field) String() string { 421 return f.Name 422 } 423 424 var keySeed = maphash.MakeSeed() 425 426 // Project extracts fields from benchmark Result r according to 427 // Projection s and returns them as a Key. 428 // 429 // Two Keys produced by Project will be == if and only if their 430 // projected fields have the same values. Notably, this means Keys can 431 // be used as Go map keys, which is useful for grouping benchmark 432 // results. 433 // 434 // Calling Project may add new sub-Fields to group Fields in this 435 // Projection. For example, if the Projection has a ".config" field and 436 // r has a never-before-seen file configuration key, this will add a new 437 // sub-Field to the ".config" Field. 438 // 439 // If this Projection includes a .units field, it will be left as "" in 440 // the resulting Key. The caller should use ProjectValues instead. 441 func (p *Projection) Project(r *benchfmt.Result) Key { 442 p.populateRow(r) 443 return p.internRow() 444 } 445 446 // ProjectValues is like Project, but for each benchmark value of 447 // r.Values individually. The returned slice corresponds to the 448 // r.Values slice. 449 // 450 // If this Projection includes a .unit field, it will differ between 451 // these Keys. If not, then all of the Keys will be identical 452 // because the benchmark values vary only on .unit. 453 func (p *Projection) ProjectValues(r *benchfmt.Result) []Key { 454 p.populateRow(r) 455 out := make([]Key, len(r.Values)) 456 if p.unitField == nil { 457 // There's no .unit, so the Keys will all be the same. 458 key := p.internRow() 459 for i := range out { 460 out[i] = key 461 } 462 return out 463 } 464 // Vary the .unit field. 465 for i, val := range r.Values { 466 p.row[p.unitField.idx] = val.Unit 467 out[i] = p.internRow() 468 } 469 return out 470 } 471 472 func (p *Projection) populateRow(r *benchfmt.Result) { 473 // Clear the row buffer. 474 for i := range p.row { 475 p.row[i] = "" 476 } 477 478 // Run the projection functions to fill in row. 479 for _, proj := range p.project { 480 // proj may add fields and grow row. 481 proj(r, &p.row) 482 } 483 } 484 485 func (p *Projection) internRow() Key { 486 // Hash the row. This must be invariant to unused trailing fields: the 487 // field set can grow, and if those new fields are later cleared, 488 // we want Keys from before the growth to equal Keys from after the growth. 489 row := p.row 490 for len(row) > 0 && row[len(row)-1] == "" { 491 row = row[:len(row)-1] 492 } 493 var h maphash.Hash 494 h.SetSeed(keySeed) 495 for _, val := range row { 496 h.WriteString(val) 497 } 498 hash := h.Sum64() 499 500 // Check if we already have this key. 501 keys := p.keys[hash] 502 for _, key := range keys { 503 if key.equalRow(row) { 504 return Key{key} 505 } 506 } 507 508 // Update observation orders. 509 for _, field := range p.Fields() { 510 if field.order == nil { 511 // Not tracking observation order for this field. 512 continue 513 } 514 var val string 515 if field.idx < len(row) { 516 val = row[field.idx] 517 } 518 if _, ok := field.order[val]; !ok { 519 field.order[val] = len(field.order) 520 } 521 } 522 523 // Save the key. 524 key := &keyNode{p, append([]string(nil), row...)} 525 p.keys[hash] = append(p.keys[hash], key) 526 return Key{key} 527 } 528 529 func (p *Projection) intern(b []byte) string { 530 if str, ok := p.interns[string(b)]; ok { 531 return str 532 } 533 str := string(b) 534 p.interns[str] = str 535 return str 536 }