// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package benchseries collects benchmark results into series of
// numerator/denominator (test vs. baseline) comparisons suitable for
// bootstrapped confidence-interval summaries.
package benchseries

import (
	"fmt"
	"math"
	"math/rand"
	"os"
	"regexp"
	"sort"
	"time"

	"golang.org/x/perf/benchfmt"
	"golang.org/x/perf/benchproc"
)

// A Cell is the observations for part of a benchmark comparison.
type Cell struct {
	Values []float64 // Actual values observed for this cell (sorted). Typically 1-100.

	// Residues is the set of residue Keys mapped to this cell.
	// It is used to check for non-unique keys.
	Residues map[benchproc.Key]struct{}
}

// A Comparison is a pair of numerator and denominator measurements,
// the date that they were collected (or the latest date if they were accumulated),
// an optional slice of medians of ratios of bootstrapped estimates
// and an optional summary node that contains the spreadsheet/json/database
// summary of this same information.
type Comparison struct {
	Numerator, Denominator *Cell
	Date                   string
	ratios                 []float64 // these are from bootstrapping. Typically 1000ish.
	Summary                *ComparisonSummary
}

// A ComparisonSummary is a summary of the comparison of a particular benchmark measurement
// for two different versions of the toolchain. Low, Center, and High are lower, middle and
// upper estimates of the value, most likely 2.5%ile, 50%ile, and 97.5%ile from a bootstrap
// of the original measurement ratios. Date is the (latest) date at which the measurements
// were taken. Present indicates that Low/Center/High/Date are valid; if comparison is non-nil,
// then there is a bootstrap that can be used or was used to initialize the other fields.
// (otherwise the source was JSON or a database).
type ComparisonSummary struct {
	Low        float64     `json:"low"`
	Center     float64     `json:"center"`
	High       float64     `json:"high"`
	Date       string      `json:"date"`
	Present    bool        `json:"present"` // is this initialized?
	comparison *Comparison // backlink for K-S computation, also indicates initialization of L/C/H
}

// Defined reports whether s carries valid summary data, i.e., s is
// non-nil and its Low/Center/High/Date fields have been initialized.
func (s *ComparisonSummary) Defined() bool {
	return s != nil && s.Present
}

// ComparisonHashes contains the git hashes of the two tool chains being compared.
type ComparisonHashes struct {
	NumHash, DenHash string
}

// A StringAndSlice pairs a residue field name (S) with the sorted set
// of values (Slice) observed for that field.
type StringAndSlice struct {
	S     string   `json:"s"`
	Slice []string `json:"slice"`
}

// A ComparisonSeries describes a table/graph, indexed by paired elements of Benchmarks, Series.
// Summaries contains the points in the graph.
// HashPairs includes annotations for the Series axis.
type ComparisonSeries struct {
	Unit string `json:"unit"`

	Benchmarks []string               `json:"benchmarks"`
	Series     []string               `json:"series"`
	Summaries  [][]*ComparisonSummary `json:"summaries"`

	HashPairs map[string]ComparisonHashes `json:"hashpairs"` // maps a series point to the hashes compared at that point.

	Residues []StringAndSlice `json:"residues"`

	// cells maps a (benchmark, series) pair to the comparison at that point.
	cells map[SeriesKey]*Comparison
}

// SeriesKey is a map key used to index a single cell in a ComparisonSeries.
// ordering is by benchmark, then "series" (== commit) order
type SeriesKey struct {
	Benchmark, Series string
}
// tableKey is a map key used to index a single cell in a lower-t table.
// ordering is by benchmark, then experiment order
type tableKey struct {
	Benchmark, Experiment benchproc.Key
}

// unitTableKey identifies one output table: a measurement unit plus the
// extra table-grouping keys (e.g. goos/goarch).
type unitTableKey struct {
	unit, table benchproc.Key
}

// A table accumulates, for one unitTableKey, the trials seen for each
// (benchmark, experiment) pair, plus the sets of benchmarks and
// experiments encountered.
type table struct {
	cells map[tableKey]*trial

	benchmarks map[benchproc.Key]struct{}
	exps       map[benchproc.Key]struct{}
}

// A trial is the data from one experiment: a baseline measurement and
// one or more test measurements keyed by the test toolchain's hash.
type trial struct {
	baseline           *Cell
	baselineHash       benchproc.Key
	baselineHashString string
	tests              map[benchproc.Key]*Cell // map from test hash id to test information
}

// A Builder collects benchmark results into a set of tables, and transforms that into a slice of ComparisonSeries.
type Builder struct {
	// one table per unit; each table maps from (benchmark,experiment) to a single trial of baseline vs one or more tests
	tables map[unitTableKey]*table

	// numHashBy to numerator order.
	hashToOrder map[benchproc.Key]benchproc.Key

	filter *benchproc.Filter

	unitBy, tableBy, pkgBy, experimentBy, benchBy, seriesBy, compareBy, numHashBy, denHashBy *benchproc.Projection

	denCompareVal string // the string value of compareBy that indicates the control/baseline in a comparison.
	numCompareVal string // the string value of compareBy that indicates the test in a comparison.
	residue       *benchproc.Projection

	unitField *benchproc.Field

	// Residues is the set of all residue keys seen by Add.
	Residues map[benchproc.Key]struct{}

	warn func(format string, args ...interface{})
}

// BuilderOptions configures how a Builder maps benchmark-result keys
// onto tables, series, experiments, and comparison roles.
type BuilderOptions struct {
	Filter          string // how to filter benchmark results, as a benchproc option (e.g., ".unit:/.*/")
	Series          string // the name of the benchmark key that contains the time of the last commit to the experiment branch (e.g. "numerator_stamp", "tip-commit-time")
	Table           string // list of benchmark keys to group ComparisonSeries tables by, in addition to .unit (e.g., "goarch,goos", "" (none))
	Experiment      string // the name of the benchmark key that contains the time at which the comparative benchmarks were run (e.g., "upload-time", "runstamp")
	Compare         string // the name of the benchmark key that contains the id/role of the toolchain being compared (e.g., "toolchain", "role")
	Numerator       string // the value of the Compare key that indicates the numerator in the ratios (i.e., "test", "tip", "experiment")
	Denominator     string // the value of the Compare key that indicates the denominator in the ratios (i.e., "control", "base", "baseline")
	NumeratorHash   string // the name of the benchmark key that contains the git hash of the numerator (test) toolchain
	DenominatorHash string // the name of the benchmark key that contains the git hash of the denominator (control) toolchain
	Ignore          string // list of benchmark keys to ignore entirely (e.g. "tip,base,bentstamp,suite")
	Warn            func(format string, args ...interface{})
}

// BentBuilderOptions returns the builder options matching benchmark
// results produced by the bent benchmarking harness.
func BentBuilderOptions() *BuilderOptions {
	return &BuilderOptions{
		Filter:          ".unit:/.*/",
		Series:          "numerator_stamp",
		Table:           "goarch,goos,builder_id",
		Experiment:      "runstamp",
		Compare:         "toolchain",
		Numerator:       "Tip",
		Denominator:     "Base",
		NumeratorHash:   "numerator_hash",
		DenominatorHash: "denominator_hash",
		Ignore:          "go,tip,base,bentstamp,suite,cpu,denominator_branch,.fullname,shortname",
		Warn: func(format string, args ...interface{}) {
			fmt.Fprintf(os.Stderr, format, args...)
		},
	}
}

// DefaultBuilderOptions returns the default builder options, matching
// the experiment/baseline key naming convention.
func DefaultBuilderOptions() *BuilderOptions {
	return &BuilderOptions{
		Filter:          ".unit:/.*/",
		Series:          "experiment-commit-time",
		Table:           "", // .unit only
		Experiment:      "runstamp",
		Compare:         "toolchain",
		Numerator:       "experiment",
		Denominator:     "baseline",
		NumeratorHash:   "experiment-commit",
		DenominatorHash: "baseline-commit",
		Ignore:          "go,tip,base,bentstamp,shortname,suite",
		Warn: func(format string, args ...interface{}) {
			fmt.Fprintf(os.Stderr, format, args...)
		},
	}
}

// noPuncDate matches the punctuation-free timestamp form 20211229T213212.
var noPuncDate = regexp.MustCompile("^[0-9]{8}T[0-9]{6}$")

// RFC3339NanoNoZ has the property that formatted date&time.000000000 < date&time.000000001,
// unlike RFC3339Nano where date&timeZ > date&timeZ.000000001Z
// i.e., "Z" > "."" but "+" < "." so if ".000000000" is elided must use "+00:00"
// to express the Z time zone to get the sort right.
const RFC3339NanoNoZ = "2006-01-02T15:04:05.999999999-07:00"

// NormalizeDateString converts dates in two formats used in bent/benchmarking
// into UTC, so that all sort properly into a single order with no confusion.
// It panics if in matches neither format (see panic(err) below).
func NormalizeDateString(in string) string {
	if noPuncDate.MatchString(in) {
		// Insert the punctuation RFC3339 requires, e.g.
		//20211229T213212
		//2021-12-29T21:32:12
		in = in[0:4] + "-" + in[4:6] + "-" + in[6:11] + ":" + in[11:13] + ":" + in[13:15] + "+00:00"
	}
	t, err := time.Parse(time.RFC3339Nano, in)
	if err == nil {
		return t.UTC().Format(RFC3339NanoNoZ)
	}
	panic(err)
}

// ParseNormalizedDateString parses a time in the format returned by
// NormalizeDateString.
func ParseNormalizedDateString(in string) (time.Time, error) {
	return time.Parse(RFC3339NanoNoZ, in)
}
// NewBuilder creates a new Builder for collecting benchmark results
// into tables. Each result will be mapped to a Table by seriesBy.
// Within each table, the results are mapped to cells by benchBy and
// seriesBy. Any results within a single cell that vary by residue will
// be reported as warnings.
func NewBuilder(bo *BuilderOptions) (*Builder, error) {

	filter, err := benchproc.NewFilter(bo.Filter)
	if err != nil {
		return nil, fmt.Errorf("parsing -filter: %s", err)
	}

	var parserErr error
	var parser benchproc.ProjectionParser
	// mustParse records a parse failure for later reporting; note that if
	// several projections fail, the last error encountered wins.
	mustParse := func(name, val string) *benchproc.Projection {
		schema, err := parser.Parse(val, filter)
		if err != nil {
			parserErr = fmt.Errorf("parsing %s: %s", name, err)
		}
		return schema
	}

	// The unit/table/fullname/pkg schemas are built from fixed strings,
	// so a parse failure here is a programming error, hence panic.
	unitBy, unitField, err := parser.ParseWithUnit("", nil)
	if err != nil {
		panic("Couldn't parse the unit schema")
	}

	tableBy, err := parser.Parse(bo.Table, nil)
	if err != nil {
		panic("Couldn't parse the table schema")
	}

	benchBy, err := parser.Parse(".fullname", nil)
	if err != nil {
		panic("Couldn't parse the .name schema")
	}

	pkgBy, err := parser.Parse("pkg", nil)
	if err != nil {
		panic("Couldn't parse 'pkg' schema")
	}

	seriesBy := mustParse("-series", bo.Series)
	experimentBy := mustParse("-experiment", bo.Experiment)
	compareBy := mustParse("-compare", bo.Compare)
	numHashBy := mustParse("-numerator-hash", bo.NumeratorHash)
	denHashBy := mustParse("-denominator-hash", bo.DenominatorHash)

	// Parsed only to consume the ignored keys so they do not appear in
	// the residue projection below.
	mustParse("-ignore", bo.Ignore)

	if parserErr != nil {
		return nil, parserErr
	}

	residue := parser.Residue()

	return &Builder{
		filter:        filter,
		unitBy:        unitBy,
		tableBy:       tableBy,
		pkgBy:         pkgBy,
		experimentBy:  experimentBy,
		benchBy:       benchBy,
		seriesBy:      seriesBy,
		compareBy:     compareBy,
		numHashBy:     numHashBy,
		denHashBy:     denHashBy,
		denCompareVal: bo.Denominator,
		numCompareVal: bo.Numerator,
		residue:       residue,
		unitField:     unitField,
		hashToOrder:   make(map[benchproc.Key]benchproc.Key),
		tables:        make(map[unitTableKey]*table),
		Residues:      make(map[benchproc.Key]struct{}),
		warn:          bo.Warn,
	}, nil
}

// AddFiles adds all of the results in files to the tables in the
// Builder, warning (not failing) on per-record syntax errors.
func (b *Builder) AddFiles(files benchfmt.Files) error {
	for files.Scan() {
		rec := files.Result()
		if err, ok := rec.(*benchfmt.SyntaxError); ok {
			// Non-fatal result parse error. Warn
			// but keep going.
			b.warn("%v\n", err)
			continue
		}
		res := rec.(*benchfmt.Result)

		b.Add(res)
	}
	if err := files.Err(); err != nil {
		return err
	}
	return nil
}

// Add adds all of the values in result to the tables in the Builder.
func (b *Builder) Add(result *benchfmt.Result) {
	if ok, _ := b.filter.Apply(result); !ok {
		return
	}

	// Project the result.
	unitCfgs := b.unitBy.ProjectValues(result)
	tableCfg := b.tableBy.Project(result)

	_ = b.pkgBy.Project(result) // for now we are dropping pkg on the floor

	expCfg := b.experimentBy.Project(result)
	benchCfg := b.benchBy.Project(result)
	serCfg := b.seriesBy.Project(result)
	cmpCfg := b.compareBy.Project(result)
	numHashCfg := b.numHashBy.Project(result)
	denHashCfg := b.denHashBy.Project(result)

	// tableBy, experimentBy, benchBy, seriesBy, compareBy, numHashBy, denHashBy

	residueCfg := b.residue.Project(result)
	cellCfg := tableKey{Benchmark: benchCfg, Experiment: expCfg}

	// Map to tables.
	for unitI, unitCfg := range unitCfgs {
		tuk := unitTableKey{unitCfg, tableCfg}
		table := b.tables[tuk]
		if table == nil {
			table = b.newTable()
			b.tables[tuk] = table
		}

		// Map to a trial.
		t := table.cells[cellCfg]
		if t == nil {
			t = new(trial)
			table.cells[cellCfg] = t
			t.tests = make(map[benchproc.Key]*Cell)

			table.exps[expCfg] = struct{}{}
			table.benchmarks[benchCfg] = struct{}{}

		}

		// Select the baseline or per-test-hash cell this value belongs in,
		// creating it on first sight.
		var c *Cell
		newCell := func() *Cell {
			return &Cell{Residues: make(map[benchproc.Key]struct{})}
		}
		if cmpCfg.StringValues() == b.denCompareVal {
			c = t.baseline
			if c == nil {
				c = newCell()
				t.baseline = c
				t.baselineHash = denHashCfg
				t.baselineHashString = denHashCfg.StringValues()
			}
		} else {
			c = t.tests[numHashCfg]
			if c == nil {
				c = newCell()
				t.tests[numHashCfg] = c
				b.hashToOrder[numHashCfg] = serCfg
			}
		}

		// Add to the cell.
		c.Values = append(c.Values, result.Values[unitI].Value)
		c.Residues[residueCfg] = struct{}{}
		b.Residues[residueCfg] = struct{}{}
	}
}

// newTable allocates an empty table with all of its maps initialized.
func (b *Builder) newTable() *table {
	return &table{
		benchmarks: make(map[benchproc.Key]struct{}),
		exps:       make(map[benchproc.Key]struct{}),
		cells:      make(map[tableKey]*trial),
	}
}

// union combines two sets of benchproc.Key into one.
// It returns one of its arguments unmodified when one is a superset of
// the other; otherwise it allocates a fresh combined set.
func union(a, b map[benchproc.Key]struct{}) map[benchproc.Key]struct{} {
	if len(b) < len(a) {
		a, b = b, a
	}
	for k := range a {
		if _, ok := b[k]; !ok {
			// a member of the not-larger set was not present in the larger set
			c := make(map[benchproc.Key]struct{})
			for k := range a {
				c[k] = struct{}{}
			}
			for k := range b {
				c[k] = struct{}{}
			}
			return c
		}
	}
	return b
}

// concat returns a freshly allocated slice holding a followed by b.
func concat(a, b []float64) []float64 {
	return append(append([]float64{}, a...), b...)
}

// Dispositions for duplicate samples of the same comparison point:
// DUPE_REPLACE keeps only the younger experiment, DUPE_COMBINE pools
// the observations from both.
const (
	DUPE_REPLACE = iota
	DUPE_COMBINE
	// TODO DUPE_REPEAT
)
// AllComparisonSeries converts the accumulated "experiments" into a slice of series of comparisons,
// with one slice element per goos-goarch-unit. The experiments need not have occurred in any
// sensible order; this deals with that, including overlaps (depend on flag, either replaces old with
// younger or combines, REPLACE IS PREFERRED and works properly with combining old summary data with
// fresh benchmarking data) and possibly also with previously processed summaries.
func (b *Builder) AllComparisonSeries(existing []*ComparisonSeries, dupeHow int) []*ComparisonSeries {
	old := make(map[string]*ComparisonSeries)
	for _, cs := range existing {
		old[cs.Unit] = cs
	}
	var css []*ComparisonSeries

	// Iterate over units.
	for _, u := range sortTableKeys(b.tables) {
		t := b.tables[u]
		uString := u.unit.StringValues()
		if ts := u.table.StringValues(); ts != "" {
			uString += " " + u.table.StringValues()
		}
		var cs *ComparisonSeries

		sers := make(map[string]struct{})
		benches := make(map[string]struct{})

		if o := old[uString]; o != nil {
			// Resume from an existing series: rebuild the cell map from its
			// already-defined summaries.
			cs = o
			delete(old, uString)

			cs.cells = make(map[SeriesKey]*Comparison)
			for i, s := range cs.Series {
				for j, b := range cs.Benchmarks {
					if cs.Summaries[i][j].Defined() {
						sk := SeriesKey{
							Benchmark: b,
							Series:    s,
						}
						benches[b] = struct{}{}
						sers[s] = struct{}{}
						sum := cs.Summaries[i][j]
						cc := &Comparison{Summary: sum, Date: sum.Date}
						sum.comparison = cc
						cs.cells[sk] = cc
					}
				}
			}

		} else {
			cs = &ComparisonSeries{Unit: uString,
				HashPairs: make(map[string]ComparisonHashes),
				cells:     make(map[SeriesKey]*Comparison),
			}
		}

		// TODO not handling overlapping samples between "existing" and "newly read" yet.

		// Rearrange into paired comparisons, gathering repeats of same comparison from multiple experiments.
		for tk, tr := range t.cells {
			// tk == bench, experiment, tr == baseline, tests, tests == map hash -> cell.
			bench := tk.Benchmark
			dateString := NormalizeDateString(tk.Experiment.StringValues())
			benchString := bench.StringValues()
			benches[benchString] = struct{}{}
			for hash, cell := range tr.tests {
				hashString := hash.StringValues()
				ser := b.hashToOrder[hash]
				serString := NormalizeDateString(ser.StringValues())
				sers[serString] = struct{}{}
				sk := SeriesKey{
					Benchmark: benchString,
					Series:    serString,
				}
				cc := cs.cells[sk]
				if cc == nil || dupeHow == DUPE_REPLACE {
					// Keep the younger of any duplicate experiments (by date).
					if cc == nil || cc.Date < dateString {
						cc = &Comparison{
							Numerator:   cell,
							Denominator: tr.baseline,
							Date:        dateString,
						}
						cs.cells[sk] = cc
					}

					hp, ok := cs.HashPairs[serString]
					if !ok {
						cs.HashPairs[serString] = ComparisonHashes{NumHash: hashString, DenHash: tr.baselineHashString}
					} else {
						if hp.NumHash != hashString || hp.DenHash != tr.baselineHashString {
							fmt.Fprintf(os.Stderr, "numerator/denominator mismatch, expected %s/%s got %s/%s\n",
								hp.NumHash, hp.DenHash, hashString, tr.baselineHashString)
						}
					}

				} else { // Current augments, but this will do the wrong thing if one is an old summary; also need to think about "repeat"
					// augment an existing measurement (i.e., a second experiment on this same datapoint)
					// fmt.Printf("Augment u:%s,b:%s,ch:%s,cd:%s; cc=%v[n(%d+%d)d(%d+%d)]\n",
					// u.StringValues(), bench.StringValues(), hash.StringValues(), ser.StringValues(),
					// cc, len(cc.Numerator.Values), len(cell.Values), len(cc.Denominator.Values), len(tr.baseline.Values))
					cc.Numerator = &Cell{
						Values:   concat(cc.Numerator.Values, cell.Values),
						Residues: union(cc.Numerator.Residues, cell.Residues),
					}
					cc.Denominator = &Cell{
						Values:   concat(cc.Denominator.Values, tr.baseline.Values),
						Residues: union(cc.Denominator.Residues, tr.baseline.Residues),
					}
					if cc.Date < dateString {
						cc.Date = dateString
					}
				}
			}
		}

		cs.Benchmarks = sortStringSet(benches)
		cs.Series = sortStringSet(sers)
		// Sort the observation values in every complete cell.
		for _, b := range cs.Benchmarks {
			for _, s := range cs.Series {
				cc := cs.cells[SeriesKey{Benchmark: b, Series: s}]
				if cc != nil && cc.Numerator != nil && cc.Denominator != nil {
					sort.Float64s(cc.Numerator.Values)
					sort.Float64s(cc.Denominator.Values)
				}
			}
		}

		// Accumulate residues for this unit's table
		type seenKey struct {
			f *benchproc.Field
			s string
		}

		seen := make(map[seenKey]bool)
		rmap := make(map[string][]string)

		for _, c := range cs.cells {
			for _, f := range b.residue.FlattenedFields() {
				if c.Numerator == nil {
					continue
				}
				// NOTE(review): only Numerator is nil-checked here; a cell with
				// a non-nil Numerator but nil Denominator (missing baseline)
				// would panic below — confirm that combination cannot occur.
				for k, _ := range c.Numerator.Residues {
					s := k.Get(f)
					if !seen[seenKey{f, s}] {
						seen[seenKey{f, s}] = true
						rmap[f.Name] = append(rmap[f.Name], s)
					}
				}
				for k, _ := range c.Denominator.Residues {
					s := k.Get(f)
					if !seen[seenKey{f, s}] {
						seen[seenKey{f, s}] = true
						rmap[f.Name] = append(rmap[f.Name], s)
					}
				}
			}
		}

		sas := []StringAndSlice{}
		for k, v := range rmap {
			sort.Strings(v)
			sas = append(sas, StringAndSlice{k, v})
		}
		sort.Slice(sas, func(i, j int) bool { return sas[i].S < sas[j].S })

		if len(cs.Residues) > 0 {
			// Need to merge old and new. Both lists are sorted by key;
			// this is a two-pointer sorted merge, with a nested sorted
			// merge (deduplicating) of the value slices for equal keys.
			osas, nsas := cs.Residues, []StringAndSlice{}
			for i, j := 0, 0; i < len(sas) || j < len(osas); {
				if i == len(sas) || j < len(osas) && osas[j].S < sas[i].S {
					nsas = append(nsas, osas[j])
					j++
					continue
				}
				if j == len(osas) || osas[j].S > sas[i].S {
					nsas = append(nsas, sas[i])
					i++
					continue
				}

				// S (keys) are equal, merge value slices
				sl, osl, nsl := sas[i].Slice, osas[j].Slice, []string{}
				for ii, jj := 0, 0; ii < len(sl) || jj < len(osl); {
					if ii == len(sl) || jj < len(osl) && osl[jj] < sl[ii] {
						nsl = append(nsl, osl[jj])
						jj++
						continue
					}
					if jj == len(osl) || osl[jj] > sl[ii] {
						nsl = append(nsl, sl[ii])
						ii++
						continue
					}
					nsl = append(nsl, sl[ii])
					ii++
					jj++
				}
				nsas = append(nsas, StringAndSlice{sas[i].S, nsl})
				i++
				j++
			}
			sas = nsas
		}

		cs.Residues = sas

		css = append(css, cs)
	}

	// Carry forward any existing series whose unit saw no new data.
	for _, cs := range existing {
		if o := old[cs.Unit]; o != nil {
			css = append(css, cs)
		}
	}

	return css
}

// sortStringSet returns the members of the set m as a sorted slice.
func sortStringSet(m map[string]struct{}) []string {
	var s []string
	for k := range m {
		s = append(s, k)
	}
	sort.Strings(s)
	return s
}

// sortTableKeys returns the keys of m ordered by unit string, then by
// table string.
func sortTableKeys(m map[unitTableKey]*table) []unitTableKey {
	var s []unitTableKey
	for k := range m {
		s = append(s, k)
	}
	sort.Slice(s, func(i, j int) bool {
		if s[i].unit != s[j].unit {
			return s[i].unit.StringValues() < s[j].unit.StringValues()
		}
		if s[i].table == s[j].table {
			return false
		}
		return s[i].table.StringValues() < s[j].table.StringValues()

	})
	return s
}

// absSortedPermFor returns the permutation that sorts a by absolute value.
func absSortedPermFor(a []float64) []int {
	p := make([]int, len(a), len(a))
	for i := range p {
		p[i] = i
	}
	sort.Slice(p, func(i, j int) bool {
		return math.Abs(a[p[i]]) < math.Abs(a[p[j]])
	})
	return p
}

// permute returns a reordered by the permutation p, i.e. b[i] = a[p[i]].
func permute(a []float64, p []int) []float64 {
	b := make([]float64, len(a), len(a))
	for i, j := range p {
		b[i] = a[j]
	}
	return b
}

// TODO Does this need to export the individual cells? What's the expected/intended use?
690 691 func (cs *ComparisonSeries) ComparisonAt(benchmark, series string) (*Comparison, bool) { 692 if cc := cs.cells[SeriesKey{Benchmark: benchmark, Series: series}]; cc != nil { 693 return cc, true 694 } 695 return nil, false 696 } 697 698 func (cs *ComparisonSeries) SummaryAt(benchmark, series string) (*ComparisonSummary, bool) { 699 if cc := cs.cells[SeriesKey{Benchmark: benchmark, Series: series}]; cc != nil { 700 return cc.Summary, true 701 } 702 return nil, false 703 } 704 705 func (c *Cell) resampleInto(r *rand.Rand, x []float64) { 706 l := len(x) 707 for i := range x { 708 x[i] = c.Values[r.Intn(l)] 709 } 710 sort.Float64s(x) 711 } 712 713 const rot = 23 714 715 func (c *Cell) hash() int64 { 716 var x int64 717 for _, v := range c.Values { 718 xlow := (x >> (64 - rot)) & (1<<rot - 1) 719 x = (x << rot) ^ xlow ^ int64(math.Float64bits(v)) 720 } 721 return x 722 } 723 724 // ratio computes a bootstrapped estimate of the confidence interval for 725 // the ratio of measurements in nu divided by measurements in de. 
726 func ratio(nu, de *Cell, confidence float64, r *rand.Rand, ratios []float64) (center, low, high float64) { 727 N := len(ratios) 728 rnu := make([]float64, len(nu.Values), len(nu.Values)) 729 rde := make([]float64, len(de.Values), len(de.Values)) 730 for i := 0; i < N; i++ { 731 nu.resampleInto(r, rnu) 732 de.resampleInto(r, rde) 733 den := median(rde) 734 if den == 0 { 735 num := median(rnu) 736 if num >= 0 { 737 ratios[i] = (num + 1) 738 } else { 739 ratios[i] = (num - 1) 740 } 741 } else { 742 ratios[i] = median(rnu) / den 743 } 744 } 745 sort.Float64s(ratios) 746 p := (1 - confidence) / 2 747 low = percentile(ratios, p) 748 high = percentile(ratios, 1-p) 749 center = median(ratios) 750 return 751 } 752 753 func percentile(a []float64, p float64) float64 { 754 if len(a) == 0 { 755 return math.NaN() 756 } 757 if p == 0 { 758 return a[0] 759 } 760 n := len(a) 761 if p == 1 { 762 return a[n-1] 763 } 764 f := float64(float64(n) * p) // Suppress fused-multiply-add 765 i := int(f) 766 x := f - float64(i) 767 r := a[i] 768 if x > 0 && i+1 < len(a) { 769 r = float64(r*(1-x)) + float64(a[i+1]*x) // Suppress fused-multiply-add 770 } 771 return r 772 } 773 774 func median(a []float64) float64 { 775 l := len(a) 776 if l&1 == 1 { 777 return a[l/2] 778 } 779 return (a[l/2] + a[l/2-1]) / 2 780 } 781 782 func norm(a []float64, l float64) float64 { 783 if len(a) == 0 { 784 return math.NaN() 785 } 786 n := 0.0 787 sum := 0.0 788 for _, x := range a { 789 if math.IsInf(x, 0) || math.IsNaN(x) { 790 continue 791 } 792 sum += math.Pow(math.Abs(x), l) 793 n++ 794 } 795 return math.Pow(sum/n, 1/l) 796 } 797 798 // ChangeScore returns an indicator of the change and direction. 799 // This is a heuristic measure of the lack of overlap between 800 // two confidence intervals; minimum lack of overlap (i.e., same 801 // confidence intervals) is zero. Exact non-overlap, meaning 802 // the high end of one interval is equal to the low end of the 803 // other, is one. 
A gap of size G between the two intervals 804 // yields a score of 1 + G/M where M is the size of the smaller 805 // interval (this penalizes a ChangeScore in noise, which is also a 806 // ChangeScore). A partial overlap of size G yields a score of 807 // 1 - G/M. 808 // 809 // Empty confidence intervals are problematic and produces infinities 810 // or NaNs. 811 func ChangeScore(l1, c1, h1, l2, c2, h2 float64) float64 { 812 sign := 1.0 813 if c1 > c2 { 814 l1, c1, h1, l2, c2, h2 = l2, c2, h2, l1, c1, h1 815 sign = -sign 816 } 817 r := math.Min(h1-l1, h2-l2) 818 // we know l1 < c1 < h1, c1 < c2, l2 < c2 < h2 819 // therefore l1 < c1 < c2 < h2 820 if h1 > l2 { // overlap 821 if h1 > h2 { 822 h1 = h2 823 } 824 if l2 < l1 { 825 l2 = l1 826 } 827 return sign * (1 - (h1-l2)/r) // perfect overlap == 0 828 } else { // no overlap 829 return sign * (1 + (l2-h1)/r) // 830 } 831 } 832 833 type compareFn func(c *Comparison) (center, low, high float64) 834 835 func withBootstrap(confidence float64, N int) compareFn { 836 return func(c *Comparison) (center, low, high float64) { 837 c.ratios = make([]float64, N, N) 838 r := rand.New(rand.NewSource(c.Numerator.hash() * c.Denominator.hash())) 839 center, low, high = ratio(c.Numerator, c.Denominator, confidence, r, c.ratios) 840 return 841 } 842 } 843 844 // KSov returns the size-adjusted Kolmogorov-Smirnov statistic, 845 // equal to D_{n,m} / sqrt((n+m)/n*m). The result can be compared 846 // to c(α) where α is the level at which the null hypothesis is rejected. 
// KSov returns the size-adjusted Kolmogorov-Smirnov statistic,
// equal to D_{n,m} / sqrt((n+m)/n*m). The result can be compared
// to c(α) where α is the level at which the null hypothesis is rejected.
//
//	α:    0.2   0.15  0.10  0.05  0.025 0.01  0.005 0.001
//	c(α): 1.073 1.138 1.224 1.358 1.48  1.628 1.731 1.949
//
// see
// https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test#Two-sample_Kolmogorov%E2%80%93Smirnov_test
func (a *ComparisonSummary) KSov(b *ComparisonSummary) float64 {
	// TODO Kolmogorov-Smirnov hasn't worked that well
	// NOTE(review): both receivers must have a non-nil comparison
	// backlink (i.e., summaries bootstrapped in this process); summaries
	// restored from JSON/database would panic here — confirm callers.
	ra, rb := a.comparison.ratios, b.comparison.ratios
	ia, ib := 0, 0
	la, lb := len(ra), len(rb)
	fla, flb := float64(la), float64(lb)

	gap := 0.0

	// March the two sorted ratio samples in tandem, tracking the largest
	// difference between their empirical CDFs (the K-S D statistic).
	for ia < la && ib < lb {
		if ra[ia] < rb[ib] {
			ia++
		} else if ra[ia] > rb[ib] {
			ib++
		} else {
			ia++
			ib++
		}
		g := math.Abs(float64(ia)/fla - float64(ib)/flb)
		if g > gap {
			gap = g
		}
	}
	return gap * math.Sqrt(fla*flb/(fla+flb))
}

// HeurOverlap computes a heuristic overlap between two confidence intervals
func (a *ComparisonSummary) HeurOverlap(b *ComparisonSummary, threshold float64) float64 {
	// Zero-width intervals (e.g. summaries without bootstrap data):
	// fall back to a relative-difference test against threshold,
	// returning ±100 to signal the direction of a change.
	if a.Low == a.High && b.Low == b.High {
		ca, cb, sign := a.Center, b.Center, 100.0
		if cb < ca {
			ca, cb, sign = cb, ca, -100.0
		}
		if ca == 0 {
			if cb > threshold {
				return sign
			}
		} else if (cb-ca)/ca > threshold {
			return sign
		}
		return 0
	}
	return ChangeScore(a.Low, a.Center, a.High, b.Low, b.Center, b.High)
}
902 func (cs *ComparisonSeries) AddSummaries(confidence float64, N int) { 903 fn := withBootstrap(confidence, N) 904 var tab [][]*ComparisonSummary 905 for _, s := range cs.Series { 906 row := []*ComparisonSummary{} 907 for _, b := range cs.Benchmarks { 908 if c, ok := cs.ComparisonAt(b, s); ok { 909 sum := c.Summary 910 if sum == nil || (!sum.Present && sum.comparison == nil) { 911 sum = &ComparisonSummary{comparison: c, Date: c.Date} 912 sum.Center, sum.Low, sum.High = fn(c) 913 sum.Present = true 914 c.Summary = sum 915 } 916 row = append(row, sum) 917 } else { 918 row = append(row, &ComparisonSummary{}) 919 } 920 } 921 tab = append(tab, row) 922 } 923 cs.Summaries = tab 924 }