github.com/jgbaldwinbrown/perf@v0.1.1/benchseries/benchseries.go

github.com/jgbaldwinbrown/perf@v0.1.1/benchseries/benchseries.go (about)

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package benchseries
     6  
     7  import (
     8  	"fmt"
     9  	"math"
    10  	"math/rand"
    11  	"os"
    12  	"regexp"
    13  	"sort"
    14  	"time"
    15  
    16  	"golang.org/x/perf/benchfmt"
    17  	"golang.org/x/perf/benchproc"
    18  )
    19  
    20  // A Cell is the observations for part of a benchmark comparison.
    21  type Cell struct {
    22  	Values []float64 // Actual values observed for this cell (sorted).  Typically 1-100.
    23  
    24  	// Residues is the set of residue Keys mapped to this cell.
    25  	// It is used to check for non-unique keys.
    26  	Residues map[benchproc.Key]struct{}
    27  }
    28  
    29  // A Comparison is a pair of numerator and denominator measurements,
    30  // the date that they were collected (or the latest date if they were accumulated),
    31  // an optional slice of medians of ratios of bootstrapped estimates
    32  // and an optional summary node that contains the spreadsheet/json/database
    33  // summary of this same information.
    34  type Comparison struct {
    35  	Numerator, Denominator *Cell
    36  	Date                   string
    37  	ratios                 []float64 // these are from bootstrapping. Typically 1000ish.
    38  	Summary                *ComparisonSummary
    39  }
    40  
    41  // A ComparisonSummary is a summary of the comparison of a particular benchmark measurement
    42  // for two different versions of the toolchain.  Low, Center, and High are lower, middle and
    43  // upper estimates of the value, most likely 2.5%ile, 50%ile, and 97.5%ile from a bootstrap
    44  // of the original measurement ratios.  Date is the (latest) date at which the measurements
    45  // were taken.  Present indicates that Low/Center/High/Date are valid; if comparison is non-nil,
    46  // then there is a bootstrap that can be used or was used to initialize the other fields.
    47  // (otherwise the source was JSON or a database).
    48  type ComparisonSummary struct {
    49  	Low        float64     `json:"low"`
    50  	Center     float64     `json:"center"`
    51  	High       float64     `json:"high"`
    52  	Date       string      `json:"date"`
    53  	Present    bool        `json:"present"` // is this initialized?
    54  	comparison *Comparison // backlink for K-S computation, also indicates initialization of L/C/H
    55  }
    56  
    57  func (s *ComparisonSummary) Defined() bool {
    58  	return s != nil && s.Present
    59  }
    60  
    61  // ComparisonHashes contains the git hashes of the two tool chains being compared.
    62  type ComparisonHashes struct {
    63  	NumHash, DenHash string
    64  }
    65  
    66  type StringAndSlice struct {
    67  	S     string   `json:"s"`
    68  	Slice []string `json:"slice"`
    69  }
    70  
    71  // A ComparisonSeries describes a table/graph, indexed by paired elements of Benchmarks, Series.
    72  // Summaries contains the points in the graph.
    73  // HashPairs includes annotations for the Series axis.
    74  type ComparisonSeries struct {
    75  	Unit string `json:"unit"`
    76  
    77  	Benchmarks []string               `json:"benchmarks"`
    78  	Series     []string               `json:"series"`
    79  	Summaries  [][]*ComparisonSummary `json:"summaries"`
    80  
    81  	HashPairs map[string]ComparisonHashes `json:"hashpairs"` // maps a series point to the hashes compared at that point.
    82  
    83  	Residues []StringAndSlice `json:"residues"`
    84  
    85  	cells map[SeriesKey]*Comparison
    86  }
    87  
    88  // SeriesKey is a map key used to index a single cell in a ComparisonSeries.
    89  // ordering is by benchmark, then "series" (== commit) order
    90  type SeriesKey struct {
    91  	Benchmark, Series string
    92  }
    93  
    94  // tableKey is a map key used to index a single cell in a lower-t table.
    95  // ordering is by benchmark, then experiment order
    96  type tableKey struct {
    97  	Benchmark, Experiment benchproc.Key
    98  }
    99  
   100  type unitTableKey struct {
   101  	unit, table benchproc.Key
   102  }
   103  
   104  type table struct {
   105  	cells map[tableKey]*trial
   106  
   107  	benchmarks map[benchproc.Key]struct{}
   108  	exps       map[benchproc.Key]struct{}
   109  }
   110  
   111  type trial struct {
   112  	baseline           *Cell
   113  	baselineHash       benchproc.Key
   114  	baselineHashString string
   115  	tests              map[benchproc.Key]*Cell // map from test hash id to test information
   116  }
   117  
   118  // A Builder collects benchmark results into a set of tables, and transforms that into a slice of ComparisonSeries.
   119  type Builder struct {
   120  	// one table per unit; each table maps from (benchmark,experiment) to a single trial of baseline vs one or more tests
   121  	tables map[unitTableKey]*table
   122  
   123  	// numHashBy to numerator order.
   124  	hashToOrder map[benchproc.Key]benchproc.Key
   125  
   126  	filter *benchproc.Filter
   127  
   128  	unitBy, tableBy, pkgBy, experimentBy, benchBy, seriesBy, compareBy, numHashBy, denHashBy *benchproc.Projection
   129  	denCompareVal                                                                            string // the string value of compareBy that indicates the control/baseline in a comparison.
   130  	numCompareVal                                                                            string // the string value of compareBy that indicates the test in a comparison.
   131  	residue                                                                                  *benchproc.Projection
   132  
   133  	unitField *benchproc.Field
   134  
   135  	Residues map[benchproc.Key]struct{}
   136  
   137  	warn func(format string, args ...interface{})
   138  }
   139  
   140  type BuilderOptions struct {
   141  	Filter          string // how to filter benchmark results, as a benchproc option (e.g., ".unit:/.*/")
   142  	Series          string // the name of the benchmark key that contains the time of the last commit to the experiment branch (e.g. "numerator_stamp", "tip-commit-time")
   143  	Table           string // list of benchmark keys to group ComparisonSeries tables by, in addition to .unit (e.g., "goarch,goos", "" (none))
   144  	Experiment      string // the name of the benchmark key that contains the time at which the comparative benchmarks were run (e.g., "upload-time", "runstamp")
   145  	Compare         string // the name of the benchmark key that contains the id/role of the toolchain being compared (e.g., "toolchain", "role")
   146  	Numerator       string // the value of the Compare key that indicates the numerator in the ratios (i.e., "test", "tip", "experiment")
   147  	Denominator     string // the value of the Compare key that indicates the denominator in the ratios (i.e., "control", "base", "baseline")
   148  	NumeratorHash   string // the name of the benchmark key that contains the git hash of the numerator (test) toolchain
   149  	DenominatorHash string // the name of the benchmark key that contains the git hash of the denominator (control) toolchain
   150  	Ignore          string // list of benchmark keys to ignore entirely (e.g. "tip,base,bentstamp,suite")
   151  	Warn            func(format string, args ...interface{})
   152  }
   153  
   154  func BentBuilderOptions() *BuilderOptions {
   155  	return &BuilderOptions{
   156  		Filter:          ".unit:/.*/",
   157  		Series:          "numerator_stamp",
   158  		Table:           "goarch,goos,builder_id",
   159  		Experiment:      "runstamp",
   160  		Compare:         "toolchain",
   161  		Numerator:       "Tip",
   162  		Denominator:     "Base",
   163  		NumeratorHash:   "numerator_hash",
   164  		DenominatorHash: "denominator_hash",
   165  		Ignore:          "go,tip,base,bentstamp,suite,cpu,denominator_branch,.fullname,shortname",
   166  		Warn: func(format string, args ...interface{}) {
   167  			fmt.Fprintf(os.Stderr, format, args...)
   168  		},
   169  	}
   170  }
   171  
   172  func DefaultBuilderOptions() *BuilderOptions {
   173  	return &BuilderOptions{
   174  		Filter:          ".unit:/.*/",
   175  		Series:          "experiment-commit-time",
   176  		Table:           "", // .unit only
   177  		Experiment:      "runstamp",
   178  		Compare:         "toolchain",
   179  		Numerator:       "experiment",
   180  		Denominator:     "baseline",
   181  		NumeratorHash:   "experiment-commit",
   182  		DenominatorHash: "baseline-commit",
   183  		Ignore:          "go,tip,base,bentstamp,shortname,suite",
   184  		Warn: func(format string, args ...interface{}) {
   185  			fmt.Fprintf(os.Stderr, format, args...)
   186  		},
   187  	}
   188  }
   189  
   190  var noPuncDate = regexp.MustCompile("^[0-9]{8}T[0-9]{6}$")
   191  
   192  // RFC3339NanoNoZ has the property that formatted date&time.000000000 < date&time.000000001,
   193  // unlike RFC3339Nano where date&timeZ > date&timeZ.000000001Z
   194  // i.e., "Z" > "."" but "+" < "." so if ".000000000" is elided must use "+00:00"
   195  // to express the Z time zone to get the sort right.
   196  const RFC3339NanoNoZ = "2006-01-02T15:04:05.999999999-07:00"
   197  
   198  // NormalizeDateString converts dates in two formats used in bent/benchmarking
   199  // into UTC, so that all sort properly into a single order with no confusion.
   200  func NormalizeDateString(in string) string {
   201  	if noPuncDate.MatchString(in) {
   202  		//20211229T213212
   203  		//2021-12-29T21:32:12
   204  		in = in[0:4] + "-" + in[4:6] + "-" + in[6:11] + ":" + in[11:13] + ":" + in[13:15] + "+00:00"
   205  	}
   206  	t, err := time.Parse(time.RFC3339Nano, in)
   207  	if err == nil {
   208  		return t.UTC().Format(RFC3339NanoNoZ)
   209  	}
   210  	panic(err)
   211  }
   212  
   213  // ParseNormalizedDateString parses a time in the format returned by
   214  // NormalizeDateString.
   215  func ParseNormalizedDateString(in string) (time.Time, error) {
   216  	return time.Parse(RFC3339NanoNoZ, in)
   217  }
   218  
   219  // NewBuilder creates a new Builder for collecting benchmark results
   220  // into tables. Each result will be mapped to a Table by seriesBy.
   221  // Within each table, the results are mapped to cells by benchBy and
   222  // seriesBy. Any results within a single cell that vary by residue will
   223  // be reported as warnings.
   224  func NewBuilder(bo *BuilderOptions) (*Builder, error) {
   225  
   226  	filter, err := benchproc.NewFilter(bo.Filter)
   227  	if err != nil {
   228  		return nil, fmt.Errorf("parsing -filter: %s", err)
   229  	}
   230  
   231  	var parserErr error
   232  	var parser benchproc.ProjectionParser
   233  	mustParse := func(name, val string) *benchproc.Projection {
   234  		schema, err := parser.Parse(val, filter)
   235  		if err != nil {
   236  			parserErr = fmt.Errorf("parsing %s: %s", name, err)
   237  		}
   238  		return schema
   239  	}
   240  
   241  	unitBy, unitField, err := parser.ParseWithUnit("", nil)
   242  	if err != nil {
   243  		panic("Couldn't parse the unit schema")
   244  	}
   245  
   246  	tableBy, err := parser.Parse(bo.Table, nil)
   247  	if err != nil {
   248  		panic("Couldn't parse the table schema")
   249  	}
   250  
   251  	benchBy, err := parser.Parse(".fullname", nil)
   252  	if err != nil {
   253  		panic("Couldn't parse the .name schema")
   254  	}
   255  
   256  	pkgBy, err := parser.Parse("pkg", nil)
   257  	if err != nil {
   258  		panic("Couldn't parse 'pkg' schema")
   259  	}
   260  
   261  	seriesBy := mustParse("-series", bo.Series)
   262  	experimentBy := mustParse("-experiment", bo.Experiment)
   263  	compareBy := mustParse("-compare", bo.Compare)
   264  	numHashBy := mustParse("-numerator-hash", bo.NumeratorHash)
   265  	denHashBy := mustParse("-denominator-hash", bo.DenominatorHash)
   266  
   267  	mustParse("-ignore", bo.Ignore)
   268  
   269  	if parserErr != nil {
   270  		return nil, parserErr
   271  	}
   272  
   273  	residue := parser.Residue()
   274  
   275  	return &Builder{
   276  		filter:        filter,
   277  		unitBy:        unitBy,
   278  		tableBy:       tableBy,
   279  		pkgBy:         pkgBy,
   280  		experimentBy:  experimentBy,
   281  		benchBy:       benchBy,
   282  		seriesBy:      seriesBy,
   283  		compareBy:     compareBy,
   284  		numHashBy:     numHashBy,
   285  		denHashBy:     denHashBy,
   286  		denCompareVal: bo.Denominator,
   287  		numCompareVal: bo.Numerator,
   288  		residue:       residue,
   289  		unitField:     unitField,
   290  		hashToOrder:   make(map[benchproc.Key]benchproc.Key),
   291  		tables:        make(map[unitTableKey]*table),
   292  		Residues:      make(map[benchproc.Key]struct{}),
   293  		warn:          bo.Warn,
   294  	}, nil
   295  }
   296  
   297  func (b *Builder) AddFiles(files benchfmt.Files) error {
   298  	for files.Scan() {
   299  		rec := files.Result()
   300  		if err, ok := rec.(*benchfmt.SyntaxError); ok {
   301  			// Non-fatal result parse error. Warn
   302  			// but keep going.
   303  			b.warn("%v\n", err)
   304  			continue
   305  		}
   306  		res := rec.(*benchfmt.Result)
   307  
   308  		b.Add(res)
   309  	}
   310  	if err := files.Err(); err != nil {
   311  		return err
   312  	}
   313  	return nil
   314  }
   315  
   316  // Add adds all of the values in result to the tables in the Builder.
   317  func (b *Builder) Add(result *benchfmt.Result) {
   318  	if ok, _ := b.filter.Apply(result); !ok {
   319  		return
   320  	}
   321  
   322  	// Project the result.
   323  	unitCfgs := b.unitBy.ProjectValues(result)
   324  	tableCfg := b.tableBy.Project(result)
   325  
   326  	_ = b.pkgBy.Project(result) // for now we are dropping pkg on the floor
   327  
   328  	expCfg := b.experimentBy.Project(result)
   329  	benchCfg := b.benchBy.Project(result)
   330  	serCfg := b.seriesBy.Project(result)
   331  	cmpCfg := b.compareBy.Project(result)
   332  	numHashCfg := b.numHashBy.Project(result)
   333  	denHashCfg := b.denHashBy.Project(result)
   334  
   335  	// tableBy, experimentBy, benchBy, seriesBy, compareBy, numHashBy, denHashBy
   336  
   337  	residueCfg := b.residue.Project(result)
   338  	cellCfg := tableKey{Benchmark: benchCfg, Experiment: expCfg}
   339  
   340  	// Map to tables.
   341  	for unitI, unitCfg := range unitCfgs {
   342  		tuk := unitTableKey{unitCfg, tableCfg}
   343  		table := b.tables[tuk]
   344  		if table == nil {
   345  			table = b.newTable()
   346  			b.tables[tuk] = table
   347  		}
   348  
   349  		// Map to a trial.
   350  		t := table.cells[cellCfg]
   351  		if t == nil {
   352  			t = new(trial)
   353  			table.cells[cellCfg] = t
   354  			t.tests = make(map[benchproc.Key]*Cell)
   355  
   356  			table.exps[expCfg] = struct{}{}
   357  			table.benchmarks[benchCfg] = struct{}{}
   358  
   359  		}
   360  
   361  		var c *Cell
   362  		newCell := func() *Cell {
   363  			return &Cell{Residues: make(map[benchproc.Key]struct{})}
   364  		}
   365  		if cmpCfg.StringValues() == b.denCompareVal {
   366  			c = t.baseline
   367  			if c == nil {
   368  				c = newCell()
   369  				t.baseline = c
   370  				t.baselineHash = denHashCfg
   371  				t.baselineHashString = denHashCfg.StringValues()
   372  			}
   373  		} else {
   374  			c = t.tests[numHashCfg]
   375  			if c == nil {
   376  				c = newCell()
   377  				t.tests[numHashCfg] = c
   378  				b.hashToOrder[numHashCfg] = serCfg
   379  			}
   380  		}
   381  
   382  		// Add to the cell.
   383  		c.Values = append(c.Values, result.Values[unitI].Value)
   384  		c.Residues[residueCfg] = struct{}{}
   385  		b.Residues[residueCfg] = struct{}{}
   386  	}
   387  }
   388  
   389  func (b *Builder) newTable() *table {
   390  	return &table{
   391  		benchmarks: make(map[benchproc.Key]struct{}),
   392  		exps:       make(map[benchproc.Key]struct{}),
   393  		cells:      make(map[tableKey]*trial),
   394  	}
   395  }
   396  
   397  // union combines two sets of benchproc.Key into one.
   398  func union(a, b map[benchproc.Key]struct{}) map[benchproc.Key]struct{} {
   399  	if len(b) < len(a) {
   400  		a, b = b, a
   401  	}
   402  	for k := range a {
   403  		if _, ok := b[k]; !ok {
   404  			// a member of the not-larger set was not present in the larger set
   405  			c := make(map[benchproc.Key]struct{})
   406  			for k := range a {
   407  				c[k] = struct{}{}
   408  			}
   409  			for k := range b {
   410  				c[k] = struct{}{}
   411  			}
   412  			return c
   413  		}
   414  	}
   415  	return b
   416  }
   417  
   418  func concat(a, b []float64) []float64 {
   419  	return append(append([]float64{}, a...), b...)
   420  }
   421  
   422  const (
   423  	DUPE_REPLACE = iota
   424  	DUPE_COMBINE
   425  	// TODO DUPE_REPEAT
   426  )
   427  
   428  // AllComparisonSeries converts the accumulated "experiments" into a slice of series of comparisons,
   429  // with one slice element per goos-goarch-unit.  The experiments need not have occurred in any
   430  // sensible order; this deals with that, including overlaps (depend on flag, either replaces old with
   431  // younger or combines, REPLACE IS PREFERRED and works properly with combining old summary data with
   432  // fresh benchmarking data) and possibly also with previously processed summaries.
   433  func (b *Builder) AllComparisonSeries(existing []*ComparisonSeries, dupeHow int) []*ComparisonSeries {
   434  	old := make(map[string]*ComparisonSeries)
   435  	for _, cs := range existing {
   436  		old[cs.Unit] = cs
   437  	}
   438  	var css []*ComparisonSeries
   439  
   440  	// Iterate over units.
   441  	for _, u := range sortTableKeys(b.tables) {
   442  		t := b.tables[u]
   443  		uString := u.unit.StringValues()
   444  		if ts := u.table.StringValues(); ts != "" {
   445  			uString += " " + u.table.StringValues()
   446  		}
   447  		var cs *ComparisonSeries
   448  
   449  		sers := make(map[string]struct{})
   450  		benches := make(map[string]struct{})
   451  
   452  		if o := old[uString]; o != nil {
   453  			cs = o
   454  			delete(old, uString)
   455  
   456  			cs.cells = make(map[SeriesKey]*Comparison)
   457  			for i, s := range cs.Series {
   458  				for j, b := range cs.Benchmarks {
   459  					if cs.Summaries[i][j].Defined() {
   460  						sk := SeriesKey{
   461  							Benchmark: b,
   462  							Series:    s,
   463  						}
   464  						benches[b] = struct{}{}
   465  						sers[s] = struct{}{}
   466  						sum := cs.Summaries[i][j]
   467  						cc := &Comparison{Summary: sum, Date: sum.Date}
   468  						sum.comparison = cc
   469  						cs.cells[sk] = cc
   470  					}
   471  				}
   472  			}
   473  
   474  		} else {
   475  			cs = &ComparisonSeries{Unit: uString,
   476  				HashPairs: make(map[string]ComparisonHashes),
   477  				cells:     make(map[SeriesKey]*Comparison),
   478  			}
   479  		}
   480  
   481  		// TODO not handling overlapping samples between "existing" and "newly read" yet.
   482  
   483  		// Rearrange into paired comparisons, gathering repeats of same comparison from multiple experiments.
   484  		for tk, tr := range t.cells {
   485  			// tk == bench, experiment, tr == baseline, tests, tests == map hash -> cell.
   486  			bench := tk.Benchmark
   487  			dateString := NormalizeDateString(tk.Experiment.StringValues())
   488  			benchString := bench.StringValues()
   489  			benches[benchString] = struct{}{}
   490  			for hash, cell := range tr.tests {
   491  				hashString := hash.StringValues()
   492  				ser := b.hashToOrder[hash]
   493  				serString := NormalizeDateString(ser.StringValues())
   494  				sers[serString] = struct{}{}
   495  				sk := SeriesKey{
   496  					Benchmark: benchString,
   497  					Series:    serString,
   498  				}
   499  				cc := cs.cells[sk]
   500  				if cc == nil || dupeHow == DUPE_REPLACE {
   501  					if cc == nil || cc.Date < dateString {
   502  						cc = &Comparison{
   503  							Numerator:   cell,
   504  							Denominator: tr.baseline,
   505  							Date:        dateString,
   506  						}
   507  						cs.cells[sk] = cc
   508  					}
   509  
   510  					hp, ok := cs.HashPairs[serString]
   511  					if !ok {
   512  						cs.HashPairs[serString] = ComparisonHashes{NumHash: hashString, DenHash: tr.baselineHashString}
   513  					} else {
   514  						if hp.NumHash != hashString || hp.DenHash != tr.baselineHashString {
   515  							fmt.Fprintf(os.Stderr, "numerator/denominator mismatch, expected %s/%s got %s/%s\n",
   516  								hp.NumHash, hp.DenHash, hashString, tr.baselineHashString)
   517  						}
   518  					}
   519  
   520  				} else { // Current augments, but this will do the wrong thing if one is an old summary; also need to think about "repeat"
   521  					// augment an existing measurement (i.e., a second experiment on this same datapoint)
   522  					// fmt.Printf("Augment u:%s,b:%s,ch:%s,cd:%s; cc=%v[n(%d+%d)d(%d+%d)]\n",
   523  					// 	u.StringValues(), bench.StringValues(), hash.StringValues(), ser.StringValues(),
   524  					// 	cc, len(cc.Numerator.Values), len(cell.Values), len(cc.Denominator.Values), len(tr.baseline.Values))
   525  					cc.Numerator = &Cell{
   526  						Values:   concat(cc.Numerator.Values, cell.Values),
   527  						Residues: union(cc.Numerator.Residues, cell.Residues),
   528  					}
   529  					cc.Denominator = &Cell{
   530  						Values:   concat(cc.Denominator.Values, tr.baseline.Values),
   531  						Residues: union(cc.Denominator.Residues, tr.baseline.Residues),
   532  					}
   533  					if cc.Date < dateString {
   534  						cc.Date = dateString
   535  					}
   536  				}
   537  			}
   538  		}
   539  
   540  		cs.Benchmarks = sortStringSet(benches)
   541  		cs.Series = sortStringSet(sers)
   542  		for _, b := range cs.Benchmarks {
   543  			for _, s := range cs.Series {
   544  				cc := cs.cells[SeriesKey{Benchmark: b, Series: s}]
   545  				if cc != nil && cc.Numerator != nil && cc.Denominator != nil {
   546  					sort.Float64s(cc.Numerator.Values)
   547  					sort.Float64s(cc.Denominator.Values)
   548  				}
   549  			}
   550  		}
   551  
   552  		// Accumulate residues for this unit's table
   553  		type seenKey struct {
   554  			f *benchproc.Field
   555  			s string
   556  		}
   557  
   558  		seen := make(map[seenKey]bool)
   559  		rmap := make(map[string][]string)
   560  
   561  		for _, c := range cs.cells {
   562  			for _, f := range b.residue.FlattenedFields() {
   563  				if c.Numerator == nil {
   564  					continue
   565  				}
   566  				for k, _ := range c.Numerator.Residues {
   567  					s := k.Get(f)
   568  					if !seen[seenKey{f, s}] {
   569  						seen[seenKey{f, s}] = true
   570  						rmap[f.Name] = append(rmap[f.Name], s)
   571  					}
   572  				}
   573  				for k, _ := range c.Denominator.Residues {
   574  					s := k.Get(f)
   575  					if !seen[seenKey{f, s}] {
   576  						seen[seenKey{f, s}] = true
   577  						rmap[f.Name] = append(rmap[f.Name], s)
   578  					}
   579  				}
   580  			}
   581  		}
   582  
   583  		sas := []StringAndSlice{}
   584  		for k, v := range rmap {
   585  			sort.Strings(v)
   586  			sas = append(sas, StringAndSlice{k, v})
   587  		}
   588  		sort.Slice(sas, func(i, j int) bool { return sas[i].S < sas[j].S })
   589  
   590  		if len(cs.Residues) > 0 {
   591  			// Need to merge old and new
   592  			osas, nsas := cs.Residues, []StringAndSlice{}
   593  			for i, j := 0, 0; i < len(sas) || j < len(osas); {
   594  				if i == len(sas) || j < len(osas) && osas[j].S < sas[i].S {
   595  					nsas = append(nsas, osas[j])
   596  					j++
   597  					continue
   598  				}
   599  				if j == len(osas) || osas[j].S > sas[i].S {
   600  					nsas = append(nsas, sas[i])
   601  					i++
   602  					continue
   603  				}
   604  
   605  				// S (keys) are equal, merge value slices
   606  				sl, osl, nsl := sas[i].Slice, osas[j].Slice, []string{}
   607  				for ii, jj := 0, 0; ii < len(sl) || jj < len(osl); {
   608  					if ii == len(sl) || jj < len(osl) && osl[jj] < sl[ii] {
   609  						nsl = append(nsl, osl[jj])
   610  						jj++
   611  						continue
   612  					}
   613  					if jj == len(osl) || osl[jj] > sl[ii] {
   614  						nsl = append(nsl, sl[ii])
   615  						ii++
   616  						continue
   617  					}
   618  					nsl = append(nsl, sl[ii])
   619  					ii++
   620  					jj++
   621  				}
   622  				nsas = append(nsas, StringAndSlice{sas[i].S, nsl})
   623  				i++
   624  				j++
   625  			}
   626  			sas = nsas
   627  		}
   628  
   629  		cs.Residues = sas
   630  
   631  		css = append(css, cs)
   632  	}
   633  
   634  	for _, cs := range existing {
   635  		if o := old[cs.Unit]; o != nil {
   636  			css = append(css, cs)
   637  		}
   638  	}
   639  
   640  	return css
   641  }
   642  
   643  func sortStringSet(m map[string]struct{}) []string {
   644  	var s []string
   645  	for k := range m {
   646  		s = append(s, k)
   647  	}
   648  	sort.Strings(s)
   649  	return s
   650  }
   651  
   652  func sortTableKeys(m map[unitTableKey]*table) []unitTableKey {
   653  	var s []unitTableKey
   654  	for k := range m {
   655  		s = append(s, k)
   656  	}
   657  	sort.Slice(s, func(i, j int) bool {
   658  		if s[i].unit != s[j].unit {
   659  			return s[i].unit.StringValues() < s[j].unit.StringValues()
   660  		}
   661  		if s[i].table == s[j].table {
   662  			return false
   663  		}
   664  		return s[i].table.StringValues() < s[j].table.StringValues()
   665  
   666  	})
   667  	return s
   668  }
   669  
   670  func absSortedPermFor(a []float64) []int {
   671  	p := make([]int, len(a), len(a))
   672  	for i := range p {
   673  		p[i] = i
   674  	}
   675  	sort.Slice(p, func(i, j int) bool {
   676  		return math.Abs(a[p[i]]) < math.Abs(a[p[j]])
   677  	})
   678  	return p
   679  }
   680  
   681  func permute(a []float64, p []int) []float64 {
   682  	b := make([]float64, len(a), len(a))
   683  	for i, j := range p {
   684  		b[i] = a[j]
   685  	}
   686  	return b
   687  }
   688  
   689  // TODO Does this need to export the individual cells? What's the expected/intended use?
   690  
   691  func (cs *ComparisonSeries) ComparisonAt(benchmark, series string) (*Comparison, bool) {
   692  	if cc := cs.cells[SeriesKey{Benchmark: benchmark, Series: series}]; cc != nil {
   693  		return cc, true
   694  	}
   695  	return nil, false
   696  }
   697  
   698  func (cs *ComparisonSeries) SummaryAt(benchmark, series string) (*ComparisonSummary, bool) {
   699  	if cc := cs.cells[SeriesKey{Benchmark: benchmark, Series: series}]; cc != nil {
   700  		return cc.Summary, true
   701  	}
   702  	return nil, false
   703  }
   704  
   705  func (c *Cell) resampleInto(r *rand.Rand, x []float64) {
   706  	l := len(x)
   707  	for i := range x {
   708  		x[i] = c.Values[r.Intn(l)]
   709  	}
   710  	sort.Float64s(x)
   711  }
   712  
   713  const rot = 23
   714  
   715  func (c *Cell) hash() int64 {
   716  	var x int64
   717  	for _, v := range c.Values {
   718  		xlow := (x >> (64 - rot)) & (1<<rot - 1)
   719  		x = (x << rot) ^ xlow ^ int64(math.Float64bits(v))
   720  	}
   721  	return x
   722  }
   723  
   724  // ratio computes a bootstrapped estimate of the confidence interval for
   725  // the ratio of measurements in nu divided by measurements in de.
   726  func ratio(nu, de *Cell, confidence float64, r *rand.Rand, ratios []float64) (center, low, high float64) {
   727  	N := len(ratios)
   728  	rnu := make([]float64, len(nu.Values), len(nu.Values))
   729  	rde := make([]float64, len(de.Values), len(de.Values))
   730  	for i := 0; i < N; i++ {
   731  		nu.resampleInto(r, rnu)
   732  		de.resampleInto(r, rde)
   733  		den := median(rde)
   734  		if den == 0 {
   735  			num := median(rnu)
   736  			if num >= 0 {
   737  				ratios[i] = (num + 1)
   738  			} else {
   739  				ratios[i] = (num - 1)
   740  			}
   741  		} else {
   742  			ratios[i] = median(rnu) / den
   743  		}
   744  	}
   745  	sort.Float64s(ratios)
   746  	p := (1 - confidence) / 2
   747  	low = percentile(ratios, p)
   748  	high = percentile(ratios, 1-p)
   749  	center = median(ratios)
   750  	return
   751  }
   752  
   753  func percentile(a []float64, p float64) float64 {
   754  	if len(a) == 0 {
   755  		return math.NaN()
   756  	}
   757  	if p == 0 {
   758  		return a[0]
   759  	}
   760  	n := len(a)
   761  	if p == 1 {
   762  		return a[n-1]
   763  	}
   764  	f := float64(float64(n) * p) // Suppress fused-multiply-add
   765  	i := int(f)
   766  	x := f - float64(i)
   767  	r := a[i]
   768  	if x > 0 && i+1 < len(a) {
   769  		r = float64(r*(1-x)) + float64(a[i+1]*x) // Suppress fused-multiply-add
   770  	}
   771  	return r
   772  }
   773  
   774  func median(a []float64) float64 {
   775  	l := len(a)
   776  	if l&1 == 1 {
   777  		return a[l/2]
   778  	}
   779  	return (a[l/2] + a[l/2-1]) / 2
   780  }
   781  
   782  func norm(a []float64, l float64) float64 {
   783  	if len(a) == 0 {
   784  		return math.NaN()
   785  	}
   786  	n := 0.0
   787  	sum := 0.0
   788  	for _, x := range a {
   789  		if math.IsInf(x, 0) || math.IsNaN(x) {
   790  			continue
   791  		}
   792  		sum += math.Pow(math.Abs(x), l)
   793  		n++
   794  	}
   795  	return math.Pow(sum/n, 1/l)
   796  }
   797  
   798  // ChangeScore returns an indicator of the change and direction.
   799  // This is a heuristic measure of the lack of overlap between
   800  // two confidence intervals; minimum lack of overlap (i.e., same
   801  // confidence intervals) is zero.  Exact non-overlap, meaning
   802  // the high end of one interval is equal to the low end of the
   803  // other, is one.  A gap of size G between the two intervals
   804  // yields a score of 1 + G/M where M is the size of the smaller
   805  // interval (this penalizes a ChangeScore in noise, which is also a
   806  // ChangeScore). A partial overlap of size G yields a score of
   807  // 1 - G/M.
   808  //
   809  // Empty confidence intervals are problematic and produces infinities
   810  // or NaNs.
   811  func ChangeScore(l1, c1, h1, l2, c2, h2 float64) float64 {
   812  	sign := 1.0
   813  	if c1 > c2 {
   814  		l1, c1, h1, l2, c2, h2 = l2, c2, h2, l1, c1, h1
   815  		sign = -sign
   816  	}
   817  	r := math.Min(h1-l1, h2-l2)
   818  	// we know l1 < c1 < h1, c1 < c2, l2 < c2 < h2
   819  	// therefore l1 < c1 < c2 < h2
   820  	if h1 > l2 { // overlap
   821  		if h1 > h2 {
   822  			h1 = h2
   823  		}
   824  		if l2 < l1 {
   825  			l2 = l1
   826  		}
   827  		return sign * (1 - (h1-l2)/r) // perfect overlap == 0
   828  	} else { // no overlap
   829  		return sign * (1 + (l2-h1)/r) //
   830  	}
   831  }
   832  
   833  type compareFn func(c *Comparison) (center, low, high float64)
   834  
   835  func withBootstrap(confidence float64, N int) compareFn {
   836  	return func(c *Comparison) (center, low, high float64) {
   837  		c.ratios = make([]float64, N, N)
   838  		r := rand.New(rand.NewSource(c.Numerator.hash() * c.Denominator.hash()))
   839  		center, low, high = ratio(c.Numerator, c.Denominator, confidence, r, c.ratios)
   840  		return
   841  	}
   842  }
   843  
   844  // KSov returns the size-adjusted Kolmogorov-Smirnov statistic,
   845  // equal to D_{n,m} / sqrt((n+m)/n*m).  The result can be compared
   846  // to c(α) where α is the level at which the null hypothesis is rejected.
   847  //
   848  //	   α:  0.2   0.15  0.10  0.05  0.025 0.01  0.005 0.001
   849  //	c(α):  1.073 1.138 1.224 1.358 1.48  1.628 1.731 1.949
   850  //
   851  // see
   852  // https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test#Two-sample_Kolmogorov%E2%80%93Smirnov_test
   853  func (a *ComparisonSummary) KSov(b *ComparisonSummary) float64 {
   854  	// TODO Kolmogorov-Smirnov hasn't worked that well
   855  	ra, rb := a.comparison.ratios, b.comparison.ratios
   856  	ia, ib := 0, 0
   857  	la, lb := len(ra), len(rb)
   858  	fla, flb := float64(la), float64(lb)
   859  
   860  	gap := 0.0
   861  
   862  	for ia < la && ib < lb {
   863  		if ra[ia] < rb[ib] {
   864  			ia++
   865  		} else if ra[ia] > rb[ib] {
   866  			ib++
   867  		} else {
   868  			ia++
   869  			ib++
   870  		}
   871  		g := math.Abs(float64(ia)/fla - float64(ib)/flb)
   872  		if g > gap {
   873  			gap = g
   874  		}
   875  	}
   876  	return gap * math.Sqrt(fla*flb/(fla+flb))
   877  }
   878  
   879  // HeurOverlap computes a heuristic overlap between two confidence intervals
   880  func (a *ComparisonSummary) HeurOverlap(b *ComparisonSummary, threshold float64) float64 {
   881  	if a.Low == a.High && b.Low == b.High {
   882  		ca, cb, sign := a.Center, b.Center, 100.0
   883  		if cb < ca {
   884  			ca, cb, sign = cb, ca, -100.0
   885  		}
   886  		if ca == 0 {
   887  			if cb > threshold {
   888  				return sign
   889  			}
   890  		} else if (cb-ca)/ca > threshold {
   891  			return sign
   892  		}
   893  		return 0
   894  	}
   895  	return ChangeScore(a.Low, a.Center, a.High, b.Low, b.Center, b.High)
   896  }
   897  
   898  // AddSumaries computes the summary data (bootstrapped estimated of the specified
   899  // confidence interval) for the comparison series cs.  The 3rd parameter N specifies
   900  // the number of sampled bootstraps to use; 1000 is recommended, but 500 is good enough
   901  // for testing.
   902  func (cs *ComparisonSeries) AddSummaries(confidence float64, N int) {
   903  	fn := withBootstrap(confidence, N)
   904  	var tab [][]*ComparisonSummary
   905  	for _, s := range cs.Series {
   906  		row := []*ComparisonSummary{}
   907  		for _, b := range cs.Benchmarks {
   908  			if c, ok := cs.ComparisonAt(b, s); ok {
   909  				sum := c.Summary
   910  				if sum == nil || (!sum.Present && sum.comparison == nil) {
   911  					sum = &ComparisonSummary{comparison: c, Date: c.Date}
   912  					sum.Center, sum.Low, sum.High = fn(c)
   913  					sum.Present = true
   914  					c.Summary = sum
   915  				}
   916  				row = append(row, sum)
   917  			} else {
   918  				row = append(row, &ComparisonSummary{})
   919  			}
   920  		}
   921  		tab = append(tab, row)
   922  	}
   923  	cs.Summaries = tab
   924  }