github.com/jgbaldwinbrown/perf@v0.1.1/analysis/app/trend.go (about)

     1  // Copyright 2017 The Go Authors.  All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Loosely based on github.com/aclements/go-misc/benchplot
     6  
     7  package app
     8  
     9  import (
    10  	"bytes"
    11  	"encoding/json"
    12  	"fmt"
    13  	"html/template"
    14  	"math"
    15  	"net/http"
    16  	"os"
    17  	"path/filepath"
    18  	"sort"
    19  	"strconv"
    20  	"strings"
    21  
    22  	"github.com/aclements/go-gg/generic/slice"
    23  	"github.com/aclements/go-gg/ggstat"
    24  	"github.com/aclements/go-gg/table"
    25  	"golang.org/x/net/context"
    26  	"golang.org/x/perf/storage"
    27  )
    28  
    29  // trend handles /trend.
    30  // With no query, it prints the list of recent uploads containing a "trend" key.
    31  // With a query, it shows a graph of the matching benchmark results.
    32  func (a *App) trend(w http.ResponseWriter, r *http.Request) {
    33  	ctx := requestContext(r)
    34  
    35  	if err := r.ParseForm(); err != nil {
    36  		http.Error(w, err.Error(), 500)
    37  		return
    38  	}
    39  
    40  	q := r.Form.Get("q")
    41  
    42  	tmpl, err := os.ReadFile(filepath.Join(a.BaseDir, "template/trend.html"))
    43  	if err != nil {
    44  		http.Error(w, err.Error(), 500)
    45  		return
    46  	}
    47  
    48  	t, err := template.New("main").Parse(string(tmpl))
    49  	if err != nil {
    50  		http.Error(w, err.Error(), 500)
    51  		return
    52  	}
    53  
    54  	opt := plotOptions{
    55  		x:   r.Form.Get("x"),
    56  		raw: r.Form.Get("raw") == "1",
    57  	}
    58  
    59  	data := a.trendQuery(ctx, q, opt)
    60  
    61  	w.Header().Set("Content-Type", "text/html; charset=utf-8")
    62  	if err := t.Execute(w, data); err != nil {
    63  		http.Error(w, err.Error(), 500)
    64  		return
    65  	}
    66  }
    67  
// trendData is the struct passed to the trend.html template.
type trendData struct {
	Q            string               // the search query, echoed back into the page
	Error        string               // human-readable error shown instead of a plot, if non-empty
	TrendUploads []storage.UploadInfo // recent uploads with a "trend" key; filled only when Q is empty
	PlotData     template.JS          // JS literal for new google.visualization.DataTable (see tableToJS)
	PlotType     template.JS          // chart constructor name: "LineChart" or "ScatterChart"
}
    76  
// trendQuery computes the values for the template and returns a trendData for display.
// With an empty q it lists up to 16 recent uploads carrying a "trend" key;
// otherwise it runs the query, validates the required labels, and builds
// the plot data. Failures are reported via the returned struct's Error
// field (never a Go error) so the template can always render.
func (a *App) trendQuery(ctx context.Context, q string, opt plotOptions) *trendData {
	d := &trendData{Q: q}
	if q == "" {
		// No query: show recent uploads the user can pick from.
		ul := a.StorageClient.ListUploads(ctx, `trend>`, []string{"by", "upload-time", "trend"}, 16)
		defer ul.Close()
		for ul.Next() {
			d.TrendUploads = append(d.TrendUploads, ul.Info())
		}
		if err := ul.Err(); err != nil {
			errorf(ctx, "failed to fetch recent trend uploads: %v", err)
		}
		return d
	}

	// TODO(quentin): Chunk query based on matching upload IDs.
	res := a.StorageClient.Query(ctx, q)
	defer res.Close()
	t, resultCols := queryToTable(res)
	if err := res.Err(); err != nil {
		errorf(ctx, "failed to read query results: %v", err)
		d.Error = fmt.Sprintf("failed to read query results: %v", err)
		return d
	}
	// plot (below) requires these string columns on every result.
	for _, col := range []string{"commit", "commit-time", "branch", "name"} {
		if !hasStringColumn(t, col) {
			d.Error = fmt.Sprintf("results missing %q label", col)
			return d
		}
	}
	if opt.x != "" && !hasStringColumn(t, opt.x) {
		d.Error = fmt.Sprintf("results missing x label %q", opt.x)
		return d
	}
	data := plot(t, resultCols, opt)

	// TODO(quentin): Give the user control over across vs. plotting in separate graphs, instead of only showing one graph with ns/op for each benchmark.

	if opt.raw {
		// Raw mode: scatter-plot the unsmoothed points, colored by the
		// "upload-part" they came from so points from the same part
		// share a color.
		data = table.MapTables(data, func(_ table.GroupID, t *table.Table) *table.Table {
			// From http://tristen.ca/hcl-picker/#/hlc/9/1.13/F1796F/B3EC6C
			colors := []string{"#F1796F", "#B3EC6C", "#F67E9D", "#6CEB98", "#E392CB", "#0AE4C6", "#B7ABEC", "#16D7E9", "#75C4F7"}
			colorIdx := 0
			partColors := make(map[string]string)
			styles := make([]string, t.Len())
			for i, part := range t.MustColumn("upload-part").([]string) {
				if _, ok := partColors[part]; !ok {
					// New part: take the next palette color,
					// wrapping around when the palette runs out.
					partColors[part] = colors[colorIdx]
					colorIdx++
					if colorIdx >= len(colors) {
						colorIdx = 0
					}
				}
				styles[i] = "color: " + partColors[part]
			}
			return table.NewBuilder(t).Add("style", styles).Done()
		})
		columns := []column{
			{Name: "commit-index"},
			{Name: "result"},
			{Name: "style", Role: "style"},
			{Name: "commit", Role: "tooltip"},
		}
		d.PlotData = tableToJS(data.Table(data.Tables()[0]), columns)
		d.PlotType = "ScatterChart"
		return d
	}

	// Pivot all of the benchmarks into columns of a single table.
	ar := &aggResults{
		Across: "name",
		Values: []string{"filtered normalized mean result", "normalized mean result", "normalized median result", "normalized min result", "normalized max result"},
	}
	data = ggstat.Agg("commit", "branch", "commit-index")(ar.agg).F(data)

	tables := data.Tables()
	infof(ctx, "tables: %v", tables)
	columns := []column{
		{Name: "commit-index"},
		{Name: "commit", Role: "tooltip"},
	}
	for _, prefix := range ar.Prefixes {
		// With a single benchmark there is room to also show the mean,
		// median, and min/max interval; with several, only the filtered
		// means are plotted.
		if len(ar.Prefixes) == 1 {
			columns = append(columns,
				column{Name: prefix + "/normalized mean result"},
				column{Name: prefix + "/normalized min result", Role: "interval"},
				column{Name: prefix + "/normalized max result", Role: "interval"},
				column{Name: prefix + "/normalized median result"},
			)
		}
		columns = append(columns,
			column{Name: prefix + "/filtered normalized mean result"},
		)
	}
	d.PlotData = tableToJS(data.Table(tables[0]), columns)
	d.PlotType = "LineChart"
	return d
}
   175  
// queryToTable converts the result of a Query into a Table for later processing.
// Each label is placed in a column named after the key.
// Each metric is placed in a separate result column named after the unit.
// Labels or metrics absent from a given result are backfilled with the
// zero value ("" or 0) so every column stays the same length as "name".
// resultCols lists the metric column names in sorted order.
func queryToTable(q *storage.Query) (t *table.Table, resultCols []string) {
	var names []string
	labels := make(map[string][]string)
	results := make(map[string][]float64)
	i := 0 // rows emitted so far; used to backfill newly seen columns
	for q.Next() {
		res := q.Result()
		// TODO(quentin): Handle multiple results with the same name but different NameLabels.
		names = append(names, res.NameLabels["name"])
		// A label seen for the first time gets a column padded with
		// i empty strings for the rows that preceded it.
		for k := range res.Labels {
			if labels[k] == nil {
				labels[k] = make([]string, i)
			}
		}
		for k := range labels {
			labels[k] = append(labels[k], res.Labels[k])
		}
		// Parse value/unit pairs from the result line, skipping the
		// first two fields (presumably name and iteration count in
		// standard benchmark output — TODO confirm).
		f := strings.Fields(res.Content)
		metrics := make(map[string]float64)
		for j := 2; j+2 <= len(f); j += 2 {
			val, err := strconv.ParseFloat(f[j], 64)
			if err != nil {
				// Non-numeric field; skip this pair.
				continue
			}
			unit := f[j+1]
			if results[unit] == nil {
				// New unit: backfill zeros for earlier rows.
				results[unit] = make([]float64, i)
			}
			metrics[unit] = val
		}
		// Append this row's value (or 0 if missing) to every metric column.
		for k := range results {
			results[k] = append(results[k], metrics[k])
		}
		i++
	}

	tab := new(table.Builder).Add("name", names)

	for k, v := range labels {
		tab.Add(k, v)
	}
	for k, v := range results {
		tab.Add(k, v)
		resultCols = append(resultCols, k)
	}

	// Sort so the metric column order is deterministic.
	sort.Strings(resultCols)

	return tab.Done(), resultCols
}
   229  
// plotOptions holds the user-selectable options parsed from the /trend query string.
type plotOptions struct {
	// x names the column to use for the X axis.
	// If unspecified, "commit" is used.
	x string
	// raw will return the raw points without any averaging/smoothing.
	// The only result column will be "result".
	raw bool
	// correlate will use the string column "upload-part" as an indication that results came from the same machine. Commits present in multiple parts will be used to correlate results.
	// NOTE(review): correlate is never read anywhere in this file;
	// possibly consumed elsewhere or vestigial — confirm before removing.
	correlate bool
}
   240  
   241  // plot takes raw benchmark data in t and produces a Grouping object containing filtered, normalized metric results for a graph.
   242  // t must contain the string columns "commit", "commit-time", "branch". resultCols specifies the names of float64 columns containing metric results.
   243  // The returned grouping has columns "commit", "commit-time", "commit-index", "branch", "metric", "normalized min result", "normalized max result", "normalized mean result", "filtered normalized mean result".
   244  // This is roughly the algorithm from github.com/aclements/go-misc/benchplot
   245  func plot(t table.Grouping, resultCols []string, opt plotOptions) table.Grouping {
   246  	nrows := len(table.GroupBy(t, "name").Tables())
   247  
   248  	// Turn ordered commit-time into a "commit-index" column.
   249  	if opt.x == "" {
   250  		opt.x = "commit"
   251  	}
   252  	// TODO(quentin): One SortBy call should do this, but
   253  	// sometimes it seems to sort by the second column instead of
   254  	// the first. Do them in separate steps until SortBy is fixed.
   255  	t = table.SortBy(t, opt.x)
   256  	t = table.SortBy(t, "commit-time")
   257  	t = colIndex{col: opt.x}.F(t)
   258  
   259  	// Unpivot all of the metrics into one column.
   260  	t = table.Unpivot(t, "metric", "result", resultCols...)
   261  
   262  	// TODO(quentin): Let user choose which metric(s) to keep.
   263  	t = table.FilterEq(t, "metric", "ns/op")
   264  
   265  	if opt.raw {
   266  		return t
   267  	}
   268  
   269  	// Average each result at each commit (but keep columns names
   270  	// the same to keep things easier to read).
   271  	t = ggstat.Agg("commit", "name", "metric", "branch", "commit-index")(ggstat.AggMean("result"), ggstat.AggQuantile("median", .5, "result"), ggstat.AggMin("result"), ggstat.AggMax("result")).F(t)
   272  	y := "mean result"
   273  
   274  	// Normalize to earliest commit on master. It's important to
   275  	// do this before the geomean if there are commits missing.
   276  	// Unfortunately, that also means we have to *temporarily*
   277  	// group by name and metric, since the geomean needs to be
   278  	// done on a different grouping.
   279  	t = table.GroupBy(t, "name", "metric")
   280  	t = ggstat.Normalize{X: "branch", By: firstMasterIndex, Cols: []string{"mean result", "median result", "max result", "min result"}, DenomCols: []string{"mean result", "mean result", "mean result", "mean result"}}.F(t)
   281  	y = "normalized " + y
   282  	for _, col := range []string{"mean result", "median result", "max result", "min result"} {
   283  		t = table.Remove(t, col)
   284  	}
   285  	t = table.Ungroup(table.Ungroup(t))
   286  
   287  	// Compute geomean for each metric at each commit if there's
   288  	// more than one benchmark.
   289  	if len(table.GroupBy(t, "name").Tables()) > 1 {
   290  		gt := removeNaNs(t, y)
   291  		gt = ggstat.Agg("commit", "metric", "branch", "commit-index")(ggstat.AggGeoMean(y, "normalized median result"), ggstat.AggMin("normalized min result"), ggstat.AggMax("normalized max result")).F(gt)
   292  		gt = table.MapTables(gt, func(_ table.GroupID, t *table.Table) *table.Table {
   293  			return table.NewBuilder(t).AddConst("name", " geomean").Done()
   294  		})
   295  		gt = table.Rename(gt, "geomean "+y, y)
   296  		gt = table.Rename(gt, "geomean normalized median result", "normalized median result")
   297  		gt = table.Rename(gt, "min normalized min result", "normalized min result")
   298  		gt = table.Rename(gt, "max normalized max result", "normalized max result")
   299  		t = table.Concat(t, gt)
   300  		nrows++
   301  	}
   302  
   303  	// Filter the data to reduce noise.
   304  	t = table.GroupBy(t, "name", "metric")
   305  	t = kza{y, 15, 3}.F(t)
   306  	y = "filtered " + y
   307  	t = table.Ungroup(table.Ungroup(t))
   308  
   309  	return t
   310  }
   311  
   312  // hasStringColumn returns whether t has a []string column called col.
   313  func hasStringColumn(t table.Grouping, col string) bool {
   314  	c := t.Table(t.Tables()[0]).Column(col)
   315  	if c == nil {
   316  		return false
   317  	}
   318  	_, ok := c.([]string)
   319  	return ok
   320  }
   321  
// aggResults pivots the table, taking the columns in Values and making a new column for each distinct value in Across.
// aggResults{Across: "in", Values: []string{"value1", "value2"}} will reshape a table like
//
//	in	value1	value2
//	one	1	2
//	two	3	4
//
// into a table like
//
//	one/value1	one/value2	two/value1	two/value2
//	1		2		3		4
//
// The Across column must be []string, and value columns must be []float64.
type aggResults struct {
	// Across is the name of the column whose values are the column prefix.
	Across string
	// Values is the name of the columns to split.
	Values []string
	// Prefixes is filled in after calling agg with the name of each prefix that was found.
	// It is sorted alphabetically.
	Prefixes []string
}
   343  
// agg implements ggstat.Aggregator, allowing an *aggResults to be used with ggstat.Agg.
// Each group in input becomes one output row. For every distinct value
// of the Across column it emits one column per entry in Values, named
// "<across value>/<value column>"; cells with no source data are NaN.
// As a side effect, it stores the sorted prefixes in a.Prefixes.
func (a *aggResults) agg(input table.Grouping, output *table.Builder) {
	var prefixes []string
	rows := len(input.Tables()) // one output row per input group
	columns := make(map[string][]float64)
	for i, gid := range input.Tables() {
		// Fetch the value columns for this group, parallel to a.Values.
		var vs [][]float64
		for _, col := range a.Values {
			vs = append(vs, input.Table(gid).MustColumn(col).([]float64))
		}
		as := input.Table(gid).MustColumn(a.Across).([]string)
		for j, prefix := range as {
			for k, col := range a.Values {
				key := prefix + "/" + col
				if columns[key] == nil {
					if k == 0 {
						// First time we've seen this prefix, track it.
						prefixes = append(prefixes, prefix)
					}
					// Initialize to NaN so groups lacking this
					// prefix show up as missing data.
					columns[key] = make([]float64, rows)
					for i := range columns[key] {
						columns[key][i] = math.NaN()
					}
				}
				// Row i (this group) gets value j of column k.
				columns[key][i] = vs[k][j]
			}
		}
	}
	sort.Strings(prefixes)
	a.Prefixes = prefixes
	// Emit columns grouped by prefix, in sorted prefix order.
	for _, prefix := range prefixes {
		for _, col := range a.Values {
			key := prefix + "/" + col
			output.Add(key, columns[key])
		}
	}
}
   381  
   382  // firstMasterIndex returns the index of the first commit on master.
   383  // This is used to find the value to normalize against.
   384  func firstMasterIndex(bs []string) int {
   385  	return slice.Index(bs, "master")
   386  }
   387  
// colIndex is a gg.Stat that adds a column called "commit-index" sequentially counting unique values of the column "commit".
type colIndex struct {
	// col specifies the string column to assign indices to. If unspecified, "commit" will be used.
	// The added column is always named "commit-index" regardless of col.
	col string
}
   393  
   394  func (ci colIndex) F(g table.Grouping) table.Grouping {
   395  	if ci.col == "" {
   396  		ci.col = "commit"
   397  	}
   398  	return table.MapTables(g, func(_ table.GroupID, t *table.Table) *table.Table {
   399  		idxs := make([]int, t.Len())
   400  		last, idx := "", -1
   401  		for i, hash := range t.MustColumn(ci.col).([]string) {
   402  			if hash != last {
   403  				idx++
   404  				last = hash
   405  			}
   406  			idxs[i] = idx
   407  		}
   408  		t = table.NewBuilder(t).Add("commit-index", idxs).Done()
   409  
   410  		return t
   411  	})
   412  }
   413  
   414  // removeNaNs returns a new Grouping with rows containing NaN in col removed.
   415  func removeNaNs(g table.Grouping, col string) table.Grouping {
   416  	return table.Filter(g, func(result float64) bool {
   417  		return !math.IsNaN(result)
   418  	}, col)
   419  }
   420  
// kza implements adaptive Kolmogorov-Zurbenko filtering on the data in X.
type kza struct {
	// X names the float64 column to smooth.
	X string
	// M and K are passed straight to AdaptiveKolmogorovZurbenko
	// (conventionally the window size and iteration count — confirm
	// against that function's documentation).
	M, K int
}
   426  
   427  func (k kza) F(g table.Grouping) table.Grouping {
   428  	return table.MapTables(g, func(_ table.GroupID, t *table.Table) *table.Table {
   429  		var xs []float64
   430  		slice.Convert(&xs, t.MustColumn(k.X))
   431  		nxs := AdaptiveKolmogorovZurbenko(xs, k.M, k.K)
   432  		return table.NewBuilder(t).Add("filtered "+k.X, nxs).Done()
   433  	})
   434  }
   435  
// column represents a column in a google.visualization.DataTable
type column struct {
	Name string `json:"id"`             // column id; also names the table.Table column to read
	Role string `json:"role,omitempty"` // visualization role, e.g. "tooltip", "style", "interval"
	// These fields are filled in by tableToJS if unspecified.
	Type  string `json:"type"`  // "string" or "number", inferred from the column's data
	Label string `json:"label"` // defaults to Name
}
   444  
   445  // tableToJS converts a Table to a javascript literal which can be passed to "new google.visualization.DataTable".
   446  func tableToJS(t *table.Table, columns []column) template.JS {
   447  	var out bytes.Buffer
   448  	fmt.Fprint(&out, "{cols: [")
   449  	var slices []table.Slice
   450  	for i, c := range columns {
   451  		if i > 0 {
   452  			fmt.Fprint(&out, ",\n")
   453  		}
   454  		col := t.Column(c.Name)
   455  		slices = append(slices, col)
   456  		if c.Type == "" {
   457  			switch col.(type) {
   458  			case []string:
   459  				c.Type = "string"
   460  			case []int, []float64:
   461  				c.Type = "number"
   462  			default:
   463  				// Matches the hardcoded string below.
   464  				c.Type = "string"
   465  			}
   466  		}
   467  		if c.Label == "" {
   468  			c.Label = c.Name
   469  		}
   470  		data, err := json.Marshal(c)
   471  		if err != nil {
   472  			panic(err)
   473  		}
   474  		out.Write(data)
   475  	}
   476  	fmt.Fprint(&out, "],\nrows: [")
   477  	for i := 0; i < t.Len(); i++ {
   478  		if i > 0 {
   479  			fmt.Fprint(&out, ",\n")
   480  		}
   481  		fmt.Fprint(&out, "{c:[")
   482  		for j := range columns {
   483  			if j > 0 {
   484  				fmt.Fprint(&out, ", ")
   485  			}
   486  			fmt.Fprint(&out, "{v: ")
   487  			var value []byte
   488  			var err error
   489  			switch column := slices[j].(type) {
   490  			case []string:
   491  				value, err = json.Marshal(column[i])
   492  			case []int:
   493  				value, err = json.Marshal(column[i])
   494  			case []float64:
   495  				value, err = json.Marshal(column[i])
   496  			default:
   497  				value = []byte(`"unknown column type"`)
   498  			}
   499  			if err != nil {
   500  				panic(err)
   501  			}
   502  			out.Write(value)
   503  			fmt.Fprint(&out, "}")
   504  		}
   505  		fmt.Fprint(&out, "]}")
   506  	}
   507  	fmt.Fprint(&out, "]}")
   508  	return template.JS(out.String())
   509  }