github.com/jgbaldwinbrown/perf@v0.1.1/analysis/app/trend.go (about) 1 // Copyright 2017 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Loosely based on github.com/aclements/go-misc/benchplot 6 7 package app 8 9 import ( 10 "bytes" 11 "encoding/json" 12 "fmt" 13 "html/template" 14 "math" 15 "net/http" 16 "os" 17 "path/filepath" 18 "sort" 19 "strconv" 20 "strings" 21 22 "github.com/aclements/go-gg/generic/slice" 23 "github.com/aclements/go-gg/ggstat" 24 "github.com/aclements/go-gg/table" 25 "golang.org/x/net/context" 26 "golang.org/x/perf/storage" 27 ) 28 29 // trend handles /trend. 30 // With no query, it prints the list of recent uploads containing a "trend" key. 31 // With a query, it shows a graph of the matching benchmark results. 32 func (a *App) trend(w http.ResponseWriter, r *http.Request) { 33 ctx := requestContext(r) 34 35 if err := r.ParseForm(); err != nil { 36 http.Error(w, err.Error(), 500) 37 return 38 } 39 40 q := r.Form.Get("q") 41 42 tmpl, err := os.ReadFile(filepath.Join(a.BaseDir, "template/trend.html")) 43 if err != nil { 44 http.Error(w, err.Error(), 500) 45 return 46 } 47 48 t, err := template.New("main").Parse(string(tmpl)) 49 if err != nil { 50 http.Error(w, err.Error(), 500) 51 return 52 } 53 54 opt := plotOptions{ 55 x: r.Form.Get("x"), 56 raw: r.Form.Get("raw") == "1", 57 } 58 59 data := a.trendQuery(ctx, q, opt) 60 61 w.Header().Set("Content-Type", "text/html; charset=utf-8") 62 if err := t.Execute(w, data); err != nil { 63 http.Error(w, err.Error(), 500) 64 return 65 } 66 } 67 68 // trendData is the struct passed to the trend.html template. 69 type trendData struct { 70 Q string 71 Error string 72 TrendUploads []storage.UploadInfo 73 PlotData template.JS 74 PlotType template.JS 75 } 76 77 // trendData computes the values for the template and returns a trendData for display. 
func (a *App) trendQuery(ctx context.Context, q string, opt plotOptions) *trendData {
	d := &trendData{Q: q}
	if q == "" {
		// Empty query: list the 16 most recent uploads that carry a
		// "trend" key so the user can pick one.
		ul := a.StorageClient.ListUploads(ctx, `trend>`, []string{"by", "upload-time", "trend"}, 16)
		defer ul.Close()
		for ul.Next() {
			d.TrendUploads = append(d.TrendUploads, ul.Info())
		}
		if err := ul.Err(); err != nil {
			// Best-effort: log, then render whatever uploads were read.
			errorf(ctx, "failed to fetch recent trend uploads: %v", err)
		}
		return d
	}

	// TODO(quentin): Chunk query based on matching upload IDs.
	res := a.StorageClient.Query(ctx, q)
	defer res.Close()
	t, resultCols := queryToTable(res)
	if err := res.Err(); err != nil {
		errorf(ctx, "failed to read query results: %v", err)
		d.Error = fmt.Sprintf("failed to read query results: %v", err)
		return d
	}
	// The plotting pipeline requires these labels on every result.
	for _, col := range []string{"commit", "commit-time", "branch", "name"} {
		if !hasStringColumn(t, col) {
			d.Error = fmt.Sprintf("results missing %q label", col)
			return d
		}
	}
	if opt.x != "" && !hasStringColumn(t, opt.x) {
		d.Error = fmt.Sprintf("results missing x label %q", opt.x)
		return d
	}
	data := plot(t, resultCols, opt)

	// TODO(quentin): Give the user control over across vs. plotting in separate graphs, instead of only showing one graph with ns/op for each benchmark.

	if opt.raw {
		// Raw mode: scatter plot of unsmoothed points, colored by which
		// upload part each point came from.
		data = table.MapTables(data, func(_ table.GroupID, t *table.Table) *table.Table {
			// From http://tristen.ca/hcl-picker/#/hlc/9/1.13/F1796F/B3EC6C
			colors := []string{"#F1796F", "#B3EC6C", "#F67E9D", "#6CEB98", "#E392CB", "#0AE4C6", "#B7ABEC", "#16D7E9", "#75C4F7"}
			colorIdx := 0
			partColors := make(map[string]string)
			styles := make([]string, t.Len())
			for i, part := range t.MustColumn("upload-part").([]string) {
				if _, ok := partColors[part]; !ok {
					// First time seeing this part: assign the next
					// color, wrapping around when the palette runs out.
					partColors[part] = colors[colorIdx]
					colorIdx++
					if colorIdx >= len(colors) {
						colorIdx = 0
					}
				}
				styles[i] = "color: " + partColors[part]
			}
			return table.NewBuilder(t).Add("style", styles).Done()
		})
		columns := []column{
			{Name: "commit-index"},
			{Name: "result"},
			{Name: "style", Role: "style"},
			{Name: "commit", Role: "tooltip"},
		}
		// Only the first grouped table is rendered.
		d.PlotData = tableToJS(data.Table(data.Tables()[0]), columns)
		d.PlotType = "ScatterChart"
		return d
	}

	// Pivot all of the benchmarks into columns of a single table.
	ar := &aggResults{
		Across: "name",
		Values: []string{"filtered normalized mean result", "normalized mean result", "normalized median result", "normalized min result", "normalized max result"},
	}
	data = ggstat.Agg("commit", "branch", "commit-index")(ar.agg).F(data)

	tables := data.Tables()
	infof(ctx, "tables: %v", tables)
	columns := []column{
		{Name: "commit-index"},
		{Name: "commit", Role: "tooltip"},
	}
	for _, prefix := range ar.Prefixes {
		// With a single benchmark, show the mean/min/max/median detail;
		// with several, only the filtered mean per benchmark is drawn.
		if len(ar.Prefixes) == 1 {
			columns = append(columns,
				column{Name: prefix + "/normalized mean result"},
				column{Name: prefix + "/normalized min result", Role: "interval"},
				column{Name: prefix + "/normalized max result", Role: "interval"},
				column{Name: prefix + "/normalized median result"},
			)
		}
		columns = append(columns,
			column{Name: prefix + "/filtered normalized mean result"},
		)
	}
	d.PlotData = tableToJS(data.Table(tables[0]), columns)
	d.PlotType = "LineChart"
	return d
}

// queryToTable converts the result of a Query into a Table for later processing.
// Each label is placed in a column named after the key.
// Each metric is placed in a separate result column named after the unit.
func queryToTable(q *storage.Query) (t *table.Table, resultCols []string) {
	var names []string
	labels := make(map[string][]string)
	results := make(map[string][]float64)
	// i counts rows seen so far; it is used to back-fill columns that
	// first appear partway through the result stream.
	i := 0
	for q.Next() {
		res := q.Result()
		// TODO(quentin): Handle multiple results with the same name but different NameLabels.
		names = append(names, res.NameLabels["name"])
		// Create any newly-seen label columns, padded with i empty
		// strings so all columns stay the same length.
		for k := range res.Labels {
			if labels[k] == nil {
				labels[k] = make([]string, i)
			}
		}
		// Append this row's value (possibly "") to every label column.
		for k := range labels {
			labels[k] = append(labels[k], res.Labels[k])
		}
		// A benchmark result line is "name iterations value unit [value unit]...";
		// start at field 2 to skip the name and iteration count.
		f := strings.Fields(res.Content)
		metrics := make(map[string]float64)
		for j := 2; j+2 <= len(f); j += 2 {
			val, err := strconv.ParseFloat(f[j], 64)
			if err != nil {
				// Non-numeric field: skip this value/unit pair.
				continue
			}
			unit := f[j+1]
			if results[unit] == nil {
				// New metric column, zero-padded for earlier rows.
				results[unit] = make([]float64, i)
			}
			metrics[unit] = val
		}
		// Append this row's metrics (zero for metrics it lacks).
		for k := range results {
			results[k] = append(results[k], metrics[k])
		}
		i++
	}

	tab := new(table.Builder).Add("name", names)

	for k, v := range labels {
		tab.Add(k, v)
	}
	for k, v := range results {
		tab.Add(k, v)
		resultCols = append(resultCols, k)
	}

	// Sort the result column names for deterministic output
	// (map iteration order is random).
	sort.Strings(resultCols)

	return tab.Done(), resultCols
}

// plotOptions holds the user-controlled knobs for plot.
type plotOptions struct {
	// x names the column to use for the X axis.
	// If unspecified, "commit" is used.
	x string
	// raw will return the raw points without any averaging/smoothing.
	// The only result column will be "result".
	raw bool
	// correlate will use the string column "upload-part" as an indication that results came from the same machine. Commits present in multiple parts will be used to correlate results.
	correlate bool
}

// plot takes raw benchmark data in t and produces a Grouping object containing filtered, normalized metric results for a graph.
// t must contain the string columns "commit", "commit-time", "branch". resultCols specifies the names of float64 columns containing metric results.
// The returned grouping has columns "commit", "commit-time", "commit-index", "branch", "metric", "normalized min result", "normalized max result", "normalized mean result", "filtered normalized mean result".
// This is roughly the algorithm from github.com/aclements/go-misc/benchplot
func plot(t table.Grouping, resultCols []string, opt plotOptions) table.Grouping {
	// NOTE(review): nrows is computed and incremented below but never
	// read — it appears to be vestigial.
	nrows := len(table.GroupBy(t, "name").Tables())

	// Turn ordered commit-time into a "commit-index" column.
	if opt.x == "" {
		opt.x = "commit"
	}
	// TODO(quentin): One SortBy call should do this, but
	// sometimes it seems to sort by the second column instead of
	// the first. Do them in separate steps until SortBy is fixed.
	t = table.SortBy(t, opt.x)
	t = table.SortBy(t, "commit-time")
	t = colIndex{col: opt.x}.F(t)

	// Unpivot all of the metrics into one column.
	t = table.Unpivot(t, "metric", "result", resultCols...)

	// TODO(quentin): Let user choose which metric(s) to keep.
	t = table.FilterEq(t, "metric", "ns/op")

	if opt.raw {
		// Raw mode skips averaging, normalization, and smoothing.
		return t
	}

	// Average each result at each commit (but keep column names
	// the same to keep things easier to read).
	t = ggstat.Agg("commit", "name", "metric", "branch", "commit-index")(ggstat.AggMean("result"), ggstat.AggQuantile("median", .5, "result"), ggstat.AggMin("result"), ggstat.AggMax("result")).F(t)
	y := "mean result"

	// Normalize to earliest commit on master. It's important to
	// do this before the geomean if there are commits missing.
	// Unfortunately, that also means we have to *temporarily*
	// group by name and metric, since the geomean needs to be
	// done on a different grouping.
	t = table.GroupBy(t, "name", "metric")
	// Every statistic is normalized by the mean result at the first
	// master commit (DenomCols repeats "mean result" for each column).
	t = ggstat.Normalize{X: "branch", By: firstMasterIndex, Cols: []string{"mean result", "median result", "max result", "min result"}, DenomCols: []string{"mean result", "mean result", "mean result", "mean result"}}.F(t)
	y = "normalized " + y
	// The un-normalized columns are no longer needed.
	for _, col := range []string{"mean result", "median result", "max result", "min result"} {
		t = table.Remove(t, col)
	}
	t = table.Ungroup(table.Ungroup(t))

	// Compute geomean for each metric at each commit if there's
	// more than one benchmark.
	if len(table.GroupBy(t, "name").Tables()) > 1 {
		// NaNs (benchmarks missing at some commit) would poison the
		// geomean, so drop those rows first.
		gt := removeNaNs(t, y)
		gt = ggstat.Agg("commit", "metric", "branch", "commit-index")(ggstat.AggGeoMean(y, "normalized median result"), ggstat.AggMin("normalized min result"), ggstat.AggMax("normalized max result")).F(gt)
		gt = table.MapTables(gt, func(_ table.GroupID, t *table.Table) *table.Table {
			// The leading space sorts " geomean" before benchmark names.
			return table.NewBuilder(t).AddConst("name", " geomean").Done()
		})
		// Rename the aggregate columns back to the standard names so the
		// geomean rows can be concatenated with the per-benchmark rows.
		gt = table.Rename(gt, "geomean "+y, y)
		gt = table.Rename(gt, "geomean normalized median result", "normalized median result")
		gt = table.Rename(gt, "min normalized min result", "normalized min result")
		gt = table.Rename(gt, "max normalized max result", "normalized max result")
		t = table.Concat(t, gt)
		nrows++
	}

	// Filter the data to reduce noise.
	t = table.GroupBy(t, "name", "metric")
	t = kza{y, 15, 3}.F(t)
	y = "filtered " + y
	t = table.Ungroup(table.Ungroup(t))

	return t
}

// hasStringColumn returns whether t has a []string column called col.
func hasStringColumn(t table.Grouping, col string) bool {
	// Only the first grouped table is checked; columns are assumed
	// uniform across groups.
	c := t.Table(t.Tables()[0]).Column(col)
	if c == nil {
		return false
	}
	_, ok := c.([]string)
	return ok
}

// aggResults pivots the table, taking the columns in Values and making a new column for each distinct value in Across.
// aggResults("in", []string{"value1", "value2"}) will reshape a table like
//
// in value1 value2
// one 1 2
// two 3 4
//
// and will turn it into a table like
//
// one/value1 one/value2 two/value1 two/value2
// 1 2 3 4
//
// across columns must be []string, and value columns must be []float64.
type aggResults struct {
	// Across is the name of the column whose values are the column prefix.
	Across string
	// Values is the name of the columns to split.
	Values []string
	// Prefixes is filled in after calling agg with the name of each prefix that was found.
	Prefixes []string
}

// agg implements ggstat.Aggregator and allows using an aggResults with ggstat.Agg.
func (a *aggResults) agg(input table.Grouping, output *table.Builder) {
	var prefixes []string
	// One output row per input group.
	rows := len(input.Tables())
	// columns maps "prefix/value-column" to its output column data.
	columns := make(map[string][]float64)
	for i, gid := range input.Tables() {
		// vs holds each Values column of this group, parallel to a.Values.
		var vs [][]float64
		for _, col := range a.Values {
			vs = append(vs, input.Table(gid).MustColumn(col).([]float64))
		}
		as := input.Table(gid).MustColumn(a.Across).([]string)
		for j, prefix := range as {
			for k, col := range a.Values {
				key := prefix + "/" + col
				if columns[key] == nil {
					if k == 0 {
						// First time we've seen this prefix, track it.
						prefixes = append(prefixes, prefix)
					}
					// Initialize the column to NaN so groups that lack
					// this prefix show up as missing data. (The loop
					// variable here deliberately shadows the outer i.)
					columns[key] = make([]float64, rows)
					for i := range columns[key] {
						columns[key][i] = math.NaN()
					}
				}
				columns[key][i] = vs[k][j]
			}
		}
	}
	// Sort for deterministic column order, then publish the prefixes
	// so callers can build chart columns from them.
	sort.Strings(prefixes)
	a.Prefixes = prefixes
	for _, prefix := range prefixes {
		for _, col := range a.Values {
			key := prefix + "/" + col
			output.Add(key, columns[key])
		}
	}
}

// firstMasterIndex returns the index of the first commit on master.
// This is used to find the value to normalize against.
384 func firstMasterIndex(bs []string) int { 385 return slice.Index(bs, "master") 386 } 387 388 // colIndex is a gg.Stat that adds a column called "commit-index" sequentially counting unique values of the column "commit". 389 type colIndex struct { 390 // col specifies the string column to assign indices to. If unspecified, "commit" will be used. 391 col string 392 } 393 394 func (ci colIndex) F(g table.Grouping) table.Grouping { 395 if ci.col == "" { 396 ci.col = "commit" 397 } 398 return table.MapTables(g, func(_ table.GroupID, t *table.Table) *table.Table { 399 idxs := make([]int, t.Len()) 400 last, idx := "", -1 401 for i, hash := range t.MustColumn(ci.col).([]string) { 402 if hash != last { 403 idx++ 404 last = hash 405 } 406 idxs[i] = idx 407 } 408 t = table.NewBuilder(t).Add("commit-index", idxs).Done() 409 410 return t 411 }) 412 } 413 414 // removeNaNs returns a new Grouping with rows containing NaN in col removed. 415 func removeNaNs(g table.Grouping, col string) table.Grouping { 416 return table.Filter(g, func(result float64) bool { 417 return !math.IsNaN(result) 418 }, col) 419 } 420 421 // kza implements adaptive Kolmogorov-Zurbenko filtering on the data in X. 422 type kza struct { 423 X string 424 M, K int 425 } 426 427 func (k kza) F(g table.Grouping) table.Grouping { 428 return table.MapTables(g, func(_ table.GroupID, t *table.Table) *table.Table { 429 var xs []float64 430 slice.Convert(&xs, t.MustColumn(k.X)) 431 nxs := AdaptiveKolmogorovZurbenko(xs, k.M, k.K) 432 return table.NewBuilder(t).Add("filtered "+k.X, nxs).Done() 433 }) 434 } 435 436 // column represents a column in a google.visualization.DataTable 437 type column struct { 438 Name string `json:"id"` 439 Role string `json:"role,omitempty"` 440 // These fields are filled in by tableToJS if unspecified. 441 Type string `json:"type"` 442 Label string `json:"label"` 443 } 444 445 // tableToJS converts a Table to a javascript literal which can be passed to "new google.visualization.DataTable". 
446 func tableToJS(t *table.Table, columns []column) template.JS { 447 var out bytes.Buffer 448 fmt.Fprint(&out, "{cols: [") 449 var slices []table.Slice 450 for i, c := range columns { 451 if i > 0 { 452 fmt.Fprint(&out, ",\n") 453 } 454 col := t.Column(c.Name) 455 slices = append(slices, col) 456 if c.Type == "" { 457 switch col.(type) { 458 case []string: 459 c.Type = "string" 460 case []int, []float64: 461 c.Type = "number" 462 default: 463 // Matches the hardcoded string below. 464 c.Type = "string" 465 } 466 } 467 if c.Label == "" { 468 c.Label = c.Name 469 } 470 data, err := json.Marshal(c) 471 if err != nil { 472 panic(err) 473 } 474 out.Write(data) 475 } 476 fmt.Fprint(&out, "],\nrows: [") 477 for i := 0; i < t.Len(); i++ { 478 if i > 0 { 479 fmt.Fprint(&out, ",\n") 480 } 481 fmt.Fprint(&out, "{c:[") 482 for j := range columns { 483 if j > 0 { 484 fmt.Fprint(&out, ", ") 485 } 486 fmt.Fprint(&out, "{v: ") 487 var value []byte 488 var err error 489 switch column := slices[j].(type) { 490 case []string: 491 value, err = json.Marshal(column[i]) 492 case []int: 493 value, err = json.Marshal(column[i]) 494 case []float64: 495 value, err = json.Marshal(column[i]) 496 default: 497 value = []byte(`"unknown column type"`) 498 } 499 if err != nil { 500 panic(err) 501 } 502 out.Write(value) 503 fmt.Fprint(&out, "}") 504 } 505 fmt.Fprint(&out, "]}") 506 } 507 fmt.Fprint(&out, "]}") 508 return template.JS(out.String()) 509 }