golang.org/x/build@v0.0.0-20240506185731-218518f32b70/perf/app/influx.go (about) 1 // Copyright 2022 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package app 6 7 import ( 8 "bytes" 9 "context" 10 "fmt" 11 "io" 12 "log" 13 "net/http" 14 "strings" 15 "time" 16 17 "cloud.google.com/go/compute/metadata" 18 secretmanager "cloud.google.com/go/secretmanager/apiv1" 19 "cloud.google.com/go/secretmanager/apiv1/secretmanagerpb" 20 influxdb2 "github.com/influxdata/influxdb-client-go/v2" 21 "golang.org/x/build/internal/influx" 22 "golang.org/x/build/perfdata" 23 "golang.org/x/perf/benchfmt" 24 "golang.org/x/perf/benchseries" 25 "google.golang.org/api/idtoken" 26 ) 27 28 const ( 29 backfillWindow = 30 * 24 * time.Hour // 30 days. 30 ) 31 32 func (a *App) influxClient(ctx context.Context) (influxdb2.Client, error) { 33 if a.InfluxHost == "" { 34 return nil, fmt.Errorf("Influx host unknown (set INFLUX_HOST?)") 35 } 36 37 token, err := a.findInfluxToken(ctx) 38 if err != nil { 39 return nil, fmt.Errorf("error finding Influx token: %w", err) 40 } 41 42 return influxdb2.NewClient(a.InfluxHost, token), nil 43 } 44 45 // syncInflux handles /cron/syncinflux, which updates an InfluxDB instance with 46 // the latest data from perfdata.golang.org (i.e. storage), or backfills it. 47 func (a *App) syncInflux(w http.ResponseWriter, r *http.Request) { 48 ctx := r.Context() 49 50 if a.AuthCronEmail != "" { 51 if err := checkCronAuth(ctx, r, a.AuthCronEmail); err != nil { 52 log.Printf("Dropping invalid request to /cron/syncinflux: %v", err) 53 http.Error(w, err.Error(), 403) 54 return 55 } 56 } 57 58 ifxc, err := a.influxClient(ctx) 59 if err != nil { 60 log.Printf("Error getting Influx client: %v", err) 61 http.Error(w, err.Error(), 500) 62 return 63 } 64 defer ifxc.Close() 65 66 log.Printf("Connecting to influx...") 67 68 lastPush, err := latestInfluxTimestamp(ctx, ifxc) 69 if err != nil { 70 http.Error(w, err.Error(), 500) 71 return 72 } 73 if lastPush.IsZero() { 74 // Pick the backfill window. 75 lastPush = time.Now().Add(-backfillWindow) 76 } 77 78 log.Printf("Last push to influx: %v", lastPush) 79 80 uploads, err := a.uploadsSince(ctx, lastPush) 81 if err != nil { 82 http.Error(w, err.Error(), 500) 83 return 84 } 85 86 log.Printf("Uploads since last push: %d", len(uploads)) 87 88 var errs []error 89 for _, u := range uploads { 90 log.Printf("Processing upload %s...", u.UploadID) 91 if err := a.pushRunToInflux(ctx, ifxc, u); err != nil { 92 errs = append(errs, err) 93 log.Printf("Error processing upload %s: %v", u.UploadID, err) 94 } 95 } 96 if len(errs) > 0 { 97 var failures strings.Builder 98 for _, err := range errs { 99 failures.WriteString(err.Error()) 100 failures.WriteString("\n") 101 } 102 http.Error(w, failures.String(), 500) 103 } 104 } 105 106 func checkCronAuth(ctx context.Context, r *http.Request, wantEmail string) error { 107 const audience = "/cron/syncinflux" 108 109 const authHeaderPrefix = "Bearer " 110 authHeader := r.Header.Get("Authorization") 111 if !strings.HasPrefix(authHeader, authHeaderPrefix) { 112 return fmt.Errorf("missing Authorization header") 113 } 114 token := authHeader[len(authHeaderPrefix):] 115 116 p, err := idtoken.Validate(ctx, token, audience) 117 if err != nil { 118 return err 119 } 120 121 if p.Issuer != "https://accounts.google.com" { 122 return fmt.Errorf("issuer must be https://accounts.google.com, but is %s", p.Issuer) 123 } 124 125 e, ok := p.Claims["email"] 126 if !ok { 127 return fmt.Errorf("email missing from token") 128 } 129 email, ok := e.(string) 130 if !ok { 131 return fmt.Errorf("email unexpected type %T", e) 132 } 133 134 if email != wantEmail { 135 return fmt.Errorf("email got %s want %s", email, wantEmail) 136 } 137 138 return nil 139 } 140 141 func (a *App) findInfluxToken(ctx context.Context) (string, error) { 142 if a.InfluxToken != "" { 143 return a.InfluxToken, nil 144 } 145 146 var project string 147 if a.InfluxProject != "" { 148 project = a.InfluxProject 149 } else { 150 var err error 151 project, err = metadata.ProjectID() 152 if err != nil { 153 return "", fmt.Errorf("error determining GCP project ID (set INFLUX_TOKEN or INFLUX_PROJECT?): %w", err) 154 } 155 } 156 157 log.Printf("Fetching Influx token from %s...", project) 158 159 token, err := fetchInfluxToken(ctx, project) 160 if err != nil { 161 return "", fmt.Errorf("error fetching Influx token: %w", err) 162 } 163 164 return token, nil 165 } 166 167 func fetchInfluxToken(ctx context.Context, project string) (string, error) { 168 client, err := secretmanager.NewClient(ctx) 169 if err != nil { 170 return "", fmt.Errorf("error creating secret manager client: %w", err) 171 } 172 defer client.Close() 173 174 req := &secretmanagerpb.AccessSecretVersionRequest{ 175 Name: "projects/" + project + "/secrets/" + influx.AdminTokenSecretName + "/versions/latest", 176 } 177 178 result, err := client.AccessSecretVersion(ctx, req) 179 if err != nil { 180 return "", fmt.Errorf("failed to access secret version: %w", err) 181 } 182 183 return string(result.Payload.Data), nil 184 } 185 186 func latestInfluxTimestamp(ctx context.Context, ifxc influxdb2.Client) (time.Time, error) { 187 qc := ifxc.QueryAPI(influx.Org) 188 // Find the latest upload in the last month. 189 q := fmt.Sprintf(`from(bucket:%q) 190 |> range(start: -%dh) 191 |> filter(fn: (r) => r["_measurement"] == "benchmark-result") 192 |> filter(fn: (r) => r["_field"] == "upload-time") 193 |> group() 194 |> sort(columns: ["_value"], desc: true) 195 |> limit(n: 1)`, influx.Bucket, backfillWindow/time.Hour) 196 result, err := influxQuery(ctx, qc, q) 197 if err != nil { 198 return time.Time{}, err 199 } 200 for result.Next() { 201 // Except for the point timestamp, all other timestamps are stored as strings, specifically 202 // as the RFC3339Nano format. 203 // 204 // We only care about the first result, and there should be just one. 205 return time.Parse(time.RFC3339Nano, result.Record().Value().(string)) 206 } 207 return time.Time{}, result.Err() 208 } 209 210 func (a *App) uploadsSince(ctx context.Context, since time.Time) ([]perfdata.UploadInfo, error) { 211 query := strings.Join([]string{ 212 // Limit results to the window from since to now. 213 "upload-time>" + since.UTC().Format(time.RFC3339), 214 // Only take results generated by the coordinator. This ensures that nobody can 215 // just upload data to perfdata.golang.org and spoof us (accidentally or intentionally). 216 "by:public-worker-builder@golang-ci-luci.iam.gserviceaccount.com", 217 // Only take results that were generated from post-submit runs, not trybots. 218 "post-submit:true", 219 }, " ") 220 uploadList := a.StorageClient.ListUploads( 221 ctx, 222 query, 223 nil, 224 500, // TODO(mknyszek): page results if this isn't enough. 225 ) 226 defer uploadList.Close() 227 228 var uploads []perfdata.UploadInfo 229 for uploadList.Next() { 230 uploads = append(uploads, uploadList.Info()) 231 } 232 if err := uploadList.Err(); err != nil { 233 return nil, err 234 } 235 return uploads, nil 236 } 237 238 func (a *App) pushRunToInflux(ctx context.Context, ifxc influxdb2.Client, u perfdata.UploadInfo) error { 239 s, err := a.StorageClient.Query(ctx, fmt.Sprintf("upload:%s", u.UploadID)) 240 if err != nil { 241 return err 242 } 243 244 // We need to read the upload multiple times via benchfmt.Reader, so 245 // copy to a buffer we can seek back to the beginning. 246 var buf bytes.Buffer 247 if _, err := io.Copy(&buf, s); err != nil { 248 return fmt.Errorf("error reading upload: %w", err) 249 } 250 if err := s.Close(); err != nil { 251 return fmt.Errorf("error closing upload: %w", err) 252 } 253 254 comparisons := []struct { 255 suffix string 256 compare string 257 numerator string 258 denominator string 259 filter string 260 }{ 261 { 262 // Default: toolchain:baseline vs experiment without PGO 263 compare: "toolchain", 264 numerator: "experiment", 265 denominator: "baseline", 266 filter: "-pgo:on", // "off" or unset (bent doesn't set pgo). 267 }, 268 { 269 // toolchain:baseline vs experiment with PGO 270 suffix: "/pgo=on,toolchain:baseline-vs-experiment", 271 compare: "toolchain", 272 numerator: "experiment", 273 denominator: "baseline", 274 filter: "pgo:on", 275 }, 276 { 277 // pgo:off vs on with experiment toolchain (impact of enabling PGO) 278 suffix: "/toolchain:experiment,pgo=off-vs-on", 279 compare: "pgo", 280 numerator: "on", 281 denominator: "off", 282 filter: "toolchain:experiment", 283 }, 284 } 285 for _, c := range comparisons { 286 r := bytes.NewReader(buf.Bytes()) 287 fmtr := benchfmt.NewReader(r, u.UploadID) 288 289 // Use the default comparisons. Namely: 290 // 1. Build a series out of commit dates (in our case, this is length 1). 291 // 2. Split out comparisons by benchmark name (unit we get for free). 292 // 293 // Copy the options for mutation. 294 opts := *benchseries.DefaultBuilderOptions() 295 opts.Compare = c.compare 296 opts.Numerator = c.numerator 297 opts.Denominator = c.denominator 298 if opts.Filter == "" { 299 opts.Filter = c.filter 300 } else { 301 opts.Filter += " " + c.filter 302 } 303 304 if err := a.compareAndPush(ctx, ifxc, fmtr, &opts, c.suffix); err != nil { 305 return fmt.Errorf("error in compareAndPush(%s): %w", c.suffix, err) 306 } 307 } 308 309 return nil 310 } 311 312 func (a *App) compareAndPush(ctx context.Context, ifxc influxdb2.Client, r *benchfmt.Reader, opts *benchseries.BuilderOptions, suffix string) error { 313 // Scan the results into a benchseries builder. 314 builder, err := benchseries.NewBuilder(opts) 315 if err != nil { 316 return fmt.Errorf("failed to create benchseries builder: %v", err) 317 } 318 for r.Scan() { 319 rec := r.Result() 320 if err, ok := rec.(*benchfmt.SyntaxError); ok { 321 // Non-fatal result parse error. Warn 322 // but keep going. 323 log.Printf("Parse error: %v", err) 324 continue 325 } 326 res := rec.(*benchfmt.Result) 327 builder.Add(res) 328 } 329 if err := r.Err(); err != nil { 330 return err 331 } 332 333 // Run the comparison. We don't have any existing results so our 334 // duplicate policy doesn't matter here. Just pick replacement. 335 comparisons, err := builder.AllComparisonSeries(nil, benchseries.DUPE_REPLACE) 336 if err != nil { 337 return fmt.Errorf("failed to creation comparison series: %w", err) 338 } 339 340 const ( 341 confidence = 0.95 342 bootstrap = 1000 343 ) 344 345 // Iterate over the comparisons, extract the results, and push them to Influx. 346 wapi := ifxc.WriteAPIBlocking(influx.Org, influx.Bucket) 347 comparisonLoop: 348 for _, cs := range comparisons { 349 cs.AddSummaries(confidence, bootstrap) 350 351 summaries := cs.Summaries 352 353 // Build a map of residues with single values. Our benchmark pipeline enforces 354 // that the only key that has a differing value across benchmark runs of the same 355 // name and unit is "toolchain." 356 // 357 // Most other keys are singular for *all* benchmarks in a run (like "goos") but 358 // even those that are not (like "pkg") remain the same even if "toolchain" differs. 359 // 360 // We build a map instead of just using them because we need to decide at upload 361 // time whether the key is an Influx tag or field. 362 residues := make(map[string]string) 363 for _, r := range cs.Residues { 364 if len(r.Slice) > 1 { 365 log.Printf("found non-singular key %q with values %v; comparison may be invalid, skipping...", r.S, r.Slice) 366 continue comparisonLoop 367 } 368 residues[r.S] = r.Slice[0] 369 } 370 371 // N.B. In our case Series should have length 1, because we're processing 372 // a single result here. By default the string value here is the commit date. 373 for i, series := range cs.Series { 374 for j, benchmarkName := range cs.Benchmarks { 375 sum := summaries[i][j] 376 if !sum.Defined() { 377 log.Printf("Summary not defined for %s %s", series, benchmarkName) 378 continue 379 } 380 381 measurement := "benchmark-result" // measurement 382 benchmarkName = benchmarkName + suffix // tag 383 series = series // time 384 center, low, high := sum.Center, sum.Low, sum.High // fields 385 unit := cs.Unit // tag 386 uploadTime := residues["upload-time"] // field 387 cpu := residues["cpu"] // tag 388 goarch := residues["goarch"] // tag 389 goos := residues["goos"] // tag 390 benchmarksCommit := residues["benchmarks-commit"] // field 391 baselineCommit := cs.HashPairs[series].DenHash // field 392 experimentCommit := cs.HashPairs[series].NumHash // field 393 repository := residues["repository"] // tag 394 branch := residues["branch"] // tag 395 396 // cmd/bench didn't set repository prior to 397 // CL 413915. Older runs are all against go. 398 if repository == "" { 399 repository = "go" 400 } 401 402 // Push to influx. 403 t, err := benchseries.ParseNormalizedDateString(series) 404 if err != nil { 405 return fmt.Errorf("error parsing normalized date: %w", err) 406 } 407 fields := map[string]interface{}{ 408 "center": center, 409 "low": low, 410 "high": high, 411 "upload-time": uploadTime, 412 "benchmarks-commit": benchmarksCommit, 413 "baseline-commit": baselineCommit, 414 "experiment-commit": experimentCommit, 415 } 416 tags := map[string]string{ 417 "name": benchmarkName, 418 "unit": unit, 419 "cpu": cpu, 420 "goarch": goarch, 421 "goos": goos, 422 "repository": repository, 423 "branch": branch, 424 // TODO(prattmic): Add pkg, which 425 // benchseries currently can't handle. 426 } 427 p := influxdb2.NewPoint(measurement, tags, fields, t) 428 if err := wapi.WritePoint(ctx, p); err != nil { 429 return fmt.Errorf("error writing point: %w", err) 430 } 431 } 432 } 433 } 434 return nil 435 }