golang.org/x/build@v0.0.0-20240506185731-218518f32b70/cmd/coordinator/buildstatus.go (about) 1 // Copyright 2021 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:build linux || darwin 6 7 package main 8 9 import ( 10 "bytes" 11 "context" 12 "errors" 13 "fmt" 14 "html" 15 "html/template" 16 "io" 17 "log" 18 "os" 19 "path" 20 "strings" 21 "sync" 22 "sync/atomic" 23 "time" 24 25 "cloud.google.com/go/errorreporting" 26 "go4.org/syncutil" 27 "golang.org/x/build/buildenv" 28 "golang.org/x/build/buildlet" 29 "golang.org/x/build/dashboard" 30 "golang.org/x/build/internal/buildgo" 31 "golang.org/x/build/internal/buildstats" 32 "golang.org/x/build/internal/coordinator/pool" 33 "golang.org/x/build/internal/coordinator/pool/queue" 34 "golang.org/x/build/internal/coordinator/schedule" 35 "golang.org/x/build/internal/singleflight" 36 "golang.org/x/build/internal/sourcecache" 37 "golang.org/x/build/internal/spanlog" 38 "golang.org/x/build/livelog" 39 "golang.org/x/build/maintner/maintnerd/apipb" 40 "golang.org/x/build/types" 41 "golang.org/x/mod/semver" 42 perfstorage "golang.org/x/perf/storage" 43 ) 44 45 // newBuild constructs a new *buildStatus from rev and commit details. 46 // detail may be only partially populated, but it must have at least RevBranch set. 47 // If rev.SubRev is set, then detail.SubRevBranch must also be set. 48 func newBuild(rev buildgo.BuilderRev, detail commitDetail) (*buildStatus, error) { 49 // Note: can't acquire statusMu in newBuild, as this is called 50 // from findTryWork -> newTrySet, which holds statusMu. 
51 52 conf, ok := dashboard.Builders[rev.Name] 53 if !ok { 54 return nil, fmt.Errorf("unknown builder type %q", rev.Name) 55 } 56 if rev.Rev == "" { 57 return nil, fmt.Errorf("required field Rev is empty; got %+v", rev) 58 } 59 if detail.RevBranch == "" { 60 return nil, fmt.Errorf("required field RevBranch is empty; got %+v", detail) 61 } 62 if rev.SubRev != "" && detail.SubRevBranch == "" { 63 return nil, fmt.Errorf("field SubRevBranch is empty, required because SubRev is present; got %+v", detail) 64 } 65 66 ctx, cancel := context.WithCancel(context.Background()) 67 return &buildStatus{ 68 buildID: "B" + randHex(9), 69 BuilderRev: rev, 70 commitDetail: detail, 71 conf: conf, 72 startTime: time.Now(), 73 ctx: ctx, 74 cancel: cancel, 75 }, nil 76 } 77 78 // buildStatus is the status of a build. 79 type buildStatus struct { 80 // Immutable: 81 buildgo.BuilderRev 82 commitDetail 83 buildID string // "B" + 9 random hex 84 conf *dashboard.BuildConfig 85 startTime time.Time // actually time of newBuild (~same thing) 86 trySet *trySet // or nil 87 88 onceInitHelpers sync.Once // guards call of onceInitHelpersFunc 89 helpers <-chan buildlet.Client 90 ctx context.Context // used to start the build 91 cancel context.CancelFunc // used to cancel context; for use by setDone only 92 93 hasBuildlet int32 // atomic: non-zero if this build has a buildlet; for status.go. 
94 95 mu sync.Mutex // guards following 96 canceled bool // whether this build was forcefully canceled, so errors should be ignored 97 schedItem *queue.SchedItem // for the initial buildlet (ignoring helpers for now) 98 logURL string // if non-empty, permanent URL of log 99 bc buildlet.Client // nil initially, until pool returns one 100 done time.Time // finished running 101 succeeded bool // set when done 102 output livelog.Buffer // stdout and stderr 103 events []eventAndTime 104 useSnapshotMemo map[string]bool // memoized result of useSnapshotFor(rev), where the key is rev 105 } 106 107 func (st *buildStatus) NameAndBranch() string { 108 result := st.Name 109 if st.RevBranch != "master" { 110 // For the common and currently-only case of 111 // "release-branch.go1.15" say "linux-amd64 (Go 1.15.x)" 112 const releasePrefix = "release-branch.go" 113 if strings.HasPrefix(st.RevBranch, releasePrefix) { 114 result = fmt.Sprintf("%s (Go %s.x)", st.Name, strings.TrimPrefix(st.RevBranch, releasePrefix)) 115 } else { 116 // But if we ever support building other branches, 117 // fall back to something verbose until we add a 118 // special case: 119 result = fmt.Sprintf("%s (go branch %s)", st.Name, st.RevBranch) 120 } 121 } 122 // For an x repo running on a CL in a different repo, 123 // add a prefix specifying the name of the x repo. 124 if st.SubName != "" && st.trySet != nil && st.SubName != st.trySet.Project { 125 result = "(x/" + st.SubName + ") " + result 126 } 127 return result 128 } 129 130 // cancelBuild marks a build as no longer wanted, cancels its context, 131 // and tears down its buildlet. 132 func (st *buildStatus) cancelBuild() { 133 st.mu.Lock() 134 if st.canceled { 135 // Already done. Shouldn't happen currently, but make 136 // it safe for duplicate calls in the future. 137 st.mu.Unlock() 138 return 139 } 140 141 st.canceled = true 142 st.output.Close() 143 // cancel the context, which stops the creation of helper 144 // buildlets, etc. 
The context isn't plumbed everywhere yet, 145 // so we also forcefully close its buildlet out from under it 146 // to trigger a failure. When we get the failure later, we 147 // just ignore it (knowing that the canceled bit was set 148 // true). 149 st.cancel() 150 bc := st.bc 151 st.mu.Unlock() 152 153 if bc != nil { 154 // closing the buildlet may be slow (up to ~10 seconds 155 // on a wedged buildlet) so run it in its own 156 // goroutine, so we're not holding st.mu for too long. 157 bc.Close() 158 } 159 } 160 161 func (st *buildStatus) setDone(succeeded bool) { 162 st.mu.Lock() 163 defer st.mu.Unlock() 164 if st.canceled { 165 return 166 } 167 st.succeeded = succeeded 168 st.done = time.Now() 169 st.output.Close() 170 st.cancel() 171 } 172 173 func (st *buildStatus) isRunning() bool { 174 st.mu.Lock() 175 defer st.mu.Unlock() 176 return st.isRunningLocked() 177 } 178 179 func (st *buildStatus) isRunningLocked() bool { return st.done.IsZero() } 180 181 func (st *buildStatus) logf(format string, args ...interface{}) { 182 log.Printf("[build %s %s]: %s", st.Name, st.Rev, fmt.Sprintf(format, args...)) 183 } 184 185 // start starts the build in a new goroutine. 186 // The buildStatus's context is closed when the build is complete, 187 // successfully or not. 188 func (st *buildStatus) start() { 189 setStatus(st.BuilderRev, st) 190 go func() { 191 err := st.build() 192 if err == errSkipBuildDueToDeps { 193 st.setDone(true) 194 } else { 195 if err != nil { 196 fmt.Fprintf(st, "\n\nError: %v\n", err) 197 log.Println(st.BuilderRev, "failed:", err) 198 } 199 st.setDone(err == nil) 200 pool.CoordinatorProcess().PutBuildRecord(st.buildRecord()) 201 } 202 markDone(st.BuilderRev) 203 }() 204 } 205 206 func (st *buildStatus) buildletPool() pool.Buildlet { 207 return pool.ForHost(st.conf.HostConfig()) 208 } 209 210 func (st *buildStatus) expectedMakeBashDuration() time.Duration { 211 // TODO: base this on historical measurements, instead of statically configured. 
212 // TODO: move this to dashboard/builders.go? But once we based on on historical 213 // measurements, it'll need GCE services (bigtable/bigquery?), so it's probably 214 // better in this file. 215 goos, goarch := st.conf.GOOS(), st.conf.GOARCH() 216 217 if goos == "linux" { 218 if goarch == "arm" { 219 return 4 * time.Minute 220 } 221 return 45 * time.Second 222 } 223 return 60 * time.Second 224 } 225 226 func (st *buildStatus) expectedBuildletStartDuration() time.Duration { 227 // TODO: move this to dashboard/builders.go? But once we based on on historical 228 // measurements, it'll need GCE services (bigtable/bigquery?), so it's probably 229 // better in this file. 230 p := st.buildletPool() 231 switch p.(type) { 232 case *pool.GCEBuildlet: 233 if strings.HasPrefix(st.Name, "android-") { 234 // about a minute for buildlet + minute for Android emulator to be usable 235 return 2 * time.Minute 236 } 237 return time.Minute 238 case *pool.EC2Buildlet: 239 // lack of historical data. 2 * time.Minute is a safe overestimate 240 return 2 * time.Minute 241 case *pool.ReverseBuildletPool: 242 goos, arch := st.conf.GOOS(), st.conf.GOARCH() 243 if goos == "darwin" { 244 if arch == "arm" || arch == "arm64" { 245 // iOS; idle or it's not. 246 return 0 247 } 248 if arch == "amd64" || arch == "386" { 249 return 0 // TODO: remove this once we're using VMware 250 // return 1 * time.Minute // VMware boot of hermetic OS X 251 } 252 } 253 } 254 return 0 255 } 256 257 // getHelpersReadySoon waits a bit (as a function of the build 258 // configuration) and starts getting the buildlets for test sharding 259 // ready, such that they're ready when make.bash is done. But we don't 260 // want to start too early, lest we waste idle resources during make.bash. 
261 func (st *buildStatus) getHelpersReadySoon() { 262 if st.IsSubrepo() || st.conf.NumTestHelpers(st.isTry()) == 0 || st.conf.IsReverse() { 263 return 264 } 265 time.AfterFunc(st.expectedMakeBashDuration()-st.expectedBuildletStartDuration(), 266 func() { 267 st.LogEventTime("starting_helpers") 268 st.getHelpers() // and ignore the result. 269 }) 270 } 271 272 // getHelpers returns a channel of buildlet test helpers, with an item 273 // sent as they become available. The channel is closed at the end. 274 func (st *buildStatus) getHelpers() <-chan buildlet.Client { 275 st.onceInitHelpers.Do(st.onceInitHelpersFunc) 276 return st.helpers 277 } 278 279 func (st *buildStatus) onceInitHelpersFunc() { 280 schedTmpl := &queue.SchedItem{ 281 BuilderRev: st.BuilderRev, 282 HostType: st.conf.HostType, 283 IsTry: st.isTry(), 284 CommitTime: st.commitTime(), 285 Branch: st.RevBranch, 286 Repo: st.RepoOrGo(), 287 User: st.AuthorEmail, 288 } 289 st.helpers = getBuildlets(st.ctx, st.conf.NumTestHelpers(st.isTry()), schedTmpl, st) 290 } 291 292 // useSnapshot reports whether this type of build uses a snapshot of 293 // make.bash if it exists and that the snapshot exists. 
294 func (st *buildStatus) useSnapshot() bool { 295 return st.useSnapshotFor(st.Rev) 296 } 297 298 func (st *buildStatus) useSnapshotFor(rev string) bool { 299 if st.conf.SkipSnapshot { 300 return false 301 } 302 st.mu.Lock() 303 defer st.mu.Unlock() 304 if b, ok := st.useSnapshotMemo[rev]; ok { 305 return b 306 } 307 br := st.BuilderRev 308 br.Rev = rev 309 b := br.SnapshotExists(context.TODO(), pool.NewGCEConfiguration().BuildEnv()) 310 if st.useSnapshotMemo == nil { 311 st.useSnapshotMemo = make(map[string]bool) 312 } 313 st.useSnapshotMemo[rev] = b 314 return b 315 } 316 317 func (st *buildStatus) forceSnapshotUsage() { 318 st.mu.Lock() 319 defer st.mu.Unlock() 320 if st.useSnapshotMemo == nil { 321 st.useSnapshotMemo = make(map[string]bool) 322 } 323 st.useSnapshotMemo[st.Rev] = true 324 } 325 326 func (st *buildStatus) checkDep(ctx context.Context, dep string) (have bool, err error) { 327 span := st.CreateSpan("ask_maintner_has_ancestor") 328 defer func() { span.Done(err) }() 329 fails := 0 330 for { 331 res, err := maintnerClient.HasAncestor(ctx, &apipb.HasAncestorRequest{ 332 Commit: st.Rev, 333 Ancestor: dep, 334 }) 335 if err != nil { 336 fails++ 337 if fails == 3 { 338 span.Done(err) 339 return false, err 340 } 341 select { 342 case <-ctx.Done(): 343 return false, ctx.Err() 344 case <-time.After(1 * time.Second): 345 } 346 continue 347 } 348 if res.UnknownCommit { 349 select { 350 case <-ctx.Done(): 351 return false, ctx.Err() 352 case <-time.After(1 * time.Second): 353 } 354 continue 355 } 356 return res.HasAncestor, nil 357 } 358 } 359 360 var errSkipBuildDueToDeps = errors.New("build was skipped due to missing deps") 361 362 func (st *buildStatus) getBuildlet() (buildlet.Client, error) { 363 schedItem := &queue.SchedItem{ 364 HostType: st.conf.HostType, 365 IsTry: st.trySet != nil, 366 BuilderRev: st.BuilderRev, 367 CommitTime: st.commitTime(), 368 Repo: st.RepoOrGo(), 369 Branch: st.RevBranch, 370 User: st.AuthorEmail, 371 } 372 st.mu.Lock() 373 
st.schedItem = schedItem 374 st.mu.Unlock() 375 376 sp := st.CreateSpan("get_buildlet") 377 bc, err := sched.GetBuildlet(st.ctx, schedItem) 378 sp.Done(err) 379 if err != nil { 380 err = fmt.Errorf("failed to get a buildlet: %v", err) 381 go st.reportErr(err) 382 return nil, err 383 } 384 atomic.StoreInt32(&st.hasBuildlet, 1) 385 386 st.mu.Lock() 387 st.bc = bc 388 st.mu.Unlock() 389 st.LogEventTime("using_buildlet", bc.IPPort()) 390 391 return bc, nil 392 } 393 394 func (st *buildStatus) build() error { 395 if deps := st.conf.GoDeps; len(deps) > 0 { 396 ctx, cancel := context.WithTimeout(st.ctx, 30*time.Second) 397 defer cancel() 398 for _, dep := range deps { 399 has, err := st.checkDep(ctx, dep) 400 if err != nil { 401 fmt.Fprintf(st, "Error checking whether commit %s includes ancestor %s: %v\n", st.Rev, dep, err) 402 return err 403 } 404 if !has { 405 st.LogEventTime(eventSkipBuildMissingDep) 406 fmt.Fprintf(st, "skipping build; commit %s lacks ancestor %s\n", st.Rev, dep) 407 return errSkipBuildDueToDeps 408 } 409 } 410 cancel() 411 } 412 413 pool.CoordinatorProcess().PutBuildRecord(st.buildRecord()) 414 415 bc, err := st.getBuildlet() 416 if err != nil { 417 return err 418 } 419 defer bc.Close() 420 421 if st.useSnapshot() { 422 if err := st.writeGoSnapshot(); err != nil { 423 return err 424 } 425 } else { 426 // Write the Go source and bootstrap tool chain in parallel. 
427 var grp syncutil.Group 428 grp.Go(st.writeGoSource) 429 grp.Go(st.writeBootstrapToolchain) 430 if err := grp.Err(); err != nil { 431 return err 432 } 433 } 434 435 execStartTime := time.Now() 436 fmt.Fprintf(st, "%s at %v", st.Name, st.Rev) 437 if st.IsSubrepo() { 438 fmt.Fprintf(st, " building %v at %v", st.SubName, st.SubRev) 439 } 440 fmt.Fprint(st, "\n\n") 441 442 makeTest := st.CreateSpan("make_and_test") // warning: magic event named used by handleLogs 443 444 remoteErr, err := st.runAllSharded() 445 makeTest.Done(err) 446 447 // bc (aka st.bc) may be invalid past this point, so let's 448 // close it to make sure we don't accidentally use it. 449 bc.Close() 450 451 doneMsg := "all tests passed" 452 if remoteErr != nil { 453 doneMsg = "with test failures" 454 } else if err != nil { 455 doneMsg = "comm error: " + err.Error() 456 } 457 // If a build fails multiple times due to communication 458 // problems with the buildlet, assume something's wrong with 459 // the buildlet or machine and fail the build, rather than 460 // looping forever. This promotes the err (communication 461 // error) to a remoteErr (an error that occurred remotely and 462 // is terminal). 463 if rerr := st.repeatedCommunicationError(err); rerr != nil { 464 remoteErr = rerr 465 err = nil 466 doneMsg = "communication error to buildlet (promoted to terminal error): " + rerr.Error() 467 fmt.Fprintf(st, "\n%s\n", doneMsg) 468 } 469 if err != nil { 470 // Return the error *before* we create the magic 471 // "done" event. (which the try coordinator looks for) 472 return err 473 } 474 st.LogEventTime(eventDone, doneMsg) 475 476 if devPause { 477 st.LogEventTime("DEV_MAIN_SLEEP") 478 time.Sleep(5 * time.Minute) 479 } 480 481 if st.trySet == nil { 482 buildLog := st.logs() 483 if remoteErr != nil { 484 // If we just have the line-or-so little 485 // banner at top, that means we didn't get any 486 // interesting output from the remote side, so 487 // include the remoteErr text. 
Otherwise, 488 // assume that remoteErr is redundant with the 489 // buildlog text itself. 490 if strings.Count(buildLog, "\n") < 10 { 491 buildLog += "\n" + remoteErr.Error() 492 } 493 } 494 if err := recordResult(st.BuilderRev, remoteErr == nil, buildLog, time.Since(execStartTime)); err != nil { 495 if remoteErr != nil { 496 return fmt.Errorf("Remote error was %q but failed to report it to the dashboard: %v", remoteErr, err) 497 } 498 return fmt.Errorf("Build succeeded but failed to report it to the dashboard: %v", err) 499 } 500 } 501 if remoteErr != nil { 502 return remoteErr 503 } 504 return nil 505 } 506 507 func (st *buildStatus) HasBuildlet() bool { return atomic.LoadInt32(&st.hasBuildlet) != 0 } 508 509 // useKeepGoingFlag reports whether this build should use -k flag of 'go tool 510 // dist test', which makes it keep going even when some tests have failed. 511 func (st *buildStatus) useKeepGoingFlag() bool { 512 // For now, keep going for post-submit builders on release branches, 513 // because we prioritize seeing more complete test results over failing fast. 514 // Later on, we may start doing this all post-submit builders on all branches. 515 // See golang.org/issue/14305. 516 // 517 // TODO(golang.org/issue/36181): A more ideal long term solution is one that reports 518 // a failure fast, but still keeps going to make all other test results available. 519 return !st.isTry() && strings.HasPrefix(st.branch(), "release-branch.go") 520 } 521 522 // isTry reports whether the build is a part of a TryBot (pre-submit) run. 523 // It may be a normal TryBot (part of the default try set) or a SlowBot. 524 func (st *buildStatus) isTry() bool { return st.trySet != nil } 525 526 // isSlowBot reports whether the build is an explicitly requested SlowBot. 
527 func (st *buildStatus) isSlowBot() bool { 528 if st.trySet == nil { 529 return false 530 } 531 for _, conf := range st.trySet.slowBots { 532 if st.conf == conf { 533 return true 534 } 535 } 536 return false 537 } 538 539 func (st *buildStatus) buildRecord() *types.BuildRecord { 540 rec := &types.BuildRecord{ 541 ID: st.buildID, 542 ProcessID: processID, 543 StartTime: st.startTime, 544 IsTry: st.isTry(), 545 IsSlowBot: st.isSlowBot(), 546 GoRev: st.Rev, 547 Rev: st.SubRevOrGoRev(), 548 Repo: st.RepoOrGo(), 549 Builder: st.Name, 550 OS: st.conf.GOOS(), 551 Arch: st.conf.GOARCH(), 552 } 553 554 // Log whether we used COS, so we can do queries to analyze 555 // Kubernetes vs COS performance for containers. 556 if st.conf.IsContainer() && pool.ForHost(st.conf.HostConfig()) == pool.NewGCEConfiguration().BuildletPool() { 557 rec.ContainerHost = "cos" 558 } 559 560 st.mu.Lock() 561 defer st.mu.Unlock() 562 // TODO: buildlet instance name 563 if !st.done.IsZero() { 564 rec.EndTime = st.done 565 rec.LogURL = st.logURL 566 rec.Seconds = rec.EndTime.Sub(rec.StartTime).Seconds() 567 if st.succeeded { 568 rec.Result = "ok" 569 } else { 570 rec.Result = "fail" 571 } 572 } 573 return rec 574 } 575 576 func (st *buildStatus) SpanRecord(sp *schedule.Span, err error) *types.SpanRecord { 577 rec := &types.SpanRecord{ 578 BuildID: st.buildID, 579 IsTry: st.isTry(), 580 GoRev: st.Rev, 581 Rev: st.SubRevOrGoRev(), 582 Repo: st.RepoOrGo(), 583 Builder: st.Name, 584 OS: st.conf.GOOS(), 585 Arch: st.conf.GOARCH(), 586 587 Event: sp.Event(), 588 Detail: sp.OptText(), 589 StartTime: sp.Start(), 590 EndTime: sp.End(), 591 Seconds: sp.End().Sub(sp.Start()).Seconds(), 592 } 593 if err != nil { 594 rec.Error = err.Error() 595 } 596 return rec 597 } 598 599 // goBuilder returns a GoBuilder for this buildStatus. 
600 func (st *buildStatus) goBuilder() buildgo.GoBuilder { 601 forceMake := true 602 if st.RevBranch == "release-branch.go1.20" { 603 // The concept of "broken ports" and -force flag didn't 604 // exist prior to Go 1.21. See go.dev/issue/56679. 605 // TODO: Remove this condition when Go 1.20 is no longer supported. 606 forceMake = false 607 } 608 return buildgo.GoBuilder{ 609 Logger: st, 610 BuilderRev: st.BuilderRev, 611 Conf: st.conf, 612 Goroot: "go", 613 Force: forceMake, 614 } 615 } 616 617 // runAllSharded runs make.bash and then shards the test execution. 618 // remoteErr and err are as described at the top of this file. 619 // 620 // After runAllSharded returns, the caller must assume that st.bc 621 // might be invalid (It's possible that only one of the helper 622 // buildlets survived). 623 func (st *buildStatus) runAllSharded() (remoteErr, err error) { 624 st.getHelpersReadySoon() 625 626 if !st.useSnapshot() { 627 remoteErr, err = st.goBuilder().RunMake(st.ctx, st.bc, st) 628 if err != nil { 629 return nil, err 630 } 631 if remoteErr != nil { 632 return fmt.Errorf("build failed: %v", remoteErr), nil 633 } 634 } 635 if st.conf.StopAfterMake { 636 return nil, nil 637 } 638 639 if err := st.doSnapshot(st.bc); err != nil { 640 return nil, err 641 } 642 643 switch { 644 case st.conf.RunBench: 645 remoteErr, err = st.runBenchmarkTests() 646 case st.IsSubrepo(): 647 remoteErr, err = st.runSubrepoTests() 648 case st.conf.IsCrossCompileOnly(): 649 remoteErr, err = st.buildTestPackages() 650 default: 651 // Only run platform tests if we're not cross-compiling. 652 // dist can't actually build test packages without running them yet. 653 // See #58297. 654 remoteErr, err = st.runTests(st.getHelpers()) 655 } 656 657 if err == errBuildletsGone { 658 // Don't wrap this error. TODO: use xerrors. 
659 return nil, errBuildletsGone 660 } 661 if err != nil { 662 return nil, fmt.Errorf("runTests: %v", err) 663 } 664 if remoteErr != nil { 665 return fmt.Errorf("tests failed: %v", remoteErr), nil 666 } 667 return nil, nil 668 } 669 670 // buildTestPackages runs `go tool dist test -compile-only`, which builds all standard 671 // library test packages but does not run any tests. Used in cross-compilation modes. 672 func (st *buildStatus) buildTestPackages() (remoteErr, err error) { 673 if st.RevBranch == "release-branch.go1.20" { 674 // Go 1.20 doesn't support `go tool dist test -compile-only` very well. 675 // TODO(mknyszek): Remove this condition when Go 1.20 is no longer supported. 676 return nil, nil 677 } 678 sp := st.CreateSpan("build_test_pkgs") 679 remoteErr, err = st.bc.Exec(st.ctx, path.Join("go", "bin", "go"), buildlet.ExecOpts{ 680 Output: st, 681 Debug: true, 682 Args: []string{"tool", "dist", "test", "-compile-only"}, 683 }) 684 if err != nil { 685 sp.Done(err) 686 return nil, err 687 } 688 if remoteErr != nil { 689 sp.Done(remoteErr) 690 return fmt.Errorf("go tool dist test -compile-only failed: %v", remoteErr), nil 691 } 692 sp.Done(nil) 693 return nil, nil 694 } 695 696 func (st *buildStatus) doSnapshot(bc buildlet.Client) error { 697 // If we're using a pre-built snapshot, don't make another. 698 if st.useSnapshot() { 699 return nil 700 } 701 if st.conf.SkipSnapshot { 702 return nil 703 } 704 if pool.NewGCEConfiguration().BuildEnv().SnapBucket == "" { 705 // Build environment isn't configured to do snapshots. 
706 return nil 707 } 708 if err := st.cleanForSnapshot(bc); err != nil { 709 return fmt.Errorf("cleanForSnapshot: %v", err) 710 } 711 if err := st.writeSnapshot(bc); err != nil { 712 return fmt.Errorf("writeSnapshot: %v", err) 713 } 714 return nil 715 } 716 717 func (st *buildStatus) writeGoSnapshot() (err error) { 718 return st.writeGoSnapshotTo(st.Rev, "go") 719 } 720 721 func (st *buildStatus) writeGoSnapshotTo(rev, dir string) (err error) { 722 sp := st.CreateSpan("write_snapshot_tar") 723 defer func() { sp.Done(err) }() 724 725 snapshotURL := pool.NewGCEConfiguration().BuildEnv().SnapshotURL(st.Name, rev) 726 727 if err := st.bc.PutTarFromURL(st.ctx, snapshotURL, dir); err != nil { 728 return fmt.Errorf("failed to put baseline snapshot to buildlet: %v", err) 729 } 730 return nil 731 } 732 733 func (st *buildStatus) writeGoSource() error { 734 return st.writeGoSourceTo(st.bc, st.Rev, "go") 735 } 736 737 func (st *buildStatus) writeGoSourceTo(bc buildlet.Client, rev, dir string) error { 738 // Write the VERSION file. 
739 sp := st.CreateSpan("write_version_tar") 740 if err := bc.PutTar(st.ctx, buildgo.VersionTgz(rev), dir); err != nil { 741 return sp.Done(fmt.Errorf("writing VERSION tgz: %v", err)) 742 } 743 744 srcTar, err := sourcecache.GetSourceTgz(st, "go", rev) 745 if err != nil { 746 return err 747 } 748 sp = st.CreateSpan("write_go_src_tar") 749 if err := bc.PutTar(st.ctx, srcTar, dir); err != nil { 750 return sp.Done(fmt.Errorf("writing tarball from Gerrit: %v", err)) 751 } 752 return sp.Done(nil) 753 } 754 755 func (st *buildStatus) writeBootstrapToolchain() error { 756 u := st.conf.GoBootstrapURL(pool.NewGCEConfiguration().BuildEnv()) 757 if u == "" { 758 return nil 759 } 760 const bootstrapDir = "go1.4" // might be newer; name is the default 761 sp := st.CreateSpan("write_go_bootstrap_tar") 762 return sp.Done(st.bc.PutTarFromURL(st.ctx, u, bootstrapDir)) 763 } 764 765 func (st *buildStatus) cleanForSnapshot(bc buildlet.Client) error { 766 sp := st.CreateSpan("clean_for_snapshot") 767 return sp.Done(bc.RemoveAll(st.ctx, 768 "go/doc/gopher", 769 "go/pkg/bootstrap", 770 )) 771 } 772 773 func (st *buildStatus) writeSnapshot(bc buildlet.Client) (err error) { 774 sp := st.CreateSpan("write_snapshot_to_gcs") 775 defer func() { sp.Done(err) }() 776 // A typical Go snapshot tarball in April 2022 is around 150 MB in size. 777 // Builders with a fast uplink speed can upload the tar within seconds or minutes. 778 // Reverse builders might be far away on the network, so be more lenient for them. 779 // (Fast builds require a sufficiently fast uplink speed or turning off snapshots, 780 // so the timeout here is mostly an upper bound to prevent infinite hangs.) 
781 timeout := 5 * time.Minute 782 if st.conf.IsReverse() { 783 timeout *= 3 784 } 785 ctx, cancel := context.WithTimeout(st.ctx, timeout) 786 defer cancel() 787 788 tsp := st.CreateSpan("fetch_snapshot_reader_from_buildlet") 789 tgz, err := bc.GetTar(ctx, "go") 790 tsp.Done(err) 791 if err != nil { 792 return err 793 } 794 defer tgz.Close() 795 796 sc := pool.NewGCEConfiguration().StorageClient() 797 if sc == nil { 798 return errors.New("GCE configuration missing storage client") 799 } 800 bucket := pool.NewGCEConfiguration().BuildEnv().SnapBucket 801 if bucket == "" { 802 return errors.New("build environment missing snapshot bucket") 803 } 804 wr := sc.Bucket(bucket).Object(st.SnapshotObjectName()).NewWriter(ctx) 805 wr.ContentType = "application/octet-stream" 806 if n, err := io.Copy(wr, tgz); err != nil { 807 st.logf("failed to write snapshot to GCS after copying %d bytes: %v", n, err) 808 return err 809 } 810 811 return wr.Close() 812 } 813 814 // toolchainBaselineCommit determines the toolchain baseline commit for this 815 // benchmark run. 816 func (st *buildStatus) toolchainBaselineCommit() (baseline string, err error) { 817 sp := st.CreateSpan("list_go_releases") 818 defer func() { sp.Done(err) }() 819 820 // TODO(prattmic): Cache responses for a while. These won't change often. 821 res, err := maintnerClient.ListGoReleases(st.ctx, &apipb.ListGoReleasesRequest{}) 822 if err != nil { 823 return "", err 824 } 825 826 releases := res.GetReleases() 827 if len(releases) == 0 { 828 return "", fmt.Errorf("no Go releases: %v", res) 829 } 830 831 if st.RevBranch == "master" { 832 // Testing master, baseline is latest release. 833 return releases[0].GetTagCommit(), nil 834 } 835 836 // Testing release branch. Baseline is latest patch version of this 837 // release. 
838 for _, r := range releases { 839 if st.RevBranch == r.GetBranchName() { 840 return r.GetTagCommit(), nil 841 } 842 } 843 844 return "", fmt.Errorf("cannot find latest release for %s", st.RevBranch) 845 } 846 847 // Temporarily hard-code the subrepo baseline commits to use. 848 // 849 // TODO(rfindley): in the future, we should use the latestRelease method to 850 // automatically choose the latest patch release of the previous minor version 851 // (e.g. v0.11.x while we're working on v0.12.y). 852 var subrepoBaselines = map[string]string{ 853 "tools": "6ce74ceaddcc4ff081d22ae134f4264a667d394f", // gopls@v0.11.0, with additional instrumentation for memory and CPU usage 854 } 855 856 // subrepoBaselineCommit determines the baseline commit for this subrepo benchmark run. 857 func (st *buildStatus) subrepoBaselineCommit() (baseline string, err error) { 858 commit, ok := subrepoBaselines[st.SubName] 859 if !ok { 860 return "", fmt.Errorf("unknown subrepo for benchmarking %q", st.SubName) 861 } 862 return commit, nil 863 } 864 865 // latestRelease returns the latest release version for a module in subrepo. If 866 // submodule is non-empty, it is the path to a subdirectory containing the 867 // submodule of interest (for example submodule is "gopls" if we are 868 // considering the module golang.org/x/tools/gopls). Otherwise the module is 869 // assumed to be at repo root. 870 // 871 // It is currently unused, but preserved for future use by the 872 // subrepoBaselineCommit method. 873 func (st *buildStatus) latestRelease(submodule string) (string, error) { 874 // Baseline is the latest gopls release tag (but not prerelease). 
875 gerritClient := pool.NewGCEConfiguration().GerritClient() 876 tags, err := gerritClient.GetProjectTags(st.ctx, st.SubName) 877 if err != nil { 878 return "", fmt.Errorf("error fetching tags for %q: %w", st.SubName, err) 879 } 880 881 var versions []string 882 revisions := make(map[string]string) 883 prefix := "refs/tags" 884 if submodule != "" { 885 prefix += "/" + submodule // e.g. gopls tags are "gopls/vX.Y.Z" 886 } 887 for ref, ti := range tags { 888 if !strings.HasPrefix(ref, prefix) { 889 continue 890 } 891 version := ref[len(prefix):] 892 versions = append(versions, version) 893 revisions[version] = ti.Revision 894 } 895 896 semver.Sort(versions) 897 898 // Return latest non-prerelease version. 899 for i := len(versions) - 1; i >= 0; i-- { 900 ver := versions[i] 901 if !semver.IsValid(ver) { 902 continue 903 } 904 if semver.Prerelease(ver) != "" { 905 continue 906 } 907 return revisions[ver], nil 908 } 909 910 return "", fmt.Errorf("no valid versions found in %+v", versions) 911 } 912 913 // reportErr reports an error to Stackdriver. 914 func (st *buildStatus) reportErr(err error) { 915 gceErrsClient := pool.NewGCEConfiguration().ErrorsClient() 916 if gceErrsClient == nil { 917 // errorsClient is nil in dev environments. 918 return 919 } 920 921 ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) 922 defer cancel() 923 924 err = fmt.Errorf("buildID: %v, name: %s, hostType: %s, error: %v", st.buildID, st.conf.Name, st.conf.HostType, err) 925 gceErrsClient.ReportSync(ctx, errorreporting.Entry{Error: err}) 926 } 927 928 // distTestList uses 'go tool dist test -list' to get a list of dist test names. 929 // 930 // As of Go 1.21, the dist test naming pattern has changed to always be in the 931 // form of "<pkg>[:<variant>]", where "<pkg>" means what used to be previously 932 // named "go_test:<pkg>". distTestList maps those new dist test names back to 933 // that previous format, a combination of "go_test[_bench]:<pkg>" and others. 
934 func (st *buildStatus) distTestList() (names []distTestName, remoteErr, err error) { 935 workDir, err := st.bc.WorkDir(st.ctx) 936 if err != nil { 937 err = fmt.Errorf("distTestList, WorkDir: %v", err) 938 return 939 } 940 goroot := st.conf.FilePathJoin(workDir, "go") 941 942 args := []string{"tool", "dist", "test", "--no-rebuild", "--list"} 943 if st.conf.IsRace() { 944 args = append(args, "--race") 945 } 946 if st.conf.CompileOnly { 947 args = append(args, "--compile-only") 948 } 949 var buf bytes.Buffer 950 remoteErr, err = st.bc.Exec(st.ctx, "./go/bin/go", buildlet.ExecOpts{ 951 Output: &buf, 952 ExtraEnv: append(st.conf.Env(), "GOROOT="+goroot), 953 OnStartExec: func() { st.LogEventTime("discovering_tests") }, 954 Path: []string{st.conf.FilePathJoin("$WORKDIR", "go", "bin"), "$PATH"}, 955 Args: args, 956 }) 957 if remoteErr != nil { 958 remoteErr = fmt.Errorf("Remote error: %v, %s", remoteErr, buf.Bytes()) 959 err = nil 960 return 961 } 962 if err != nil { 963 err = fmt.Errorf("Exec error: %v, %s", err, buf.Bytes()) 964 return 965 } 966 // To avoid needing to update all the existing dist test adjust policies, 967 // it's easier to remap new dist test names in "<pkg>[:<variant>]" format 968 // to ones used in Go 1.20 and prior. Do that for now. 969 for _, test := range go120DistTestNames(strings.Fields(buf.String())) { 970 isNormalTry := st.isTry() && !st.isSlowBot() 971 if !st.conf.ShouldRunDistTest(test.Old, isNormalTry) { 972 continue 973 } 974 names = append(names, test) 975 } 976 return names, nil, nil 977 } 978 979 // go120DistTestNames converts a list of dist test names from 980 // an arbitrary Go distribution to the format used in Go 1.20 981 // and prior versions. (Go 1.21 introduces a simpler format.) 982 // 983 // This exists only to avoid rewriting current dist adjust policies. 984 // We wish to avoid new dist adjust policies, but if they're truly needed, 985 // they can choose to start using new dist test names instead. 
986 func go120DistTestNames(names []string) []distTestName { 987 if len(names) == 0 { 988 // Only happens if there's a problem, but no need to panic. 989 return nil 990 } else if strings.HasPrefix(names[0], "go_test:") { 991 // In Go 1.21 and newer no dist tests have a "go_test:" prefix. 992 // In Go 1.20 and older, go tool dist test -list always returns 993 // at least one "go_test:*" test first. 994 // So if we see it, the list is already in Go 1.20 format. 995 var s []distTestName 996 for _, old := range names { 997 s = append(s, distTestName{old, old}) 998 } 999 return s 1000 } 1001 // Remap the new Go 1.21+ dist test names to old ones. 1002 var s []distTestName 1003 for _, new := range names { 1004 var old string 1005 switch pkg, variant, _ := strings.Cut(new, ":"); { 1006 // Special cases. Enough to cover what's used by old dist 1007 // adjust policies. Not much use in going far beyond that. 1008 case variant == "nolibgcc": 1009 old = "nolibgcc:" + pkg 1010 case variant == "race": 1011 old = "race" 1012 case variant == "moved_goroot": 1013 old = "moved_goroot" 1014 case pkg == "cmd/internal/testdir": 1015 if variant == "" { 1016 // Handle this too for when we stop doing special-case sharding only for testdir inside dist. 1017 variant = "0_1" 1018 } 1019 old = "test:" + variant 1020 case pkg == "cmd/api" && variant == "check": 1021 old = "api" 1022 case pkg == "cmd/internal/bootstrap_test": 1023 old = "reboot" 1024 1025 // Easy regular cases. 1026 case variant == "": 1027 old = "go_test:" + pkg 1028 case variant == "racebench": 1029 old = "go_test_bench:" + pkg 1030 1031 // Neither a known special case nor a regular case. 1032 default: 1033 old = new // Less bad than leaving it empty. 1034 } 1035 s = append(s, distTestName{Old: old, Raw: new}) 1036 } 1037 return s 1038 } 1039 1040 type token struct{} 1041 1042 // newTestSet returns a new testSet given the dist test names (from "go tool dist test -list") 1043 // and benchmark items. 
1044 func (st *buildStatus) newTestSet(testStats *buildstats.TestStats, names []distTestName) (*testSet, error) { 1045 set := &testSet{ 1046 st: st, 1047 testStats: testStats, 1048 } 1049 for _, name := range names { 1050 set.items = append(set.items, &testItem{ 1051 set: set, 1052 name: name, 1053 duration: testStats.Duration(st.BuilderRev.Name, name.Old), 1054 take: make(chan token, 1), 1055 done: make(chan token), 1056 }) 1057 } 1058 return set, nil 1059 } 1060 1061 var ( 1062 testStats atomic.Value // of *buildstats.TestStats 1063 testStatsLoader singleflight.Group 1064 ) 1065 1066 func getTestStats(sl spanlog.Logger) *buildstats.TestStats { 1067 sp := sl.CreateSpan("get_test_stats") 1068 ts, ok := testStats.Load().(*buildstats.TestStats) 1069 if ok && ts.AsOf.After(time.Now().Add(-1*time.Hour)) { 1070 sp.Done(nil) 1071 return ts 1072 } 1073 v, err, _ := testStatsLoader.Do("", func() (interface{}, error) { 1074 log.Printf("getTestStats: reloading from BigQuery...") 1075 sp := sl.CreateSpan("query_test_stats") 1076 ctx, cancel := context.WithTimeout(context.Background(), time.Minute) 1077 defer cancel() 1078 ts, err := buildstats.QueryTestStats(ctx, pool.NewGCEConfiguration().BuildEnv()) 1079 sp.Done(err) 1080 if err != nil { 1081 log.Printf("getTestStats: error: %v", err) 1082 return nil, err 1083 } 1084 testStats.Store(ts) 1085 return ts, nil 1086 }) 1087 if err != nil { 1088 sp.Done(err) 1089 return nil 1090 } 1091 sp.Done(nil) 1092 return v.(*buildstats.TestStats) 1093 } 1094 1095 func (st *buildStatus) runSubrepoTests() (remoteErr, err error) { 1096 st.LogEventTime("fetching_subrepo", st.SubName) 1097 1098 workDir, err := st.bc.WorkDir(st.ctx) 1099 if err != nil { 1100 err = fmt.Errorf("error discovering workdir for helper %s: %v", st.bc.IPPort(), err) 1101 return nil, err 1102 } 1103 goroot := st.conf.FilePathJoin(workDir, "go") 1104 gopath := st.conf.FilePathJoin(workDir, "gopath") 1105 1106 // A goTestRun represents a single invocation of the 'go test' 
command. 1107 type goTestRun struct { 1108 Dir string // Directory where 'go test' should be executed. 1109 Patterns []string // Import path patterns to provide to 'go test'. 1110 } 1111 // Test all packages selected by the "./..." pattern at the repository root. 1112 // (If there are modules in subdirectories, they'll be found and handled below.) 1113 repoPath := importPathOfRepo(st.SubName) 1114 testRuns := []goTestRun{{ 1115 Dir: "gopath/src/" + repoPath, 1116 Patterns: []string{"./..."}, 1117 }} 1118 1119 // Check out the provided sub-repo to the buildlet's workspace so we 1120 // can find go.mod files and run tests in it. 1121 { 1122 tgz, err := sourcecache.GetSourceTgz(st, st.SubName, st.SubRev) 1123 if errors.As(err, new(sourcecache.TooBigError)) { 1124 // Source being too big is a non-retryable error. 1125 return err, nil 1126 } else if err != nil { 1127 return nil, err 1128 } 1129 err = st.bc.PutTar(st.ctx, tgz, "gopath/src/"+repoPath) 1130 if err != nil { 1131 return nil, err 1132 } 1133 } 1134 1135 // Look for inner modules, in order to test them too. See golang.org/issue/32528. 1136 sp := st.CreateSpan("listing_subrepo_modules", st.SubName) 1137 err = st.bc.ListDir(st.ctx, "gopath/src/"+repoPath, buildlet.ListDirOpts{Recursive: true}, func(e buildlet.DirEntry) { 1138 goModFile := path.Base(e.Name()) == "go.mod" && !e.IsDir() 1139 if !goModFile { 1140 return 1141 } 1142 // Found a go.mod file in a subdirectory, which indicates the root of a module. 1143 modulePath := path.Join(repoPath, path.Dir(e.Name())) 1144 if modulePath == repoPath { 1145 // This is the go.mod file at the repository root. 1146 // It's already a part of testRuns, so skip it. 1147 return 1148 } else if ignoredByGoTool(modulePath) || isVendored(modulePath) { 1149 // go.mod file is in a directory we're not looking to support, so skip it. 1150 return 1151 } 1152 // Add an additional test run entry that will test all packages in this module. 
1153 testRuns = append(testRuns, goTestRun{ 1154 Dir: "gopath/src/" + modulePath, 1155 Patterns: []string{"./..."}, 1156 }) 1157 }) 1158 sp.Done(err) 1159 if err != nil { 1160 return nil, err 1161 } 1162 1163 // Finally, execute all of the test runs. 1164 // If any fail, keep going so that all test results are included in the output. 1165 1166 sp = st.CreateSpan("running_subrepo_tests", st.SubName) 1167 defer func() { sp.Done(err) }() 1168 1169 env := append(st.conf.Env(), 1170 "GOROOT="+goroot, 1171 "GOPATH="+gopath, 1172 ) 1173 env = append(env, st.modulesEnv()...) 1174 1175 args := []string{"test"} 1176 if st.conf.CompileOnly { 1177 // Build all packages, but avoid running the binary by executing /bin/true for the tests. 1178 // We assume for a compile-only build we're just running on a Linux system. 1179 args = append(args, "-exec", "/bin/true") 1180 } else { 1181 if !st.conf.IsLongTest() { 1182 args = append(args, "-short") 1183 } 1184 if st.conf.IsRace() { 1185 args = append(args, "-race") 1186 } 1187 if scale := st.conf.GoTestTimeoutScale(); scale != 1 { 1188 const goTestDefaultTimeout = 10 * time.Minute // Default value taken from Go 1.20. 1189 args = append(args, "-timeout="+(goTestDefaultTimeout*time.Duration(scale)).String()) 1190 } 1191 } 1192 1193 var remoteErrors []error 1194 for _, tr := range testRuns { 1195 rErr, err := st.bc.Exec(st.ctx, "./go/bin/go", buildlet.ExecOpts{ 1196 Debug: true, // make buildlet print extra debug in output for failures 1197 Output: st, 1198 Dir: tr.Dir, 1199 ExtraEnv: env, 1200 Path: []string{st.conf.FilePathJoin("$WORKDIR", "go", "bin"), "$PATH"}, 1201 Args: append(args, tr.Patterns...), 1202 }) 1203 if err != nil { 1204 // A network/communication error. Give up here; 1205 // the caller can retry as it sees fit. 
1206 return nil, err 1207 } else if rErr != nil { 1208 // An error occurred remotely and is terminal, but we want to 1209 // keep testing other packages and report their failures too, 1210 // rather than stopping short. 1211 remoteErrors = append(remoteErrors, rErr) 1212 } 1213 } 1214 if len(remoteErrors) > 0 { 1215 return multiError(remoteErrors), nil 1216 } 1217 return nil, nil 1218 } 1219 1220 // ignoredByGoTool reports whether the given import path corresponds 1221 // to a directory that would be ignored by the go tool. 1222 // 1223 // The logic of the go tool for ignoring directories is documented at 1224 // https://golang.org/cmd/go/#hdr-Package_lists_and_patterns: 1225 // 1226 // Directory and file names that begin with "." or "_" are ignored 1227 // by the go tool, as are directories named "testdata". 1228 func ignoredByGoTool(importPath string) bool { 1229 for _, el := range strings.Split(importPath, "/") { 1230 if strings.HasPrefix(el, ".") || strings.HasPrefix(el, "_") || el == "testdata" { 1231 return true 1232 } 1233 } 1234 return false 1235 } 1236 1237 // isVendored reports whether the given import path corresponds 1238 // to a Go package that is inside a vendor directory. 1239 // 1240 // The logic for what is considered a vendor directory is documented at 1241 // https://golang.org/cmd/go/#hdr-Vendor_Directories. 1242 func isVendored(importPath string) bool { 1243 return strings.HasPrefix(importPath, "vendor/") || 1244 strings.Contains(importPath, "/vendor/") 1245 } 1246 1247 // multiError is a concatenation of multiple errors. 1248 // There must be one or more errors, and all must be non-nil. 1249 type multiError []error 1250 1251 // Error concatenates all error strings into a single string, 1252 // using a semicolon and space as a separator. 
1253 func (m multiError) Error() string { 1254 if len(m) == 1 { 1255 return m[0].Error() 1256 } 1257 1258 var b strings.Builder 1259 for i, e := range m { 1260 if i != 0 { 1261 b.WriteString("; ") 1262 } 1263 b.WriteString(e.Error()) 1264 } 1265 return b.String() 1266 } 1267 1268 // internalModuleProxy returns the GOPROXY environment value to use for 1269 // most module-enabled tests. 1270 // 1271 // We go through an internal (10.0.0.0/8) proxy that then hits 1272 // https://proxy.golang.org/ so we're still able to firewall 1273 // non-internal outbound connections on builder nodes. 1274 // 1275 // This internalModuleProxy func in prod mode (when running on GKE) returns an 1276 // http URL to the current GKE pod's IP with a Kubernetes NodePort service port 1277 // that forwards back to the coordinator's 8123. See comment below. 1278 func internalModuleProxy() string { 1279 // We run a NodePort service on each GKE node 1280 // (cmd/coordinator/module-proxy-service.yaml) on port 30157 1281 // that maps back the coordinator's port 8123. (We could round 1282 // robin over all the GKE nodes' IPs if we wanted, but the 1283 // coordinator is running on GKE so our node by definition is 1284 // up, so just use it. It won't be much traffic.) 1285 // TODO: migrate to a GKE internal load balancer with an internal static IP 1286 // once we migrate symbolic-datum-552 off a Legacy VPC network to the modern 1287 // scheme that supports internal static IPs. 1288 return "http://" + pool.NewGCEConfiguration().GKENodeHostname() + ":30157" 1289 } 1290 1291 // modulesEnv returns the extra module-specific environment variables 1292 // to append to tests. 1293 func (st *buildStatus) modulesEnv() (env []string) { 1294 // GOPROXY 1295 switch { 1296 case st.SubName == "" && !st.conf.OutboundNetworkAllowed(): 1297 env = append(env, "GOPROXY=off") 1298 case st.conf.PrivateGoProxy(): 1299 // Don't add GOPROXY, the builder is pre-configured. 
1300 case pool.NewGCEConfiguration().BuildEnv() == nil || !pool.NewGCEConfiguration().BuildEnv().IsProd: 1301 // Dev mode; use the system default. 1302 env = append(env, "GOPROXY="+os.Getenv("GOPROXY")) 1303 case st.conf.IsGCE(): 1304 // On GCE; the internal proxy is accessible, prefer that. 1305 env = append(env, "GOPROXY="+internalModuleProxy()) 1306 default: 1307 // Everything else uses the public proxy. 1308 env = append(env, "GOPROXY=https://proxy.golang.org") 1309 } 1310 1311 return env 1312 } 1313 1314 // runBenchmarkTests runs benchmarks from x/benchmarks when RunBench is set. 1315 func (st *buildStatus) runBenchmarkTests() (remoteErr, err error) { 1316 if st.SubName == "" { 1317 return nil, fmt.Errorf("benchmark tests must run on a subrepo") 1318 } 1319 1320 // Repository under test. 1321 // 1322 // When running benchmarks, there are numerous variables: 1323 // 1324 // * Go experiment version 1325 // * Go baseline version 1326 // * Subrepo experiment version (if benchmarking subrepo) 1327 // * Subrepo baseline version (if benchmarking subrepo) 1328 // * x/benchmarks version (which defines which benchmarks run and how 1329 // regardless of which repo is under test) 1330 // 1331 // For benchmarking of the main Go repo, the first three are used. 1332 // Ideally, the coordinator scheduler would handle the combinatorics on 1333 // testing these. Unfortunately, the coordinator doesn't handle 1334 // three-way combinations. By running Go benchmarks as a "subrepo test" 1335 // for x/benchmark, we can at least get the scheduler to handle the 1336 // x/benchmarks version (st.SubRev) and Go experiment version (st.Rev). 1337 // The Go baseline version is simply selected as the most recent 1338 // previous release tag (e.g., 1.18.x on release-branch.go1.18) at the 1339 // time this test runs (st.installBaselineToolchain below). 
1340 // 1341 // When benchmarking a subrepo, we want to compare a subrepo experiment 1342 // version vs subrepo baseline version (_not_ compare a single subrepo 1343 // version vs baseline/experiment Go versions). We do need to build the 1344 // subrepo with some version of Go, so we choose to use the latest 1345 // released version at the time of testing (same as Go baseline above). 1346 // We'd like the coordinator to handle the combination of x/benchmarks 1347 // and x/<subrepo>, however the coordinator can't do multiple subrepo 1348 // combinations. 1349 // 1350 // Thus, we run these as typical subrepo builders, which gives us the 1351 // subrepo experiment version and a Go experiment version (which we 1352 // will ignore). The Go baseline version is selected as above, and the 1353 // subrepo baseline version is selected as the latest (non-pre-release) 1354 // tag in the subrepo. 1355 // 1356 // This setup is suboptimal because the caller is installing an 1357 // experiment Go version that we won't use when building the subrepo 1358 // (we'll use the Go baseline version). We'll also end up with 1359 // duplicate runs with identical subrepo experiment/baseline and 1360 // x/benchmarks versions, as builds will trigger on every commit to the 1361 // Go repo. Limiting subrepo builders to release branches can 1362 // significantly reduce the number of Go commit triggers. 1363 // 1364 // TODO(prattmic): Cleaning this up is good future work, but these 1365 // deficiencies are not particularly problematic and avoid the need for 1366 // major changes in other parts of the coordinator. 
1367 repo := st.SubName 1368 if repo == "benchmarks" { 1369 repo = "go" 1370 } 1371 1372 const ( 1373 baselineDir = "go-baseline" 1374 benchmarksDir = "benchmarks" 1375 subrepoDir = "subrepo" 1376 subrepoBaselineDir = "subrepo-baseline" 1377 ) 1378 1379 workDir, err := st.bc.WorkDir(st.ctx) 1380 if err != nil { 1381 err = fmt.Errorf("error discovering workdir for helper %s: %v", st.bc.IPPort(), err) 1382 return nil, err 1383 } 1384 goroot := st.conf.FilePathJoin(workDir, "go") 1385 baselineGoroot := st.conf.FilePathJoin(workDir, baselineDir) 1386 gopath := st.conf.FilePathJoin(workDir, "gopath") 1387 1388 // Install baseline toolchain in addition to the experiment toolchain. 1389 toolchainBaselineCommit, remoteErr, err := st.installBaselineToolchain(goroot, baselineDir) 1390 if remoteErr != nil || err != nil { 1391 return remoteErr, err 1392 } 1393 1394 // Install x/benchmarks. 1395 benchmarksCommit, remoteErr, err := st.fetchBenchmarksSource(benchmarksDir) 1396 if remoteErr != nil || err != nil { 1397 return remoteErr, err 1398 } 1399 1400 // If testing a repo other than Go, install the subrepo and its baseline. 1401 var subrepoBaselineCommit string 1402 if repo != "go" { 1403 subrepoBaselineCommit, remoteErr, err = st.fetchSubrepoAndBaseline(subrepoDir, subrepoBaselineDir) 1404 if remoteErr != nil || err != nil { 1405 return remoteErr, err 1406 } 1407 } 1408 1409 // Run golang.org/x/benchmarks/cmd/bench to perform benchmarks. 1410 sp := st.CreateSpan("running_benchmark_tests", st.SubName) 1411 defer func() { sp.Done(err) }() 1412 1413 env := append(st.conf.Env(), 1414 "BENCH_BASELINE_GOROOT="+baselineGoroot, 1415 "BENCH_BRANCH="+st.RevBranch, 1416 "BENCH_REPOSITORY="+repo, 1417 "GOROOT="+goroot, 1418 "GOPATH="+gopath, // For module cache storage 1419 ) 1420 env = append(env, st.modulesEnv()...) 
1421 if repo != "go" { 1422 env = append(env, "BENCH_SUBREPO_PATH="+st.conf.FilePathJoin(workDir, subrepoDir)) 1423 env = append(env, "BENCH_SUBREPO_BASELINE_PATH="+st.conf.FilePathJoin(workDir, subrepoBaselineDir)) 1424 } 1425 rErr, err := st.bc.Exec(st.ctx, "./go/bin/go", buildlet.ExecOpts{ 1426 Debug: true, // make buildlet print extra debug in output for failures 1427 Output: st, 1428 Dir: benchmarksDir, 1429 ExtraEnv: env, 1430 Path: []string{st.conf.FilePathJoin("$WORKDIR", "go", "bin"), "$PATH"}, 1431 Args: []string{"run", "golang.org/x/benchmarks/cmd/bench"}, 1432 }) 1433 if err != nil || rErr != nil { 1434 return rErr, err 1435 } 1436 1437 // Upload benchmark results on success. 1438 if err := st.uploadBenchResults(toolchainBaselineCommit, subrepoBaselineCommit, benchmarksCommit); err != nil { 1439 return nil, err 1440 } 1441 return nil, nil 1442 } 1443 1444 func (st *buildStatus) uploadBenchResults(toolchainBaselineCommit, subrepoBaselineCommit, benchmarksCommit string) (err error) { 1445 sp := st.CreateSpan("upload_bench_results") 1446 defer func() { sp.Done(err) }() 1447 1448 s := pool.NewGCEConfiguration().BuildEnv().PerfDataURL 1449 if s == "" { 1450 log.Printf("No perfdata URL, skipping benchmark upload") 1451 return nil 1452 } 1453 client := &perfstorage.Client{BaseURL: s, HTTPClient: pool.NewGCEConfiguration().OAuthHTTPClient()} 1454 u := client.NewUpload(st.ctx) 1455 w, err := u.CreateFile("results") 1456 if err != nil { 1457 u.Abort() 1458 return fmt.Errorf("error creating perfdata file: %w", err) 1459 } 1460 1461 // Prepend some useful metadata. 1462 var b strings.Builder 1463 if subrepoBaselineCommit != "" { 1464 // Subrepos compare two subrepo commits. 
1465 fmt.Fprintf(&b, "experiment-commit: %s\n", st.SubRev) 1466 fmt.Fprintf(&b, "experiment-commit-time: %s\n", st.SubRevCommitTime.In(time.UTC).Format(time.RFC3339Nano)) 1467 fmt.Fprintf(&b, "baseline-commit: %s\n", subrepoBaselineCommit) 1468 // Subrepo benchmarks typically don't care about the toolchain 1469 // version, but we should still provide the data as toolchain 1470 // version changes may cause a performance discontinuity. 1471 fmt.Fprintf(&b, "toolchain-commit: %s\n", toolchainBaselineCommit) 1472 } else { 1473 // Go repo compares two main repo commits. 1474 fmt.Fprintf(&b, "experiment-commit: %s\n", st.Rev) 1475 fmt.Fprintf(&b, "experiment-commit-time: %s\n", st.RevCommitTime.In(time.UTC).Format(time.RFC3339Nano)) 1476 fmt.Fprintf(&b, "baseline-commit: %s\n", toolchainBaselineCommit) 1477 } 1478 fmt.Fprintf(&b, "benchmarks-commit: %s\n", benchmarksCommit) 1479 fmt.Fprintf(&b, "post-submit: %t\n", st.trySet == nil) 1480 if _, err := w.Write([]byte(b.String())); err != nil { 1481 u.Abort() 1482 return fmt.Errorf("error writing perfdata metadata with contents %q: %w", b.String(), err) 1483 } 1484 1485 // TODO(prattmic): Full log output may contain non-benchmark output 1486 // that can be erroneously parsed as benchfmt. 
1487 if _, err := w.Write([]byte(st.logs())); err != nil { 1488 u.Abort() 1489 return fmt.Errorf("error writing perfdata file with contents %q: %w", st.logs(), err) 1490 } 1491 status, err := u.Commit() 1492 if err != nil { 1493 return fmt.Errorf("error committing perfdata file: %w", err) 1494 } 1495 st.LogEventTime("bench_upload", status.UploadID) 1496 return nil 1497 } 1498 1499 func (st *buildStatus) installBaselineToolchain(goroot, baselineDir string) (baselineCommit string, remoteErr, err error) { 1500 sp := st.CreateSpan("install_baseline") 1501 defer func() { sp.Done(err) }() 1502 1503 commit, err := st.toolchainBaselineCommit() 1504 if err != nil { 1505 return "", nil, fmt.Errorf("error finding baseline commit: %w", err) 1506 } 1507 fmt.Fprintf(st, "Baseline toolchain %s\n", commit) 1508 1509 if st.useSnapshotFor(commit) { 1510 if err := st.writeGoSnapshotTo(commit, baselineDir); err != nil { 1511 return "", nil, fmt.Errorf("error writing baseline snapshot: %w", err) 1512 } 1513 return commit, nil, nil 1514 } 1515 1516 if err := st.writeGoSourceTo(st.bc, commit, baselineDir); err != nil { 1517 return "", nil, fmt.Errorf("error writing baseline source: %w", err) 1518 } 1519 1520 br := st.BuilderRev 1521 br.Rev = commit 1522 1523 builder := buildgo.GoBuilder{ 1524 Logger: st, 1525 BuilderRev: br, 1526 Conf: st.conf, 1527 Goroot: baselineDir, 1528 // Use the primary GOROOT as GOROOT_BOOTSTRAP. The 1529 // typical bootstrap toolchain may not be available if 1530 // the primary toolchain was installed from a snapshot. 
1531 GorootBootstrap: goroot, 1532 } 1533 remoteErr, err = builder.RunMake(st.ctx, st.bc, st) 1534 if err != nil { 1535 return "", nil, err 1536 } 1537 if remoteErr != nil { 1538 return "", remoteErr, nil 1539 } 1540 return commit, nil, nil 1541 } 1542 1543 func (st *buildStatus) fetchBenchmarksSource(benchmarksDir string) (rev string, remoteErr, err error) { 1544 if st.SubName == "benchmarks" { 1545 rev = st.SubRev 1546 } else { 1547 rev, err = getRepoHead("benchmarks") 1548 if err != nil { 1549 return "", nil, fmt.Errorf("error finding x/benchmarks HEAD: %w", err) 1550 } 1551 } 1552 1553 sp := st.CreateSpan("fetching_benchmarks") 1554 defer func() { sp.Done(err) }() 1555 1556 tgz, err := sourcecache.GetSourceTgz(st, "benchmarks", rev) 1557 if errors.As(err, new(sourcecache.TooBigError)) { 1558 // Source being too big is a non-retryable error. 1559 return "", err, nil 1560 } else if err != nil { 1561 return "", nil, err 1562 } 1563 1564 err = st.bc.PutTar(st.ctx, tgz, benchmarksDir) 1565 if err != nil { 1566 return "", nil, err 1567 } 1568 1569 return rev, nil, nil 1570 } 1571 1572 func (st *buildStatus) fetchSubrepoAndBaseline(repoDir, baselineDir string) (baselineRev string, remoteErr, err error) { 1573 st.LogEventTime("fetching_subrepo", st.SubName) 1574 1575 tgz, err := sourcecache.GetSourceTgz(st, st.SubName, st.SubRev) 1576 if errors.As(err, new(sourcecache.TooBigError)) { 1577 // Source being too big is a non-retryable error. 
1578 return "", err, nil 1579 } else if err != nil { 1580 return "", nil, err 1581 } 1582 1583 err = st.bc.PutTar(st.ctx, tgz, repoDir) 1584 if err != nil { 1585 return "", nil, err 1586 } 1587 1588 baselineRev, err = st.subrepoBaselineCommit() 1589 if err != nil { 1590 return "", nil, err 1591 } 1592 1593 fmt.Fprintf(st, "Baseline subrepo %s\n", baselineRev) 1594 1595 tgz, err = sourcecache.GetSourceTgz(st, st.SubName, baselineRev) 1596 if errors.As(err, new(sourcecache.TooBigError)) { 1597 // Source being too big is a non-retryable error. 1598 return "", err, nil 1599 } else if err != nil { 1600 return "", nil, err 1601 } 1602 1603 err = st.bc.PutTar(st.ctx, tgz, baselineDir) 1604 if err != nil { 1605 return "", nil, err 1606 } 1607 1608 return baselineRev, nil, nil 1609 } 1610 1611 var errBuildletsGone = errors.New("runTests: dist test failed: all buildlets had network errors or timeouts, yet tests remain") 1612 1613 // runTests runs tests for the main Go repo. 1614 // 1615 // After runTests completes, the caller must assume that st.bc might be invalid 1616 // (It's possible that only one of the helper buildlets survived). 
func (st *buildStatus) runTests(helpers <-chan buildlet.Client) (remoteErr, err error) {
	// Discover which dist tests to run. A failure of the remote list
	// command is reported as remoteErr, anything else as err.
	testNames, remoteErr, err := st.distTestList()
	if remoteErr != nil {
		return fmt.Errorf("distTestList remote: %v", remoteErr), nil
	}
	if err != nil {
		return nil, fmt.Errorf("distTestList exec: %v", err)
	}
	testStats := getTestStats(st)

	set, err := st.newTestSet(testStats, testNames)
	if err != nil {
		return nil, err
	}
	st.LogEventTime("starting_tests", fmt.Sprintf("%d tests", len(set.items)))
	startTime := time.Now()

	workDir, err := st.bc.WorkDir(st.ctx)
	if err != nil {
		return nil, fmt.Errorf("error discovering workdir for main buildlet, %s: %v", st.bc.Name(), err)
	}

	mainBuildletGoroot := st.conf.FilePathJoin(workDir, "go")
	mainBuildletGopath := st.conf.FilePathJoin(workDir, "gopath")

	// We use our original buildlet to run the tests in order, to
	// make the streaming somewhat smooth and not incredibly
	// lumpy.  The rest of the buildlets run the largest tests
	// first (critical path scheduling).
	// The buildletActivity WaitGroup is used to track when all
	// the buildlets are dead or done.
	var buildletActivity sync.WaitGroup
	buildletActivity.Add(2) // one per goroutine below (main + helper launcher goroutine)
	go func() {
		defer buildletActivity.Done() // for the per-goroutine Add(2) above
		for !st.bc.IsBroken() {
			tis, ok := set.testsToRunInOrder()
			if !ok {
				// Nothing to run right now: check again in 5 seconds,
				// or stop as soon as the build's context is done.
				select {
				case <-st.ctx.Done():
					return
				case <-time.After(5 * time.Second):
				}
				continue
			}
			st.runTestsOnBuildlet(st.bc, tis, mainBuildletGoroot, mainBuildletGopath)
		}
		st.LogEventTime("main_buildlet_broken", st.bc.Name())
	}()
	go func() {
		defer buildletActivity.Done() // for the per-goroutine Add(2) above
		// Start one worker goroutine per helper buildlet received, until
		// the helpers channel is closed.
		for helper := range helpers {
			buildletActivity.Add(1)
			go func(bc buildlet.Client) {
				// Deferred calls run in reverse order: the optional dev
				// pause (log, then sleep), bc.Close, the "closed_helper"
				// event, and finally the WaitGroup Done.
				defer buildletActivity.Done() // for the per-helper Add(1) above
				defer st.LogEventTime("closed_helper", bc.Name())
				defer bc.Close()
				if devPause {
					defer time.Sleep(5 * time.Minute)
					defer st.LogEventTime("DEV_HELPER_SLEEP", bc.Name())
				}
				st.LogEventTime("got_empty_test_helper", bc.String())
				// Extract this build's snapshot into the helper's "go" directory.
				if err := bc.PutTarFromURL(st.ctx, st.SnapshotURL(pool.NewGCEConfiguration().BuildEnv()), "go"); err != nil {
					log.Printf("failed to extract snapshot for helper %s: %v", bc.Name(), err)
					return
				}
				workDir, err := bc.WorkDir(st.ctx)
				if err != nil {
					log.Printf("error discovering workdir for helper %s: %v", bc.Name(), err)
					return
				}
				st.LogEventTime("test_helper_set_up", bc.Name())
				goroot := st.conf.FilePathJoin(workDir, "go")
				gopath := st.conf.FilePathJoin(workDir, "gopath")
				for !bc.IsBroken() {
					tis, ok := set.testsToRunBiggestFirst()
					if !ok {
						st.LogEventTime("no_new_tests_remain", bc.Name())
						return
					}
					st.runTestsOnBuildlet(bc, tis, goroot, gopath)
				}
				st.LogEventTime("test_helper_is_broken", bc.Name())
			}(helper)
		}
	}()

	// Convert a sync.WaitGroup into a channel.
	// Aside: https://groups.google.com/forum/#!topic/golang-dev/7fjGWuImu5k
	buildletsGone := make(chan struct{})
	go func() {
		buildletActivity.Wait()
		close(buildletsGone)
	}()

	// Report results in the order of set.items, waiting for each item in
	// turn even if later items have already finished.
	var lastMetadata string
	var lastHeader string
	var serialDuration time.Duration
	for _, ti := range set.items {
	AwaitDone:
		for {
			// Log a progress event every 30 seconds while waiting.
			timer := time.NewTimer(30 * time.Second)
			select {
			case <-ti.done: // wait for success
				timer.Stop()
				break AwaitDone
			case <-timer.C:
				st.LogEventTime("still_waiting_on_test", ti.name.Old)
			case <-buildletsGone:
				// No buildlet goroutine is left to finish this test.
				set.cancelAll()
				return nil, errBuildletsGone
			}
		}

		serialDuration += ti.execDuration
		if len(ti.output) > 0 {
			metadata, header, out := parseOutputAndHeader(ti.output)
			printHeader := false
			if metadata != lastMetadata {
				lastMetadata = metadata
				fmt.Fprintf(st, "\n%s\n", metadata)
				// Always include the test header after
				// metadata changes. This is a readability
				// optimization that ensures that tests are
				// always immediately preceded by their test
				// banner, even if it is duplicate banner
				// because the test metadata changed.
				printHeader = true
			}
			if header != lastHeader {
				lastHeader = header
				printHeader = true
			}
			if printHeader {
				fmt.Fprintf(st, "\n%s\n", header)
			}
			if pool.NewGCEConfiguration().InStaging() {
				// In staging, append shard details to the end of the
				// test's output.
				out = bytes.TrimSuffix(out, nl)
				st.Write(out)
				fmt.Fprintf(st, " (shard %s; par=%d)\n", ti.shardIPPort, ti.groupSize)
			} else {
				st.Write(out)
			}
		}

		// The first failed test ends the run.
		if ti.remoteErr != nil {
			set.cancelAll()
			return fmt.Errorf("dist test failed: %s: %v", ti.name, ti.remoteErr), nil
		}
	}
	elapsed := time.Since(startTime)
	var msg string
	if st.conf.NumTestHelpers(st.isTry()) > 0 {
		msg = fmt.Sprintf("took %v; aggregate %v; saved %v", elapsed, serialDuration, serialDuration-elapsed)
	} else {
		msg = fmt.Sprintf("took %v", elapsed)
	}
	st.LogEventTime("tests_complete", msg)
	fmt.Fprintf(st, "\nAll tests passed.\n")
	return nil, nil
}

const (
	banner       = "XXXBANNERXXX:" // flag passed to dist
	bannerPrefix = "\n" + banner   // with the newline added by dist

	metadataBannerPrefix = bannerPrefix + "Test execution environment."

	outputBanner = "##### " // banner to display in output.
)

var (
	bannerPrefixBytes         = []byte(bannerPrefix)
	metadataBannerPrefixBytes = []byte(metadataBannerPrefix)
)

// parseOutputAndHeader parses b and returns the (optional) test environment
// metadata, display header (e.g., "##### Testing packages.") and the
// following output.
//
// metadata is the optional execution environment metadata block. e.g.,
//
//	##### Test execution environment.
//	# GOARCH: amd64
//	# CPU: Intel(R) Xeon(R) W-2135 CPU @ 3.70GHz
//
// If b does not start with a banner, or it starts with a metadata block
// that is not followed by another banner, b is returned unchanged as out
// with empty metadata and header.
func parseOutputAndHeader(b []byte) (metadata, header string, out []byte) {
	if !bytes.HasPrefix(b, bannerPrefixBytes) {
		return "", "", b
	}

	if bytes.HasPrefix(b, metadataBannerPrefixBytes) {
		// Header includes everything up to and including the next
		// banner.
		rem := b[len(metadataBannerPrefixBytes):]
		i := bytes.Index(rem, bannerPrefixBytes)
		if i == -1 {
			// Metadata block without a following block doesn't
			// make sense. Bail.
			return "", "", b
		}
		// bi is the offset within b of the newline that starts the next banner.
		bi := i + len(metadataBannerPrefixBytes)
		// Metadata portion of header, skipping initial and trailing newlines.
		metadata = strings.Trim(string(b[:bi]), "\n")
		metadata = strings.Replace(metadata, banner, outputBanner, 1)
		b = b[bi+1:] // skip newline at start of next banner.
	} else {
		b = b[1:] // skip newline
	}

	// Find end of primary test banner.
	nl := bytes.IndexByte(b, '\n')
	if nl == -1 {
		// No newline, everything is header.
		header = string(b)
		b = nil
	} else {
		header = string(b[:nl])
		b = b[nl+1:]
	}

	// Replace internal marker banner with the human-friendly version.
	header = strings.Replace(header, banner, outputBanner, 1)
	return metadata, header, b
}

// maxTestExecErrors is the number of test execution failures at which
// we give up and stop trying and instead permanently fail the test.
// Note that this is not related to whether the test failed remotely,
// but whether we were unable to start or complete watching it run.
// (A communication error)
const maxTestExecErrors = 3

// runTestsOnBuildlet runs tis on bc, using the optional goroot & gopath environment variables.
1850 func (st *buildStatus) runTestsOnBuildlet(bc buildlet.Client, tis []*testItem, goroot, gopath string) { 1851 names, rawNames := make([]string, len(tis)), make([]string, len(tis)) 1852 for i, ti := range tis { 1853 names[i], rawNames[i] = ti.name.Old, ti.name.Raw 1854 if i > 0 && (!strings.HasPrefix(ti.name.Old, "go_test:") || !strings.HasPrefix(names[0], "go_test:")) { 1855 panic("only go_test:* tests may be merged") 1856 } 1857 } 1858 var spanName string 1859 var detail string 1860 if len(names) == 1 { 1861 spanName = "run_test:" + names[0] 1862 detail = bc.Name() 1863 } else { 1864 spanName = "run_tests_multi" 1865 detail = fmt.Sprintf("%s: %v", bc.Name(), names) 1866 } 1867 sp := st.CreateSpan(spanName, detail) 1868 1869 args := []string{"tool", "dist", "test", "--no-rebuild", "--banner=" + banner} 1870 if st.conf.IsRace() { 1871 args = append(args, "--race") 1872 } 1873 if st.conf.CompileOnly { 1874 args = append(args, "--compile-only") 1875 } 1876 if st.useKeepGoingFlag() { 1877 args = append(args, "-k") 1878 } 1879 args = append(args, rawNames...) 1880 var buf bytes.Buffer 1881 t0 := time.Now() 1882 timeout := st.conf.DistTestsExecTimeout(names) 1883 1884 ctx, cancel := context.WithTimeout(st.ctx, timeout) 1885 defer cancel() 1886 1887 env := append(st.conf.Env(), 1888 "GOROOT="+goroot, 1889 "GOPATH="+gopath, 1890 ) 1891 env = append(env, st.modulesEnv()...) 1892 1893 remoteErr, err := bc.Exec(ctx, "./go/bin/go", buildlet.ExecOpts{ 1894 // We set Dir to "." instead of the default ("go/bin") so when the dist tests 1895 // try to run os/exec.Command("go", "test", ...), the LookPath of "go" doesn't 1896 // return "./go.exe" (which exists in the current directory: "go/bin") and then 1897 // fail when dist tries to run the binary in dir "$GOROOT/src", since 1898 // "$GOROOT/src" + "./go.exe" doesn't exist. Perhaps LookPath should return 1899 // an absolute path. 
1900 Dir: ".", 1901 Output: &buf, // see "maybe stream lines" TODO below 1902 ExtraEnv: env, 1903 Path: []string{st.conf.FilePathJoin("$WORKDIR", "go", "bin"), "$PATH"}, 1904 Args: args, 1905 }) 1906 execDuration := time.Since(t0) 1907 sp.Done(err) 1908 if err != nil { 1909 bc.MarkBroken() // prevents reuse 1910 for _, ti := range tis { 1911 ti.numFail++ 1912 st.logf("Execution error running %s on %s: %v (numFails = %d)", ti.name, bc, err, ti.numFail) 1913 if err == buildlet.ErrTimeout { 1914 ti.failf("Test %q ran over %v limit (%v); saw output:\n%s", ti.name, timeout, execDuration, buf.Bytes()) 1915 } else if ti.numFail >= maxTestExecErrors { 1916 ti.failf("Failed to schedule %q test after %d tries.\n", ti.name, maxTestExecErrors) 1917 } else { 1918 ti.retry() 1919 } 1920 } 1921 return 1922 } 1923 1924 out := buf.Bytes() 1925 out = bytes.Replace(out, []byte("\nALL TESTS PASSED (some were excluded)\n"), nil, 1) 1926 out = bytes.Replace(out, []byte("\nALL TESTS PASSED\n"), nil, 1) 1927 1928 for _, ti := range tis { 1929 ti.output = out 1930 ti.remoteErr = remoteErr 1931 ti.execDuration = execDuration 1932 ti.groupSize = len(tis) 1933 ti.shardIPPort = bc.IPPort() 1934 close(ti.done) 1935 1936 // After the first one, make the rest succeed with no output. 1937 // TODO: maybe stream lines (set Output to a line-reading 1938 // Writer instead of &buf). for now we just wait for them in 1939 // ~10 second batches. Doesn't look as smooth on the output, 1940 // though. 1941 out = nil 1942 remoteErr = nil 1943 execDuration = 0 1944 } 1945 } 1946 1947 func (st *buildStatus) CreateSpan(event string, optText ...string) spanlog.Span { 1948 return schedule.CreateSpan(st, event, optText...) 
1949 } 1950 1951 func (st *buildStatus) LogEventTime(event string, optText ...string) { 1952 if len(optText) > 1 { 1953 panic("usage") 1954 } 1955 if pool.NewGCEConfiguration().InStaging() { 1956 st.logf("%s %v", event, optText) 1957 } 1958 st.mu.Lock() 1959 defer st.mu.Unlock() 1960 var text string 1961 if len(optText) > 0 { 1962 text = optText[0] 1963 } 1964 st.events = append(st.events, eventAndTime{ 1965 t: time.Now(), 1966 evt: event, 1967 text: text, 1968 }) 1969 } 1970 1971 func (st *buildStatus) hasEvent(event string) bool { 1972 st.mu.Lock() 1973 defer st.mu.Unlock() 1974 for _, e := range st.events { 1975 if e.evt == event { 1976 return true 1977 } 1978 } 1979 return false 1980 } 1981 1982 // HTMLStatusLine returns the HTML to show within the <pre> block on 1983 // the main page's list of active builds. 1984 func (st *buildStatus) HTMLStatusLine() template.HTML { return st.htmlStatus(singleLine) } 1985 func (st *buildStatus) HTMLStatusTruncated() template.HTML { return st.htmlStatus(truncated) } 1986 func (st *buildStatus) HTMLStatus() template.HTML { return st.htmlStatus(full) } 1987 1988 func strSliceTo(s string, n int) string { 1989 if len(s) <= n { 1990 return s 1991 } 1992 return s[:n] 1993 } 1994 1995 type buildStatusDetail int 1996 1997 const ( 1998 singleLine buildStatusDetail = iota 1999 truncated 2000 full 2001 ) 2002 2003 func (st *buildStatus) htmlStatus(detail buildStatusDetail) template.HTML { 2004 if st == nil { 2005 return "[nil]" 2006 } 2007 st.mu.Lock() 2008 defer st.mu.Unlock() 2009 2010 urlPrefix := "https://go-review.googlesource.com/#/q/" 2011 2012 if st.Rev == "" { 2013 log.Printf("warning: st.Rev is empty") 2014 } 2015 2016 var buf bytes.Buffer 2017 fmt.Fprintf(&buf, "<a href='https://github.com/golang/go/wiki/DashboardBuilders'>%s</a> rev <a href='%s%s'>%s</a>", 2018 st.Name, urlPrefix, st.Rev, strSliceTo(st.Rev, 8)) 2019 if st.IsSubrepo() { 2020 if st.SubRev == "" { 2021 log.Printf("warning: st.SubRev is empty on subrepo") 2022 } 
2023 fmt.Fprintf(&buf, " (sub-repo %s rev <a href='%s%s'>%s</a>)", 2024 st.SubName, urlPrefix, st.SubRev, strSliceTo(st.SubRev, 8)) 2025 } 2026 if ts := st.trySet; ts != nil { 2027 if ts.ChangeID == "" { 2028 log.Printf("warning: ts.ChangeID is empty") 2029 } 2030 fmt.Fprintf(&buf, " (<a href='/try?commit=%v'>trybot set</a> for <a href='https://go-review.googlesource.com/#/q/%s'>%s</a>)", 2031 strSliceTo(ts.Commit, 8), 2032 ts.ChangeTriple(), strSliceTo(ts.ChangeID, 8)) 2033 } 2034 2035 var state string 2036 if st.canceled { 2037 state = "canceled" 2038 } else if st.done.IsZero() { 2039 if st.HasBuildlet() { 2040 state = "running" 2041 } else { 2042 state = "waiting_for_machine" 2043 } 2044 } else if st.succeeded { 2045 state = "succeeded" 2046 } else { 2047 state = "<font color='#700000'>failed</font>" 2048 } 2049 if detail > singleLine && st.bc != nil { 2050 fmt.Fprintf(&buf, "; <a href='%s'>%s</a>; %s", html.EscapeString(st.logsURLLocked()), state, html.EscapeString(st.bc.String())) 2051 } else { 2052 fmt.Fprintf(&buf, "; <a href='%s'>%s</a>", html.EscapeString(st.logsURLLocked()), state) 2053 } 2054 2055 t := st.done 2056 if t.IsZero() { 2057 t = st.startTime 2058 } 2059 fmt.Fprintf(&buf, ", %v ago", time.Since(t).Round(time.Second)) 2060 if detail > singleLine { 2061 buf.WriteByte('\n') 2062 lastLines := 0 2063 if detail == truncated { 2064 lastLines = 3 2065 } 2066 st.writeEventsLocked(&buf, true, lastLines) 2067 } 2068 return template.HTML(buf.String()) 2069 } 2070 2071 func (st *buildStatus) logsURLLocked() string { 2072 if st.logURL != "" { 2073 return st.logURL 2074 } 2075 var urlPrefix string 2076 if pool.NewGCEConfiguration().BuildEnv() == buildenv.Production { 2077 urlPrefix = "https://farmer.golang.org" 2078 } else { 2079 urlPrefix = "http://" + pool.NewGCEConfiguration().BuildEnv().StaticIP 2080 } 2081 if *mode == "dev" { 2082 urlPrefix = "https://localhost:8119" 2083 } 2084 u := fmt.Sprintf("%v/temporarylogs?name=%s&rev=%s&st=%p", urlPrefix, 
st.Name, st.Rev, st) 2085 if st.IsSubrepo() { 2086 u += fmt.Sprintf("&subName=%v&subRev=%v", st.SubName, st.SubRev) 2087 } 2088 return u 2089 } 2090 2091 // st.mu must be held. 2092 // If numLines is greater than zero, it's the number of final lines to truncate to. 2093 func (st *buildStatus) writeEventsLocked(w io.Writer, htmlMode bool, numLines int) { 2094 startAt := 0 2095 if numLines > 0 { 2096 startAt = len(st.events) - numLines 2097 if startAt > 0 { 2098 io.WriteString(w, "...\n") 2099 } else { 2100 startAt = 0 2101 } 2102 } 2103 2104 for i := startAt; i < len(st.events); i++ { 2105 evt := st.events[i] 2106 e := evt.evt 2107 text := evt.text 2108 if htmlMode { 2109 if e == "running_exec" { 2110 e = fmt.Sprintf("<a href='%s'>%s</a>", html.EscapeString(st.logsURLLocked()), e) 2111 } 2112 e = "<b>" + e + "</b>" 2113 text = "<i>" + html.EscapeString(text) + "</i>" 2114 } 2115 fmt.Fprintf(w, " %v %s %s\n", evt.t.Format(time.RFC3339), e, text) 2116 } 2117 if st.isRunningLocked() && len(st.events) > 0 { 2118 lastEvt := st.events[len(st.events)-1] 2119 fmt.Fprintf(w, " %7s (now)\n", fmt.Sprintf("+%0.1fs", time.Since(lastEvt.t).Seconds())) 2120 } 2121 } 2122 2123 func (st *buildStatus) logs() string { 2124 return st.output.String() 2125 } 2126 2127 func (st *buildStatus) Write(p []byte) (n int, err error) { 2128 return st.output.Write(p) 2129 } 2130 2131 // repeatedCommunicationError takes a buildlet execution error (a 2132 // network/communication error, as opposed to a remote execution that 2133 // ran and had a non-zero exit status and we heard about) and 2134 // conditionally promotes it to a terminal error. If this returns a 2135 // non-nil value, the execErr should be considered terminal with the 2136 // returned error. 
2137 func (st *buildStatus) repeatedCommunicationError(execErr error) error { 2138 if execErr == nil { 2139 return nil 2140 } 2141 // For now, only do this for plan9, which is flaky (Issue 31261), 2142 // but not for plan9-arm (Issue 52677) 2143 if strings.HasPrefix(st.Name, "plan9-") && st.Name != "plan9-arm" && execErr == errBuildletsGone { 2144 // TODO: give it two tries at least later (store state 2145 // somewhere; global map?). But for now we're going to 2146 // only give it one try. 2147 return fmt.Errorf("network error promoted to terminal error: %v", execErr) 2148 } 2149 return nil 2150 } 2151 2152 // commitTime returns the greater of Rev and SubRev's commit times. 2153 func (st *buildStatus) commitTime() time.Time { 2154 if st.RevCommitTime.Before(st.SubRevCommitTime) { 2155 return st.SubRevCommitTime 2156 } 2157 return st.RevCommitTime 2158 } 2159 2160 // branch returns branch for either Rev, or SubRev if it exists. 2161 func (st *buildStatus) branch() string { 2162 if st.SubRev != "" { 2163 return st.SubRevBranch 2164 } 2165 return st.RevBranch 2166 }