golang.org/x/build@v0.0.0-20240506185731-218518f32b70/cmd/watchflakes/luci.go (about) 1 // Copyright 2024 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package main 6 7 import ( 8 "context" 9 "encoding/json" 10 "fmt" 11 "io" 12 "log" 13 "net/http" 14 "regexp" 15 "slices" 16 "strings" 17 "sync" 18 "time" 19 20 bbpb "go.chromium.org/luci/buildbucket/proto" 21 "go.chromium.org/luci/common/api/gitiles" 22 gpb "go.chromium.org/luci/common/proto/gitiles" 23 "go.chromium.org/luci/grpc/prpc" 24 rdbpb "go.chromium.org/luci/resultdb/proto/v1" 25 "golang.org/x/sync/errgroup" 26 "google.golang.org/protobuf/types/known/fieldmaskpb" 27 "google.golang.org/protobuf/types/known/timestamppb" 28 ) 29 30 const resultDBHost = "results.api.cr.dev" 31 const crBuildBucketHost = "cr-buildbucket.appspot.com" 32 const gitilesHost = "go.googlesource.com" 33 34 // LUCIClient is a LUCI client. 35 type LUCIClient struct { 36 HTTPClient *http.Client 37 GitilesClient gpb.GitilesClient 38 BuildsClient bbpb.BuildsClient 39 BuildersClient bbpb.BuildersClient 40 ResultDBClient rdbpb.ResultDBClient 41 42 // TraceSteps controls whether to log each step name as it's executed. 43 TraceSteps bool 44 45 nProc int 46 } 47 48 // NewLUCIClient creates a LUCI client. 49 // nProc controls concurrency. NewLUCIClient panics if nProc is non-positive. 50 func NewLUCIClient(nProc int) *LUCIClient { 51 if nProc < 1 { 52 panic(fmt.Errorf("nProc is %d, want 1 or higher", nProc)) 53 } 54 c := new(http.Client) 55 gitilesClient, err := gitiles.NewRESTClient(c, gitilesHost, false) 56 if err != nil { 57 log.Fatal(err) 58 } 59 buildsClient := bbpb.NewBuildsClient(&prpc.Client{ 60 C: c, 61 Host: crBuildBucketHost, 62 }) 63 buildersClient := bbpb.NewBuildersClient(&prpc.Client{ 64 C: c, 65 Host: crBuildBucketHost, 66 }) 67 resultDBClient := rdbpb.NewResultDBClient(&prpc.Client{ 68 C: c, 69 Host: resultDBHost, 70 }) 71 return &LUCIClient{ 72 HTTPClient: c, 73 GitilesClient: gitilesClient, 74 BuildsClient: buildsClient, 75 BuildersClient: buildersClient, 76 ResultDBClient: resultDBClient, 77 nProc: nProc, 78 } 79 } 80 81 type BuilderConfigProperties struct { 82 Repo string `json:"project,omitempty"` 83 GoBranch string `json:"go_branch,omitempty"` 84 Target struct { 85 GOARCH string `json:"goarch,omitempty"` 86 GOOS string `json:"goos,omitempty"` 87 } `json:"target"` 88 KnownIssue int `json:"known_issue,omitempty"` 89 } 90 91 type Builder struct { 92 Name string 93 *BuilderConfigProperties 94 } 95 96 type BuildResult struct { 97 ID int64 98 Status bbpb.Status 99 Commit string // commit hash 100 Time time.Time // commit time 101 GoCommit string // for subrepo build, go commit hash 102 BuildTime time.Time // build end time 103 Builder string 104 *BuilderConfigProperties 105 InvocationID string // ResultDB invocation ID 106 LogURL string // textual log of the whole run 107 LogText string 108 StepLogURL string // textual log of the (last) failed step, if any 109 StepLogText string 110 Failures []*Failure 111 } 112 113 type Commit struct { 114 Hash string 115 Time time.Time 116 } 117 118 type Project struct { 119 Repo string 120 GoBranch string 121 } 122 123 type Dashboard struct { 124 Project 125 Builders []Builder 126 Commits []Commit 127 Results [][]*BuildResult // indexed by builder, then by commit 128 } 129 130 type Failure struct { 131 TestID string 132 Status rdbpb.TestStatus 133 LogURL string 134 LogText string 135 } 136 137 // ListCommits fetches the list of commits from Gerrit. 138 func (c *LUCIClient) ListCommits(ctx context.Context, repo, goBranch string, since time.Time) []Commit { 139 if c.TraceSteps { 140 log.Println("ListCommits", repo, goBranch) 141 } 142 branch := "master" 143 if repo == "go" { 144 branch = goBranch 145 } 146 var commits []Commit 147 var pageToken string 148 nextPage: 149 resp, err := c.GitilesClient.Log(ctx, &gpb.LogRequest{ 150 Project: repo, 151 Committish: "refs/heads/" + branch, 152 PageSize: 1000, 153 PageToken: pageToken, 154 }) 155 if err != nil { 156 log.Fatal(err) 157 } 158 for _, c := range resp.GetLog() { 159 commitTime := c.GetCommitter().GetTime().AsTime() 160 if commitTime.Before(since) { 161 goto done 162 } 163 commits = append(commits, Commit{ 164 Hash: c.GetId(), 165 Time: commitTime, 166 }) 167 } 168 if resp.GetNextPageToken() != "" { 169 pageToken = resp.GetNextPageToken() 170 goto nextPage 171 } 172 done: 173 return commits 174 } 175 176 // ListBuilders fetches the list of builders, on the given repo and goBranch. 177 // If repo and goBranch are empty, it fetches all builders. 178 func (c *LUCIClient) ListBuilders(ctx context.Context, repo, goBranch string) ([]Builder, error) { 179 if c.TraceSteps { 180 log.Println("ListBuilders", repo, goBranch) 181 } 182 all := repo == "" && goBranch == "" 183 var builders []Builder 184 var pageToken string 185 nextPage: 186 resp, err := c.BuildersClient.ListBuilders(ctx, &bbpb.ListBuildersRequest{ 187 Project: "golang", 188 Bucket: "ci", 189 PageSize: 1000, 190 PageToken: pageToken, 191 }) 192 if err != nil { 193 return nil, err 194 } 195 for _, b := range resp.GetBuilders() { 196 var p BuilderConfigProperties 197 json.Unmarshal([]byte(b.GetConfig().GetProperties()), &p) 198 if all || (p.Repo == repo && p.GoBranch == goBranch) { 199 builders = append(builders, Builder{b.GetId().GetBuilder(), &p}) 200 } 201 } 202 if resp.GetNextPageToken() != "" { 203 pageToken = resp.GetNextPageToken() 204 goto nextPage 205 } 206 slices.SortFunc(builders, func(a, b Builder) int { 207 return strings.Compare(a.Name, b.Name) 208 }) 209 return builders, nil 210 } 211 212 func (c *LUCIClient) ListBoards(ctx context.Context) ([]*Dashboard, error) { 213 builders, err := c.ListBuilders(ctx, "", "") 214 if err != nil { 215 return nil, err 216 } 217 repoMap := make(map[Project]bool) 218 for _, b := range builders { 219 repoMap[Project{b.Repo, b.GoBranch}] = true 220 } 221 boards := make([]*Dashboard, 0, len(repoMap)) 222 for p := range repoMap { 223 d := &Dashboard{Project: p} 224 boards = append(boards, d) 225 } 226 slices.SortFunc(boards, func(d1, d2 *Dashboard) int { 227 if d1.Repo != d2.Repo { 228 // put main repo first 229 if d1.Repo == "go" { 230 return -1 231 } 232 if d2.Repo == "go" { 233 return 1 234 } 235 return strings.Compare(d1.Repo, d2.Repo) 236 } 237 return strings.Compare(d1.GoBranch, d2.GoBranch) 238 }) 239 return boards, nil 240 } 241 242 // GetBuilds fetches builds from one builder. 243 func (c *LUCIClient) GetBuilds(ctx context.Context, builder string, since time.Time) ([]*bbpb.Build, error) { 244 if c.TraceSteps { 245 log.Println("GetBuilds", builder) 246 } 247 pred := &bbpb.BuildPredicate{ 248 Builder: &bbpb.BuilderID{Project: "golang", Bucket: "ci", Builder: builder}, 249 CreateTime: &bbpb.TimeRange{StartTime: timestamppb.New(since)}, 250 } 251 mask, err := fieldmaskpb.New((*bbpb.Build)(nil), "id", "builder", "output", "status", "steps", "infra", "end_time") 252 if err != nil { 253 return nil, err 254 } 255 var builds []*bbpb.Build 256 var pageToken string 257 nextPage: 258 resp, err := c.BuildsClient.SearchBuilds(ctx, &bbpb.SearchBuildsRequest{ 259 Predicate: pred, 260 Mask: &bbpb.BuildMask{Fields: mask}, 261 PageSize: 1000, 262 PageToken: pageToken, 263 }) 264 if err != nil { 265 return nil, err 266 } 267 builds = append(builds, resp.GetBuilds()...) 268 if resp.GetNextPageToken() != "" { 269 pageToken = resp.GetNextPageToken() 270 goto nextPage 271 } 272 return builds, nil 273 } 274 275 // ReadBoard reads the build dashboard dash, then fills in the content. 276 func (c *LUCIClient) ReadBoard(ctx context.Context, dash *Dashboard, since time.Time) error { 277 if c.TraceSteps { 278 log.Println("ReadBoard", dash.Repo, dash.GoBranch) 279 } 280 dash.Commits = c.ListCommits(ctx, dash.Repo, dash.GoBranch, since) 281 var err error 282 dash.Builders, err = c.ListBuilders(ctx, dash.Repo, dash.GoBranch) 283 if err != nil { 284 return err 285 } 286 287 dashMap := make([]map[string]*BuildResult, len(dash.Builders)) // indexed by builder, then keyed by commit hash 288 289 // Get builds from builders. 290 g, groupContext := errgroup.WithContext(ctx) 291 g.SetLimit(c.nProc) 292 for i, builder := range dash.Builders { 293 builder := builder 294 buildMap := make(map[string]*BuildResult) 295 dashMap[i] = buildMap 296 g.Go(func() error { 297 bName := builder.Name 298 builds, err := c.GetBuilds(groupContext, bName, since) 299 if err != nil { 300 return err 301 } 302 for _, b := range builds { 303 id := b.GetId() 304 var commit, goCommit string 305 prop := b.GetOutput().GetProperties().GetFields() 306 for _, s := range prop["sources"].GetListValue().GetValues() { 307 x := s.GetStructValue().GetFields()["gitilesCommit"].GetStructValue().GetFields() 308 c := x["id"].GetStringValue() 309 switch repo := x["project"].GetStringValue(); repo { 310 case dash.Repo: 311 commit = c 312 case "go": 313 goCommit = c 314 default: 315 log.Fatalf("repo mismatch: %s %s %s", repo, dash.Repo, buildURL(id)) 316 } 317 } 318 if commit == "" { 319 switch b.GetStatus() { 320 case bbpb.Status_SUCCESS: 321 log.Fatalf("empty commit: %s", buildURL(id)) 322 default: 323 // unfinished build, or infra failure, ignore 324 continue 325 } 326 } 327 buildTime := b.GetEndTime().AsTime() 328 if r0 := buildMap[commit]; r0 != nil { 329 // A build already exists for the same builder and commit. 330 // Maybe manually retried, or different go commits on same subrepo commit. 331 // Pick the one ended at later time. 332 const printDup = false 333 if printDup { 334 fmt.Printf("skip duplicate build: %s %s %d %d\n", bName, shortHash(commit), id, r0.ID) 335 } 336 if buildTime.Before(r0.BuildTime) { 337 continue 338 } 339 } 340 rdb := b.GetInfra().GetResultdb() 341 if rdb.GetHostname() != resultDBHost { 342 log.Fatalf("ResultDB host mismatch: %s %s %s", rdb.GetHostname(), resultDBHost, buildURL(id)) 343 } 344 if b.GetBuilder().GetBuilder() != bName { // sanity check 345 log.Fatalf("builder mismatch: %s %s %s", b.GetBuilder().GetBuilder(), bName, buildURL(id)) 346 } 347 r := &BuildResult{ 348 ID: id, 349 Status: b.GetStatus(), 350 Commit: commit, 351 GoCommit: goCommit, 352 BuildTime: buildTime, 353 Builder: bName, 354 BuilderConfigProperties: builder.BuilderConfigProperties, 355 InvocationID: rdb.GetInvocation(), 356 } 357 if r.Status == bbpb.Status_FAILURE { 358 links := prop["failure"].GetStructValue().GetFields()["links"].GetListValue().GetValues() 359 for _, l := range links { 360 m := l.GetStructValue().GetFields() 361 if strings.Contains(m["name"].GetStringValue(), "(combined output)") { 362 r.LogURL = m["url"].GetStringValue() 363 break 364 } 365 } 366 if r.LogURL == "" { 367 // No log URL, Probably a build failure. 368 // E.g. https://ci.chromium.org/ui/b/8759448820419452721 369 // Use the build's stderr instead. 370 for _, l := range b.GetOutput().GetLogs() { 371 if l.GetName() == "stderr" { 372 r.LogURL = l.GetViewUrl() 373 break 374 } 375 } 376 } 377 378 // Fetch the stderr of the failed step. 379 steps := b.GetSteps() 380 stepLoop: 381 for i := len(steps) - 1; i >= 0; i-- { 382 s := steps[i] 383 if s.GetStatus() == bbpb.Status_FAILURE { 384 for _, l := range s.GetLogs() { 385 if l.GetName() == "stderr" || l.GetName() == "output" { 386 r.StepLogURL = l.GetViewUrl() 387 break stepLoop 388 } 389 } 390 } 391 } 392 } 393 buildMap[commit] = r 394 } 395 return nil 396 }) 397 } 398 if err := g.Wait(); err != nil { 399 return err 400 } 401 402 // Gather into dashboard. 403 dash.Results = make([][]*BuildResult, len(dash.Builders)) 404 for i, m := range dashMap { 405 dash.Results[i] = make([]*BuildResult, len(dash.Commits)) 406 for j, c := range dash.Commits { 407 r := m[c.Hash] 408 if r == nil { 409 continue 410 } 411 r.Time = c.Time // fill in commit time 412 dash.Results[i][j] = r 413 } 414 } 415 416 return nil 417 } 418 419 func (c *LUCIClient) ReadBoards(ctx context.Context, boards []*Dashboard, since time.Time) error { 420 for _, dash := range boards { 421 err := c.ReadBoard(ctx, dash, since) 422 if err != nil { 423 return err 424 } 425 } 426 return nil 427 } 428 429 // GetResultAndArtifacts fetches the failed tests and artifacts for the failed run r. 430 func (c *LUCIClient) GetResultAndArtifacts(ctx context.Context, r *BuildResult) []*Failure { 431 if c.TraceSteps { 432 log.Println("GetResultAndArtifacts", r.Builder, shortHash(r.Commit), r.ID) 433 } 434 req := &rdbpb.QueryTestResultsRequest{ 435 Invocations: []string{r.InvocationID}, 436 Predicate: &rdbpb.TestResultPredicate{Expectancy: rdbpb.TestResultPredicate_VARIANTS_WITH_UNEXPECTED_RESULTS}, 437 PageSize: 1000, 438 // TODO: paging? Not sure we want to handle more than 1000 failures in a run... 439 } 440 resp, err := c.ResultDBClient.QueryTestResults(ctx, req) 441 if err != nil { 442 log.Fatal(err) 443 } 444 445 var failures []*Failure 446 for _, rr := range resp.GetTestResults() { 447 testID := rr.GetTestId() 448 resp, err := c.ResultDBClient.QueryArtifacts(ctx, &rdbpb.QueryArtifactsRequest{ 449 Invocations: []string{r.InvocationID}, 450 Predicate: &rdbpb.ArtifactPredicate{ 451 TestResultPredicate: &rdbpb.TestResultPredicate{ 452 TestIdRegexp: regexp.QuoteMeta(testID), 453 Expectancy: rdbpb.TestResultPredicate_VARIANTS_WITH_UNEXPECTED_RESULTS, 454 }, 455 }, 456 PageSize: 1000, 457 }) 458 if err != nil { 459 log.Fatal(err) 460 } 461 for _, a := range resp.GetArtifacts() { 462 if a.GetArtifactId() != "output" { 463 continue 464 } 465 url := a.GetFetchUrl() 466 f := &Failure{ 467 TestID: testID, 468 Status: rr.GetStatus(), 469 LogURL: url, 470 } 471 failures = append(failures, f) 472 } 473 } 474 slices.SortFunc(failures, func(f1, f2 *Failure) int { 475 return strings.Compare(f1.TestID, f2.TestID) 476 }) 477 return failures 478 } 479 480 // split TestID to package and test name. 481 func splitTestID(testid string) (string, string) { 482 // TestId is <package path>.<test name>. 483 // Both package path and test name could contain "." and "/" (due to subtests). 484 // So looking for "." or "/" are not reliable. 485 // Tests are always start with ".Test" (or ".Example", ".Benchmark" (do we 486 // run benchmarks?)). Looking for them instead. 487 // TODO: handle test flavors (e.g. -cpu=1,2,4, -linkmode=internal, etc.) 488 for _, sep := range []string{".Test", ".Example", ".Benchmark"} { 489 pkg, test, ok := strings.Cut(testid, sep) 490 if ok { 491 return pkg, sep[1:] + test // add back "Test" prefix (without ".") 492 } 493 } 494 return "", testid 495 } 496 497 func buildURL(buildID int64) string { // keep in sync with buildUrlRE in github.go 498 return fmt.Sprintf("https://ci.chromium.org/b/%d", buildID) 499 } 500 501 func shortHash(s string) string { 502 if len(s) > 8 { 503 return s[:8] 504 } 505 return s 506 } 507 508 // FindFailures returns the failures listed in the dashboards. 509 // The result is sorted by commit date, then repo, then builder. 510 // Pupulate the failure contents (the .Failures fields) for the 511 // failures. 512 func (c *LUCIClient) FindFailures(ctx context.Context, boards []*Dashboard) []*BuildResult { 513 var res []*BuildResult 514 var wg sync.WaitGroup 515 sem := make(chan int, c.nProc) 516 for _, dash := range boards { 517 for i, b := range dash.Builders { 518 for _, r := range dash.Results[i] { 519 if r == nil { 520 continue 521 } 522 if r.Builder != b.Name { // sanity check 523 log.Fatalf("builder mismatch: %s %s", b.Name, r.Builder) 524 } 525 526 if r.Status == bbpb.Status_FAILURE { 527 wg.Add(1) 528 sem <- 1 529 go func(r *BuildResult) { 530 defer func() { wg.Done(); <-sem }() 531 r.Failures = c.GetResultAndArtifacts(ctx, r) 532 }(r) 533 res = append(res, r) 534 } 535 } 536 } 537 } 538 wg.Wait() 539 540 slices.SortFunc(res, func(a, b *BuildResult) int { 541 if !a.Time.Equal(b.Time) { 542 return a.Time.Compare(b.Time) 543 } 544 if a.Repo != b.Repo { 545 return strings.Compare(a.Repo, b.Repo) 546 } 547 if a.Builder != b.Builder { 548 return strings.Compare(a.Builder, b.Builder) 549 } 550 return strings.Compare(a.Commit, b.Commit) 551 }) 552 553 return res 554 } 555 556 // PrintDashboard prints the dashboard. 557 // For each builder, it prints a list of commits and status. 558 func PrintDashboard(dash *Dashboard) { 559 for i, b := range dash.Builders { 560 fmt.Println(b.Name) 561 for _, r := range dash.Results[i] { 562 if r == nil { 563 continue 564 } 565 fmt.Printf("\t%s %v %v\n", shortHash(r.Commit), r.Time, r.Status) 566 } 567 } 568 } 569 570 // FetchLogs fetches logs for build results. 571 func (c *LUCIClient) FetchLogs(res []*BuildResult) { 572 // TODO: caching? 573 g := new(errgroup.Group) 574 g.SetLimit(c.nProc) 575 for _, r := range res { 576 r := r 577 g.Go(func() error { 578 c.fetchLogsForBuild(r) 579 return nil 580 }) 581 } 582 g.Wait() 583 } 584 585 func (c *LUCIClient) fetchLogsForBuild(r *BuildResult) { 586 if c.TraceSteps { 587 log.Println("fetchLogsForBuild", r.Builder, shortHash(r.Commit), r.ID) 588 } 589 if r.LogURL == "" { 590 fmt.Printf("no log url: %s\n", buildURL(r.ID)) 591 } else { 592 r.LogText = fetchURL(r.LogURL + "?format=raw") 593 } 594 if r.StepLogURL != "" { 595 r.StepLogText = fetchURL(r.StepLogURL + "?format=raw") 596 } 597 for _, f := range r.Failures { 598 if f.LogURL == "" { 599 fmt.Printf("no log url: %s %s\n", buildURL(r.ID), f.TestID) 600 } else { 601 f.LogText = fetchURL(f.LogURL) 602 } 603 } 604 } 605 606 func fetchURL(url string) string { 607 resp, err := http.Get(url) 608 if err != nil { 609 log.Fatal(err) 610 } 611 defer resp.Body.Close() 612 if resp.StatusCode == http.StatusNotFound { 613 return "" 614 } else if resp.StatusCode != http.StatusOK { 615 body, _ := io.ReadAll(io.LimitReader(resp.Body, 4<<10)) 616 log.Fatal(fmt.Errorf("GET %s: non-200 OK status code: %v body: %q", url, resp.Status, body)) 617 } 618 body, err := io.ReadAll(resp.Body) 619 if err != nil { 620 log.Fatal(fmt.Errorf("GET %s: failed to read body: %v body: %q", url, err, body)) 621 } 622 return string(body) 623 }