golang.org/x/build@v0.0.0-20240506185731-218518f32b70/cmd/coordinator/internal/lucipoll/lucipoll.go (about) 1 // Copyright 2024 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package lucipoll implements a simple polling LUCI client 6 // for the possibly-short-term needs of the build dashboard. 7 package lucipoll 8 9 import ( 10 "context" 11 "encoding/json" 12 "fmt" 13 "log" 14 "runtime/debug" 15 "slices" 16 "strings" 17 "sync" 18 "time" 19 20 bbpb "go.chromium.org/luci/buildbucket/proto" 21 "golang.org/x/build/maintner/maintnerd/apipb" 22 "golang.org/x/build/repos" 23 "google.golang.org/grpc" 24 "google.golang.org/protobuf/types/known/fieldmaskpb" 25 ) 26 27 // maintnerClient is a subset of apipb.MaintnerServiceClient. 28 type maintnerClient interface { 29 // GetDashboard is extracted from apipb.MaintnerServiceClient. 30 GetDashboard(ctx context.Context, in *apipb.DashboardRequest, opts ...grpc.CallOption) (*apipb.DashboardResponse, error) 31 } 32 33 type Builder struct { 34 Name string 35 *BuilderConfigProperties 36 } 37 38 type BuilderConfigProperties struct { 39 Repo string `json:"project,omitempty"` 40 GoBranch string `json:"go_branch,omitempty"` 41 Target struct { 42 GOOS string `json:"goos,omitempty"` 43 GOARCH string `json:"goarch,omitempty"` 44 } `json:"target"` 45 KnownIssue int `json:"known_issue,omitempty"` 46 } 47 48 type Build struct { 49 ID int64 50 BuilderName string 51 Status bbpb.Status 52 } 53 54 func NewService(maintCl maintnerClient, buildersCl bbpb.BuildersClient, buildsCl bbpb.BuildsClient) *service { 55 s := &service{ 56 maintCl: maintCl, 57 buildersCl: buildersCl, 58 buildsCl: buildsCl, 59 } 60 go s.pollLoop() 61 return s 62 } 63 64 type service struct { 65 maintCl maintnerClient 66 67 buildersCl bbpb.BuildersClient 68 buildsCl bbpb.BuildsClient 69 70 mu sync.RWMutex 71 cached Snapshot 72 } 73 74 // A Snapshot is a consistent snapshot in time holding LUCI post-submit state. 75 type Snapshot struct { 76 Builders map[string]Builder // Map key is builder name. 77 RepoCommitBuilds map[string]map[string]map[string]Build // Map keys are repo, commit ID, builder name. 78 } 79 80 // PostSubmitSnapshot returns a cached snapshot. 81 func (s *service) PostSubmitSnapshot() Snapshot { 82 s.mu.RLock() 83 defer s.mu.RUnlock() 84 return s.cached 85 } 86 87 func (s *service) pollLoop() { 88 // A hard timeout for runOnce to complete. 89 // Normally it takes about a minute or so. 90 // Sometimes (a few times a week) it takes 24 hours and a minute. 91 // Don't let it run more than 30 minutes, so we'll find out trying 92 // again sooner can help, at least until the root problem is fixed. 93 // See go.dev/issue/66687. 94 const runOnceTimeout = 30 * time.Minute 95 96 ticker := time.NewTicker(2 * time.Minute) 97 for { 98 ctx, cancel := context.WithTimeout(context.Background(), runOnceTimeout) 99 builders, builds, err := runOnce(ctx, s.maintCl, s.buildersCl, s.buildsCl) 100 cancel() 101 if err != nil { 102 log.Println("lucipoll:", err) 103 // Sleep a bit and retry. 104 time.Sleep(30 * time.Second) 105 continue 106 } 107 s.mu.Lock() 108 s.cached = Snapshot{builders, builds} 109 s.mu.Unlock() 110 <-ticker.C // Limit how often we're willing to poll. 111 } 112 } 113 114 func runOnce( 115 ctx context.Context, 116 maintCl maintnerClient, buildersCl bbpb.BuildersClient, buildsCl bbpb.BuildsClient, 117 ) (_ map[string]Builder, _ map[string]map[string]map[string]Build, err error) { 118 defer func() { 119 if e := recover(); e != nil { 120 err = fmt.Errorf("internal panic: %v\n\n%s", e, debug.Stack()) 121 } 122 }() 123 124 // Fetch all current completed LUCI builders. 125 // 126 // TODO: It would be possible to cache initially fetched builders and then fetch 127 // additional individual builders when seeing a build referencing an unknown one. 128 // But that would need to take into account that a builder may be intentionally 129 // removed from the LUCI dashboard. It adds more complexity, so for now do the 130 // simple thing and save caching as an optional enhancement. 131 builderList, err := listBuilders(ctx, buildersCl) 132 if err != nil { 133 return nil, nil, err 134 } 135 var builders = make(map[string]Builder) 136 for _, b := range builderList { 137 if _, ok := builders[b.Name]; ok { 138 return nil, nil, fmt.Errorf("duplicate builder name %q", b.Name) 139 } 140 if b.KnownIssue != 0 { 141 // Skip LUCI builders with a known issue at this time. 142 // This also means builds from these builders are skipped below as well. 143 // Such builders&builds can be included when the callers deem it useful. 144 continue 145 } 146 builders[b.Name] = b 147 } 148 149 // Fetch LUCI builds for the builders, repositories, and their commits 150 // that are deemed relevant to the callers of this package. 151 // 152 // TODO: It would be possible to cache the last GetDashboard response 153 // and if didn't change since the last, only fetch new LUCI builds 154 // since then. Similarly, builds that were earlier for commits that 155 // still show up in the response can be reused instead of refetched. 156 // Furthermore, builds can be sorted according to how complete/useful 157 // they are. These known enhancements are left for later as needed. 158 var builds = make(map[string]map[string]map[string]Build) 159 dashResp, err := maintCl.GetDashboard(ctx, &apipb.DashboardRequest{MaxCommits: 30}) 160 if err != nil { 161 return nil, nil, err 162 } 163 var used, total int 164 t0 := time.Now() 165 // Fetch builds for Go repo commits. 166 for _, c := range dashResp.Commits { 167 repo, commit := "go", c.Commit 168 buildList, err := fetchBuildsForCommit(ctx, buildsCl, repo, commit, "id", "builder.builder", "status", "input.gitiles_commit") 169 if err != nil { 170 return nil, nil, err 171 } 172 total += len(buildList) 173 for _, b := range buildList { 174 if c := b.GetInput().GetGitilesCommit(); c.Project != repo { 175 return nil, nil, fmt.Errorf(`internal error: in Go repo commit loop, c.Project is %q but expected it to be "go"`, c.Project) 176 } else if c.Id != commit { 177 return nil, nil, fmt.Errorf("internal error: in Go repo commit loop, c.Id is %q but expected it to be %q", c.Id, commit) 178 } 179 switch b.GetStatus() { 180 case bbpb.Status_STARTED, bbpb.Status_SUCCESS, bbpb.Status_FAILURE, bbpb.Status_INFRA_FAILURE: 181 default: 182 // Skip builds with other statuses at this time. 183 // Such builds can be included when the callers deem it useful. 184 continue 185 } 186 builder, ok := builders[b.GetBuilder().GetBuilder()] 187 if !ok { 188 // A build that isn't associated with a current builder we're tracking. 189 // It might've been removed, or has a known issue. Skip this build too. 190 continue 191 } else if builder.Repo != "go" { 192 // Not a Go repo build. Those are handled below, so out of scope here. 193 continue 194 } 195 if builds[repo] == nil { 196 builds[repo] = make(map[string]map[string]Build) 197 } 198 if builds[repo][commit] == nil { 199 builds[repo][commit] = make(map[string]Build) 200 } 201 builds[repo][commit][b.GetBuilder().GetBuilder()] = Build{ 202 ID: b.GetId(), 203 BuilderName: b.GetBuilder().GetBuilder(), 204 Status: b.GetStatus(), 205 } 206 used++ 207 } 208 } 209 // Fetch builds for the single latest commit of each golang.org/x repo, 210 // ones that were invoked from the Go repository side. 211 var repoHeads = make(map[string]string) // A repo → head commit ID map. 212 for _, rh := range dashResp.RepoHeads { 213 repoHeads[rh.GerritProject] = rh.Commit.Commit 214 } 215 for _, r := range dashResp.Releases { 216 repo, commit := "go", r.GetBranchCommit() 217 buildList, err := fetchBuildsForCommit(ctx, buildsCl, repo, commit, "id", "builder.builder", "status", "input.gitiles_commit", "output.properties") 218 if err != nil { 219 return nil, nil, err 220 } 221 total += len(buildList) 222 for _, b := range buildList { 223 if c := b.GetInput().GetGitilesCommit(); c.Project != "go" { 224 return nil, nil, fmt.Errorf(`internal error: in x/ repo loop for builds invoked from the Go repo side, c.Project is %q but expected it to be "go"`, c.Project) 225 } 226 switch b.GetStatus() { 227 case bbpb.Status_STARTED, bbpb.Status_SUCCESS, bbpb.Status_FAILURE, bbpb.Status_INFRA_FAILURE: 228 default: 229 // Skip builds with other statuses at this time. 230 // Such builds can be included when the callers deem it useful. 231 continue 232 } 233 builder, ok := builders[b.GetBuilder().GetBuilder()] 234 if !ok { 235 // A build that isn't associated with a current builder we're tracking. 236 // It might've been removed, or has a known issue. Skip this build too. 237 continue 238 } else if builder.Repo == "go" { 239 // A Go repo build. Those were handled above, so out of scope here. 240 continue 241 } 242 var buildOutputProps struct { 243 Sources []struct { 244 GitilesCommit struct { 245 Project string 246 Ref string 247 Id string 248 } 249 } 250 } 251 if data, err := b.GetOutput().GetProperties().MarshalJSON(); err != nil { 252 return nil, nil, fmt.Errorf("marshaling build output properties to JSON failed: %v", err) 253 } else if err := json.Unmarshal(data, &buildOutputProps); err != nil { 254 return nil, nil, err 255 } 256 repoCommit, ok := func() (string, bool) { 257 for _, s := range buildOutputProps.Sources { 258 if c := s.GitilesCommit; c.Project == builder.Repo { 259 if c.Ref != "refs/heads/master" { 260 panic(fmt.Errorf(`internal error: in x/ repo loop for project %s, c.Ref != "refs/heads/master"`, c.Project)) 261 } 262 return c.Id, true 263 } 264 } 265 return "", false 266 }() 267 if !ok && b.GetStatus() == bbpb.Status_STARTED { 268 // A started build that hasn't selected the x/ repo commit yet. 269 // As an approximation, assume it'll pick the latest x/ repo head commit. 270 repoCommit = repoHeads[builder.Repo] 271 } else if !ok { 272 // Repo commit not found in output properties, and it's not a started build. 273 // As an example, this can happen if a build failed due to an infra failure 274 // early on, before selecting the x/ repo commit. Skip such builds. 275 continue 276 } 277 if repoCommit != repoHeads[builder.Repo] { 278 // Skip builds that are not for the x/ repository's head commit. 279 continue 280 } 281 if builds[builder.Repo] == nil { 282 builds[builder.Repo] = make(map[string]map[string]Build) 283 } 284 if builds[builder.Repo][repoCommit] == nil { 285 builds[builder.Repo][repoCommit] = make(map[string]Build) 286 } 287 builds[builder.Repo][repoCommit][b.GetBuilder().GetBuilder()] = Build{ 288 ID: b.GetId(), 289 BuilderName: b.GetBuilder().GetBuilder(), 290 Status: b.GetStatus(), 291 } 292 used++ 293 } 294 } 295 // Fetch builds for the single latest commit of each golang.org/x repo, 296 // ones that were invoked from the x/ repository side. 297 var goHeads = make(map[string]string) // A branch → head commit ID map. 298 for _, r := range dashResp.Releases { 299 goHeads[r.GetBranchName()] = r.GetBranchCommit() 300 } 301 for _, rh := range dashResp.RepoHeads { 302 if rh.GerritProject == "go" { 303 continue 304 } 305 if r, ok := repos.ByGerritProject[rh.GerritProject]; !ok || !r.ShowOnDashboard() { 306 // Not a golang.org/x repository that's marked visible on the dashboard. 307 // Skip it. 308 continue 309 } 310 repo, commit := rh.GerritProject, rh.Commit.Commit 311 buildList, err := fetchBuildsForCommit(ctx, buildsCl, repo, commit, "id", "builder.builder", "status", "input.gitiles_commit", "output.properties") 312 if err != nil { 313 return nil, nil, err 314 } 315 total += len(buildList) 316 for _, b := range buildList { 317 switch b.GetStatus() { 318 case bbpb.Status_STARTED, bbpb.Status_SUCCESS, bbpb.Status_FAILURE, bbpb.Status_INFRA_FAILURE: 319 default: 320 // Skip builds with other statuses at this time. 321 // Such builds can be included when the callers deem it useful. 322 continue 323 } 324 builder, ok := builders[b.GetBuilder().GetBuilder()] 325 if !ok { 326 // A build that isn't associated with a current builder we're tracking. 327 // It might've been removed, or has a known issue. Skip this build too. 328 continue 329 } 330 var buildOutputProps struct { 331 Sources []struct { 332 GitilesCommit struct { 333 Project string 334 Ref string 335 Id string 336 } 337 } 338 } 339 if data, err := b.GetOutput().GetProperties().MarshalJSON(); err != nil { 340 return nil, nil, fmt.Errorf("marshaling build output properties to JSON failed: %v", err) 341 } else if err := json.Unmarshal(data, &buildOutputProps); err != nil { 342 return nil, nil, err 343 } 344 goCommit, ok := func() (string, bool) { 345 for _, s := range buildOutputProps.Sources { 346 if c := s.GitilesCommit; c.Project == "go" { 347 if c.Ref != "refs/heads/"+builder.GoBranch { 348 panic(fmt.Errorf(`internal error: in Go repo loop, c.Ref != "refs/heads/%s"`, builder.GoBranch)) 349 } 350 return c.Id, true 351 } 352 } 353 return "", false 354 }() 355 if !ok && b.GetStatus() == bbpb.Status_STARTED { 356 // A started build that hasn't selected the Go repo commit yet. 357 // As an approximation, assume it'll pick the latest Go repo head commit. 358 goCommit = goHeads[builder.GoBranch] 359 } else if !ok { 360 // Repo commit not found in output properties, and it's not a started build. 361 // As an example, this can happen if a build failed due to an infra failure 362 // early on, before selecting the Go repo commit. Skip such builds. 363 continue 364 } 365 if goCommit != goHeads[builder.GoBranch] { 366 // Skip builds that are not for the Go repository's head commit. 367 continue 368 } 369 c := b.GetInput().GetGitilesCommit() 370 if c.Project != builder.Repo { 371 // When fetching builds for commits in x/ repos, it's expected 372 // that build repo will always match builder repo. This isn't 373 // true for the main Go repo because it triggers builds for x/ 374 // repos. But x/ repo builds don't trigger builds elsewhere. 375 return nil, nil, fmt.Errorf("internal error: build repo %q doesn't match builder repo %q", c.Project, builder.Repo) 376 } 377 if builds[builder.Repo] == nil { 378 builds[builder.Repo] = make(map[string]map[string]Build) 379 } 380 if builds[builder.Repo][c.Id] == nil { 381 builds[builder.Repo][c.Id] = make(map[string]Build) 382 } 383 builds[builder.Repo][c.Id][b.GetBuilder().GetBuilder()] = Build{ 384 ID: b.GetId(), 385 BuilderName: b.GetBuilder().GetBuilder(), 386 Status: b.GetStatus(), 387 } 388 used++ 389 } 390 } 391 log.Printf("lucipoll.runOnce: aggregate GetBuildsForCommit calls fetched %d builds (and used %d of them) in %v\n", total, used, time.Since(t0)) 392 393 return builders, builds, nil 394 } 395 396 // listBuilders lists post-submit LUCI builders. 397 func listBuilders(ctx context.Context, buildersCl bbpb.BuildersClient) (builders []Builder, _ error) { 398 var pageToken string 399 nextPage: 400 resp, err := buildersCl.ListBuilders(ctx, &bbpb.ListBuildersRequest{ 401 Project: "golang", Bucket: "ci", 402 PageSize: 1000, 403 PageToken: pageToken, 404 }) 405 if err != nil { 406 return nil, err 407 } 408 for _, b := range resp.GetBuilders() { 409 var p BuilderConfigProperties 410 if err := json.Unmarshal([]byte(b.GetConfig().GetProperties()), &p); err != nil { 411 return nil, err 412 } 413 builders = append(builders, Builder{b.GetId().GetBuilder(), &p}) 414 } 415 if resp.GetNextPageToken() != "" { 416 pageToken = resp.GetNextPageToken() 417 goto nextPage 418 } 419 slices.SortFunc(builders, func(a, b Builder) int { 420 return strings.Compare(a.Name, b.Name) 421 }) 422 return builders, nil 423 } 424 425 // fetchBuildsForCommit fetches builds from all post-submit LUCI builders for a specific commit. 426 func fetchBuildsForCommit(ctx context.Context, buildsCl bbpb.BuildsClient, repo, commit string, maskPaths ...string) (builds []*bbpb.Build, _ error) { 427 mask, err := fieldmaskpb.New((*bbpb.Build)(nil), maskPaths...) 428 if err != nil { 429 return nil, err 430 } 431 var pageToken string 432 nextPage: 433 resp, err := buildsCl.SearchBuilds(ctx, &bbpb.SearchBuildsRequest{ 434 Predicate: &bbpb.BuildPredicate{ 435 Builder: &bbpb.BuilderID{Project: "golang", Bucket: "ci"}, 436 Tags: []*bbpb.StringPair{ 437 {Key: "buildset", Value: fmt.Sprintf("commit/gitiles/go.googlesource.com/%s/+/%s", repo, commit)}, 438 }, 439 }, 440 Mask: &bbpb.BuildMask{Fields: mask}, 441 PageSize: 1000, 442 PageToken: pageToken, 443 }) 444 if err != nil { 445 return nil, err 446 } 447 builds = append(builds, resp.GetBuilds()...) 448 if resp.GetNextPageToken() != "" { 449 pageToken = resp.GetNextPageToken() 450 goto nextPage 451 } 452 return builds, nil 453 }