golang.org/x/build@v0.0.0-20240506185731-218518f32b70/cmd/coordinator/internal/lucipoll/lucipoll.go (about)

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package lucipoll implements a simple polling LUCI client
     6  // for the possibly-short-term needs of the build dashboard.
     7  package lucipoll
     8  
     9  import (
    10  	"context"
    11  	"encoding/json"
    12  	"fmt"
    13  	"log"
    14  	"runtime/debug"
    15  	"slices"
    16  	"strings"
    17  	"sync"
    18  	"time"
    19  
    20  	bbpb "go.chromium.org/luci/buildbucket/proto"
    21  	"golang.org/x/build/maintner/maintnerd/apipb"
    22  	"golang.org/x/build/repos"
    23  	"google.golang.org/grpc"
    24  	"google.golang.org/protobuf/types/known/fieldmaskpb"
    25  )
    26  
    27  // maintnerClient is a subset of apipb.MaintnerServiceClient.
    28  type maintnerClient interface {
    29  	// GetDashboard is extracted from apipb.MaintnerServiceClient.
    30  	GetDashboard(ctx context.Context, in *apipb.DashboardRequest, opts ...grpc.CallOption) (*apipb.DashboardResponse, error)
    31  }
    32  
    33  type Builder struct {
    34  	Name string
    35  	*BuilderConfigProperties
    36  }
    37  
    38  type BuilderConfigProperties struct {
    39  	Repo     string `json:"project,omitempty"`
    40  	GoBranch string `json:"go_branch,omitempty"`
    41  	Target   struct {
    42  		GOOS   string `json:"goos,omitempty"`
    43  		GOARCH string `json:"goarch,omitempty"`
    44  	} `json:"target"`
    45  	KnownIssue int `json:"known_issue,omitempty"`
    46  }
    47  
    48  type Build struct {
    49  	ID          int64
    50  	BuilderName string
    51  	Status      bbpb.Status
    52  }
    53  
    54  func NewService(maintCl maintnerClient, buildersCl bbpb.BuildersClient, buildsCl bbpb.BuildsClient) *service {
    55  	s := &service{
    56  		maintCl:    maintCl,
    57  		buildersCl: buildersCl,
    58  		buildsCl:   buildsCl,
    59  	}
    60  	go s.pollLoop()
    61  	return s
    62  }
    63  
    64  type service struct {
    65  	maintCl maintnerClient
    66  
    67  	buildersCl bbpb.BuildersClient
    68  	buildsCl   bbpb.BuildsClient
    69  
    70  	mu     sync.RWMutex
    71  	cached Snapshot
    72  }
    73  
    74  // A Snapshot is a consistent snapshot in time holding LUCI post-submit state.
    75  type Snapshot struct {
    76  	Builders         map[string]Builder                     // Map key is builder name.
    77  	RepoCommitBuilds map[string]map[string]map[string]Build // Map keys are repo, commit ID, builder name.
    78  }
    79  
    80  // PostSubmitSnapshot returns a cached snapshot.
    81  func (s *service) PostSubmitSnapshot() Snapshot {
    82  	s.mu.RLock()
    83  	defer s.mu.RUnlock()
    84  	return s.cached
    85  }
    86  
    87  func (s *service) pollLoop() {
    88  	// A hard timeout for runOnce to complete.
    89  	// Normally it takes about a minute or so.
    90  	// Sometimes (a few times a week) it takes 24 hours and a minute.
    91  	// Don't let it run more than 30 minutes, so we'll find out trying
    92  	// again sooner can help, at least until the root problem is fixed.
    93  	// See go.dev/issue/66687.
    94  	const runOnceTimeout = 30 * time.Minute
    95  
    96  	ticker := time.NewTicker(2 * time.Minute)
    97  	for {
    98  		ctx, cancel := context.WithTimeout(context.Background(), runOnceTimeout)
    99  		builders, builds, err := runOnce(ctx, s.maintCl, s.buildersCl, s.buildsCl)
   100  		cancel()
   101  		if err != nil {
   102  			log.Println("lucipoll:", err)
   103  			// Sleep a bit and retry.
   104  			time.Sleep(30 * time.Second)
   105  			continue
   106  		}
   107  		s.mu.Lock()
   108  		s.cached = Snapshot{builders, builds}
   109  		s.mu.Unlock()
   110  		<-ticker.C // Limit how often we're willing to poll.
   111  	}
   112  }
   113  
   114  func runOnce(
   115  	ctx context.Context,
   116  	maintCl maintnerClient, buildersCl bbpb.BuildersClient, buildsCl bbpb.BuildsClient,
   117  ) (_ map[string]Builder, _ map[string]map[string]map[string]Build, err error) {
   118  	defer func() {
   119  		if e := recover(); e != nil {
   120  			err = fmt.Errorf("internal panic: %v\n\n%s", e, debug.Stack())
   121  		}
   122  	}()
   123  
   124  	// Fetch all current completed LUCI builders.
   125  	//
   126  	// TODO: It would be possible to cache initially fetched builders and then fetch
   127  	// additional individual builders when seeing a build referencing an unknown one.
   128  	// But that would need to take into account that a builder may be intentionally
   129  	// removed from the LUCI dashboard. It adds more complexity, so for now do the
   130  	// simple thing and save caching as an optional enhancement.
   131  	builderList, err := listBuilders(ctx, buildersCl)
   132  	if err != nil {
   133  		return nil, nil, err
   134  	}
   135  	var builders = make(map[string]Builder)
   136  	for _, b := range builderList {
   137  		if _, ok := builders[b.Name]; ok {
   138  			return nil, nil, fmt.Errorf("duplicate builder name %q", b.Name)
   139  		}
   140  		if b.KnownIssue != 0 {
   141  			// Skip LUCI builders with a known issue at this time.
   142  			// This also means builds from these builders are skipped below as well.
   143  			// Such builders&builds can be included when the callers deem it useful.
   144  			continue
   145  		}
   146  		builders[b.Name] = b
   147  	}
   148  
   149  	// Fetch LUCI builds for the builders, repositories, and their commits
   150  	// that are deemed relevant to the callers of this package.
   151  	//
   152  	// TODO: It would be possible to cache the last GetDashboard response
   153  	// and if didn't change since the last, only fetch new LUCI builds
   154  	// since then. Similarly, builds that were earlier for commits that
   155  	// still show up in the response can be reused instead of refetched.
   156  	// Furthermore, builds can be sorted according to how complete/useful
   157  	// they are. These known enhancements are left for later as needed.
   158  	var builds = make(map[string]map[string]map[string]Build)
   159  	dashResp, err := maintCl.GetDashboard(ctx, &apipb.DashboardRequest{MaxCommits: 30})
   160  	if err != nil {
   161  		return nil, nil, err
   162  	}
   163  	var used, total int
   164  	t0 := time.Now()
   165  	// Fetch builds for Go repo commits.
   166  	for _, c := range dashResp.Commits {
   167  		repo, commit := "go", c.Commit
   168  		buildList, err := fetchBuildsForCommit(ctx, buildsCl, repo, commit, "id", "builder.builder", "status", "input.gitiles_commit")
   169  		if err != nil {
   170  			return nil, nil, err
   171  		}
   172  		total += len(buildList)
   173  		for _, b := range buildList {
   174  			if c := b.GetInput().GetGitilesCommit(); c.Project != repo {
   175  				return nil, nil, fmt.Errorf(`internal error: in Go repo commit loop, c.Project is %q but expected it to be "go"`, c.Project)
   176  			} else if c.Id != commit {
   177  				return nil, nil, fmt.Errorf("internal error: in Go repo commit loop, c.Id is %q but expected it to be %q", c.Id, commit)
   178  			}
   179  			switch b.GetStatus() {
   180  			case bbpb.Status_STARTED, bbpb.Status_SUCCESS, bbpb.Status_FAILURE, bbpb.Status_INFRA_FAILURE:
   181  			default:
   182  				// Skip builds with other statuses at this time.
   183  				// Such builds can be included when the callers deem it useful.
   184  				continue
   185  			}
   186  			builder, ok := builders[b.GetBuilder().GetBuilder()]
   187  			if !ok {
   188  				// A build that isn't associated with a current builder we're tracking.
   189  				// It might've been removed, or has a known issue. Skip this build too.
   190  				continue
   191  			} else if builder.Repo != "go" {
   192  				// Not a Go repo build. Those are handled below, so out of scope here.
   193  				continue
   194  			}
   195  			if builds[repo] == nil {
   196  				builds[repo] = make(map[string]map[string]Build)
   197  			}
   198  			if builds[repo][commit] == nil {
   199  				builds[repo][commit] = make(map[string]Build)
   200  			}
   201  			builds[repo][commit][b.GetBuilder().GetBuilder()] = Build{
   202  				ID:          b.GetId(),
   203  				BuilderName: b.GetBuilder().GetBuilder(),
   204  				Status:      b.GetStatus(),
   205  			}
   206  			used++
   207  		}
   208  	}
   209  	// Fetch builds for the single latest commit of each golang.org/x repo,
   210  	// ones that were invoked from the Go repository side.
   211  	var repoHeads = make(map[string]string) // A repo → head commit ID map.
   212  	for _, rh := range dashResp.RepoHeads {
   213  		repoHeads[rh.GerritProject] = rh.Commit.Commit
   214  	}
   215  	for _, r := range dashResp.Releases {
   216  		repo, commit := "go", r.GetBranchCommit()
   217  		buildList, err := fetchBuildsForCommit(ctx, buildsCl, repo, commit, "id", "builder.builder", "status", "input.gitiles_commit", "output.properties")
   218  		if err != nil {
   219  			return nil, nil, err
   220  		}
   221  		total += len(buildList)
   222  		for _, b := range buildList {
   223  			if c := b.GetInput().GetGitilesCommit(); c.Project != "go" {
   224  				return nil, nil, fmt.Errorf(`internal error: in x/ repo loop for builds invoked from the Go repo side, c.Project is %q but expected it to be "go"`, c.Project)
   225  			}
   226  			switch b.GetStatus() {
   227  			case bbpb.Status_STARTED, bbpb.Status_SUCCESS, bbpb.Status_FAILURE, bbpb.Status_INFRA_FAILURE:
   228  			default:
   229  				// Skip builds with other statuses at this time.
   230  				// Such builds can be included when the callers deem it useful.
   231  				continue
   232  			}
   233  			builder, ok := builders[b.GetBuilder().GetBuilder()]
   234  			if !ok {
   235  				// A build that isn't associated with a current builder we're tracking.
   236  				// It might've been removed, or has a known issue. Skip this build too.
   237  				continue
   238  			} else if builder.Repo == "go" {
   239  				// A Go repo build. Those were handled above, so out of scope here.
   240  				continue
   241  			}
   242  			var buildOutputProps struct {
   243  				Sources []struct {
   244  					GitilesCommit struct {
   245  						Project string
   246  						Ref     string
   247  						Id      string
   248  					}
   249  				}
   250  			}
   251  			if data, err := b.GetOutput().GetProperties().MarshalJSON(); err != nil {
   252  				return nil, nil, fmt.Errorf("marshaling build output properties to JSON failed: %v", err)
   253  			} else if err := json.Unmarshal(data, &buildOutputProps); err != nil {
   254  				return nil, nil, err
   255  			}
   256  			repoCommit, ok := func() (string, bool) {
   257  				for _, s := range buildOutputProps.Sources {
   258  					if c := s.GitilesCommit; c.Project == builder.Repo {
   259  						if c.Ref != "refs/heads/master" {
   260  							panic(fmt.Errorf(`internal error: in x/ repo loop for project %s, c.Ref != "refs/heads/master"`, c.Project))
   261  						}
   262  						return c.Id, true
   263  					}
   264  				}
   265  				return "", false
   266  			}()
   267  			if !ok && b.GetStatus() == bbpb.Status_STARTED {
   268  				// A started build that hasn't selected the x/ repo commit yet.
   269  				// As an approximation, assume it'll pick the latest x/ repo head commit.
   270  				repoCommit = repoHeads[builder.Repo]
   271  			} else if !ok {
   272  				// Repo commit not found in output properties, and it's not a started build.
   273  				// As an example, this can happen if a build failed due to an infra failure
   274  				// early on, before selecting the x/ repo commit. Skip such builds.
   275  				continue
   276  			}
   277  			if repoCommit != repoHeads[builder.Repo] {
   278  				// Skip builds that are not for the x/ repository's head commit.
   279  				continue
   280  			}
   281  			if builds[builder.Repo] == nil {
   282  				builds[builder.Repo] = make(map[string]map[string]Build)
   283  			}
   284  			if builds[builder.Repo][repoCommit] == nil {
   285  				builds[builder.Repo][repoCommit] = make(map[string]Build)
   286  			}
   287  			builds[builder.Repo][repoCommit][b.GetBuilder().GetBuilder()] = Build{
   288  				ID:          b.GetId(),
   289  				BuilderName: b.GetBuilder().GetBuilder(),
   290  				Status:      b.GetStatus(),
   291  			}
   292  			used++
   293  		}
   294  	}
   295  	// Fetch builds for the single latest commit of each golang.org/x repo,
   296  	// ones that were invoked from the x/ repository side.
   297  	var goHeads = make(map[string]string) // A branch → head commit ID map.
   298  	for _, r := range dashResp.Releases {
   299  		goHeads[r.GetBranchName()] = r.GetBranchCommit()
   300  	}
   301  	for _, rh := range dashResp.RepoHeads {
   302  		if rh.GerritProject == "go" {
   303  			continue
   304  		}
   305  		if r, ok := repos.ByGerritProject[rh.GerritProject]; !ok || !r.ShowOnDashboard() {
   306  			// Not a golang.org/x repository that's marked visible on the dashboard.
   307  			// Skip it.
   308  			continue
   309  		}
   310  		repo, commit := rh.GerritProject, rh.Commit.Commit
   311  		buildList, err := fetchBuildsForCommit(ctx, buildsCl, repo, commit, "id", "builder.builder", "status", "input.gitiles_commit", "output.properties")
   312  		if err != nil {
   313  			return nil, nil, err
   314  		}
   315  		total += len(buildList)
   316  		for _, b := range buildList {
   317  			switch b.GetStatus() {
   318  			case bbpb.Status_STARTED, bbpb.Status_SUCCESS, bbpb.Status_FAILURE, bbpb.Status_INFRA_FAILURE:
   319  			default:
   320  				// Skip builds with other statuses at this time.
   321  				// Such builds can be included when the callers deem it useful.
   322  				continue
   323  			}
   324  			builder, ok := builders[b.GetBuilder().GetBuilder()]
   325  			if !ok {
   326  				// A build that isn't associated with a current builder we're tracking.
   327  				// It might've been removed, or has a known issue. Skip this build too.
   328  				continue
   329  			}
   330  			var buildOutputProps struct {
   331  				Sources []struct {
   332  					GitilesCommit struct {
   333  						Project string
   334  						Ref     string
   335  						Id      string
   336  					}
   337  				}
   338  			}
   339  			if data, err := b.GetOutput().GetProperties().MarshalJSON(); err != nil {
   340  				return nil, nil, fmt.Errorf("marshaling build output properties to JSON failed: %v", err)
   341  			} else if err := json.Unmarshal(data, &buildOutputProps); err != nil {
   342  				return nil, nil, err
   343  			}
   344  			goCommit, ok := func() (string, bool) {
   345  				for _, s := range buildOutputProps.Sources {
   346  					if c := s.GitilesCommit; c.Project == "go" {
   347  						if c.Ref != "refs/heads/"+builder.GoBranch {
   348  							panic(fmt.Errorf(`internal error: in Go repo loop, c.Ref != "refs/heads/%s"`, builder.GoBranch))
   349  						}
   350  						return c.Id, true
   351  					}
   352  				}
   353  				return "", false
   354  			}()
   355  			if !ok && b.GetStatus() == bbpb.Status_STARTED {
   356  				// A started build that hasn't selected the Go repo commit yet.
   357  				// As an approximation, assume it'll pick the latest Go repo head commit.
   358  				goCommit = goHeads[builder.GoBranch]
   359  			} else if !ok {
   360  				// Repo commit not found in output properties, and it's not a started build.
   361  				// As an example, this can happen if a build failed due to an infra failure
   362  				// early on, before selecting the Go repo commit. Skip such builds.
   363  				continue
   364  			}
   365  			if goCommit != goHeads[builder.GoBranch] {
   366  				// Skip builds that are not for the Go repository's head commit.
   367  				continue
   368  			}
   369  			c := b.GetInput().GetGitilesCommit()
   370  			if c.Project != builder.Repo {
   371  				// When fetching builds for commits in x/ repos, it's expected
   372  				// that build repo will always match builder repo. This isn't
   373  				// true for the main Go repo because it triggers builds for x/
   374  				// repos. But x/ repo builds don't trigger builds elsewhere.
   375  				return nil, nil, fmt.Errorf("internal error: build repo %q doesn't match builder repo %q", c.Project, builder.Repo)
   376  			}
   377  			if builds[builder.Repo] == nil {
   378  				builds[builder.Repo] = make(map[string]map[string]Build)
   379  			}
   380  			if builds[builder.Repo][c.Id] == nil {
   381  				builds[builder.Repo][c.Id] = make(map[string]Build)
   382  			}
   383  			builds[builder.Repo][c.Id][b.GetBuilder().GetBuilder()] = Build{
   384  				ID:          b.GetId(),
   385  				BuilderName: b.GetBuilder().GetBuilder(),
   386  				Status:      b.GetStatus(),
   387  			}
   388  			used++
   389  		}
   390  	}
   391  	log.Printf("lucipoll.runOnce: aggregate GetBuildsForCommit calls fetched %d builds (and used %d of them) in %v\n", total, used, time.Since(t0))
   392  
   393  	return builders, builds, nil
   394  }
   395  
   396  // listBuilders lists post-submit LUCI builders.
   397  func listBuilders(ctx context.Context, buildersCl bbpb.BuildersClient) (builders []Builder, _ error) {
   398  	var pageToken string
   399  nextPage:
   400  	resp, err := buildersCl.ListBuilders(ctx, &bbpb.ListBuildersRequest{
   401  		Project: "golang", Bucket: "ci",
   402  		PageSize:  1000,
   403  		PageToken: pageToken,
   404  	})
   405  	if err != nil {
   406  		return nil, err
   407  	}
   408  	for _, b := range resp.GetBuilders() {
   409  		var p BuilderConfigProperties
   410  		if err := json.Unmarshal([]byte(b.GetConfig().GetProperties()), &p); err != nil {
   411  			return nil, err
   412  		}
   413  		builders = append(builders, Builder{b.GetId().GetBuilder(), &p})
   414  	}
   415  	if resp.GetNextPageToken() != "" {
   416  		pageToken = resp.GetNextPageToken()
   417  		goto nextPage
   418  	}
   419  	slices.SortFunc(builders, func(a, b Builder) int {
   420  		return strings.Compare(a.Name, b.Name)
   421  	})
   422  	return builders, nil
   423  }
   424  
   425  // fetchBuildsForCommit fetches builds from all post-submit LUCI builders for a specific commit.
   426  func fetchBuildsForCommit(ctx context.Context, buildsCl bbpb.BuildsClient, repo, commit string, maskPaths ...string) (builds []*bbpb.Build, _ error) {
   427  	mask, err := fieldmaskpb.New((*bbpb.Build)(nil), maskPaths...)
   428  	if err != nil {
   429  		return nil, err
   430  	}
   431  	var pageToken string
   432  nextPage:
   433  	resp, err := buildsCl.SearchBuilds(ctx, &bbpb.SearchBuildsRequest{
   434  		Predicate: &bbpb.BuildPredicate{
   435  			Builder: &bbpb.BuilderID{Project: "golang", Bucket: "ci"},
   436  			Tags: []*bbpb.StringPair{
   437  				{Key: "buildset", Value: fmt.Sprintf("commit/gitiles/go.googlesource.com/%s/+/%s", repo, commit)},
   438  			},
   439  		},
   440  		Mask:      &bbpb.BuildMask{Fields: mask},
   441  		PageSize:  1000,
   442  		PageToken: pageToken,
   443  	})
   444  	if err != nil {
   445  		return nil, err
   446  	}
   447  	builds = append(builds, resp.GetBuilds()...)
   448  	if resp.GetNextPageToken() != "" {
   449  		pageToken = resp.GetNextPageToken()
   450  		goto nextPage
   451  	}
   452  	return builds, nil
   453  }