golang.org/x/build@v0.0.0-20240506185731-218518f32b70/cmd/coordinator/coordinator.go (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build linux || darwin
     6  
     7  // The coordinator runs the majority of the Go build system.
     8  //
     9  // It is responsible for finding build work, executing it,
    10  // and displaying the results.
    11  //
    12  // For an overview of the Go build system, see the README at
    13  // the root of the x/build repo.
    14  package main // import "golang.org/x/build/cmd/coordinator"
    15  
    16  import (
    17  	"bytes"
    18  	"context"
    19  	"crypto/rand"
    20  	"crypto/sha1"
    21  	"crypto/tls"
    22  	"encoding/json"
    23  	"errors"
    24  	"flag"
    25  	"fmt"
    26  	"html"
    27  	"io"
    28  	"log"
    29  	"net/http"
    30  	"net/url"
    31  	"os"
    32  	"sort"
    33  	"strings"
    34  	"sync"
    35  	"time"
    36  	"unicode"
    37  
    38  	"cloud.google.com/go/compute/metadata"
    39  	"cloud.google.com/go/storage"
    40  	"go.chromium.org/luci/auth"
    41  	buildbucketpb "go.chromium.org/luci/buildbucket/proto"
    42  	"go.chromium.org/luci/grpc/prpc"
    43  	"go.chromium.org/luci/hardcoded/chromeinfra"
    44  	"golang.org/x/build/buildenv"
    45  	"golang.org/x/build/buildlet"
    46  	builddash "golang.org/x/build/cmd/coordinator/internal/dashboard"
    47  	"golang.org/x/build/cmd/coordinator/internal/legacydash"
    48  	"golang.org/x/build/cmd/coordinator/internal/lucipoll"
    49  	"golang.org/x/build/cmd/coordinator/protos"
    50  	"golang.org/x/build/dashboard"
    51  	"golang.org/x/build/gerrit"
    52  	"golang.org/x/build/internal/access"
    53  	"golang.org/x/build/internal/buildgo"
    54  	"golang.org/x/build/internal/buildstats"
    55  	"golang.org/x/build/internal/cloud"
    56  	"golang.org/x/build/internal/coordinator/pool"
    57  	"golang.org/x/build/internal/coordinator/pool/queue"
    58  	"golang.org/x/build/internal/coordinator/remote"
    59  	"golang.org/x/build/internal/coordinator/schedule"
    60  	"golang.org/x/build/internal/gomote"
    61  	gomoteprotos "golang.org/x/build/internal/gomote/protos"
    62  	"golang.org/x/build/internal/https"
    63  	"golang.org/x/build/internal/metrics"
    64  	"golang.org/x/build/internal/secret"
    65  	"golang.org/x/build/kubernetes/gke"
    66  	"golang.org/x/build/maintner/maintnerd/apipb"
    67  	"golang.org/x/build/repos"
    68  	"golang.org/x/build/revdial/v2"
    69  	"golang.org/x/build/types"
    70  	"golang.org/x/exp/slices"
    71  	"golang.org/x/time/rate"
    72  	"google.golang.org/api/option"
    73  	"google.golang.org/grpc"
    74  	"google.golang.org/grpc/credentials"
    75  )
    76  
// Build event names.
const (
	// eventDone is a build event name meaning the build was
	// completed (either successfully or with remote errors).
	// Notably, it is NOT included for network/communication
	// errors.
	eventDone = "done"

	// eventSkipBuildMissingDep is a build event name meaning
	// the builder type is not applicable to the commit being
	// tested because the commit lacks a necessary dependency
	// in its git history.
	eventSkipBuildMissingDep = "skipped_build_missing_dep"
)
    90  
// processStartTime is captured when the package-level variables are
// initialized, and processID is "P" followed by the result of randHex(9).
// main passes both to pool.SetProcessMetadata.
var (
	processStartTime = time.Now()
	processID        = "P" + randHex(9)
)

// sched is the package-level scheduler. main hands it to gomote.New.
var sched = schedule.NewScheduler()

// Version is the coordinator version, logged at startup. When it is empty
// and -mode is "dev", main sets it to "dev".
var Version string // set by linker -X
    99  
// devPause is a debug option to pause for 5 minutes after the build
// finishes before destroying buildlets.
// It is a compile-time constant, so changing it requires a rebuild.
const devPause = false

// stagingTryWork is a debug option to enable or disable running
// trybot work in staging.
//
// If enabled, only open CLs containing "DO NOT SUBMIT" and "STAGING"
// in their commit message (in addition to being marked Run-TryBot+1)
// will be run.
// It is a compile-time constant, so changing it requires a rebuild.
const stagingTryWork = true
   111  
   112  var (
   113  	masterKeyFile = flag.String("masterkey", "", "Path to builder master key. Else fetched using GCE project attribute 'builder-master-key'.")
   114  	mode          = flag.String("mode", "", "Valid modes are 'dev', 'prod', or '' for auto-detect. dev means localhost development, not be confused with staging on go-dashboard-dev, which is still the 'prod' mode.")
   115  	buildEnvName  = flag.String("env", "", "The build environment configuration to use. Not required if running in dev mode locally or prod mode on GCE.")
   116  	devEnableGCE  = flag.Bool("dev_gce", false, "Whether or not to enable the GCE pool when in dev mode. The pool is enabled by default in prod mode.")
   117  	devEnableEC2  = flag.Bool("dev_ec2", false, "Whether or not to enable the EC2 pool when in dev mode. The pool is enabled by default in prod mode.")
   118  	sshAddr       = flag.String("ssh_addr", ":2222", "Address the gomote SSH server should listen on")
   119  )
   120  
   121  // LOCK ORDER:
   122  //   statusMu, buildStatus.mu, trySet.mu
   123  // (Other locks, such as the remoteBuildlet mutex should
   124  // not be used along with other locks)
   125  
// Shared build-tracking state. A BuilderRev present in status is treated as
// "currently building" (see isBuilding); markDone moves its entry from status
// to the end of statusDone.
var (
	statusMu   sync.Mutex // guards the following four structures; see LOCK ORDER comment above
	status     = map[buildgo.BuilderRev]*buildStatus{}
	statusDone []*buildStatus         // finished recently, capped to maxStatusDone
	tries      = map[tryKey]*trySet{} // trybot builds
	tryList    []tryKey
)

// maintnerClient is the client for the maintner service. main assigns it
// after dialing maintner.golang.org:443 over gRPC.
var maintnerClient apipb.MaintnerServiceClient

const (
	// maxStatusDone is the maximum number of finished builds kept in
	// statusDone; markDone drops the oldest entry once the cap is reached.
	maxStatusDone = 30
)
   139  
// validHosts is the set of host names that hostPathHandler accepts as the
// first element of a URL path, and whose absolute links linkRewriter turns
// into local /host/... links.
var validHosts = map[string]bool{
	"farmer.golang.org": true,
	"build.golang.org":  true,
}
   144  
   145  // hostPathHandler infers the host from the first element of the URL path,
   146  // and rewrites URLs in the output HTML accordingly. It disables response
   147  // compression to simplify the process of link rewriting.
   148  func hostPathHandler(h http.Handler) http.Handler {
   149  	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
   150  		// Don't bother rewriting ReverseHandler requests. ReverseHandler
   151  		// must be a Hijacker. Other handlers must not be a Hijacker to
   152  		// serve HTTP/2 requests.
   153  		if strings.HasPrefix(r.URL.Path, "/reverse") || strings.HasPrefix(r.URL.Path, "/revdial") {
   154  			h.ServeHTTP(w, r)
   155  			return
   156  		}
   157  		elem, rest := strings.TrimPrefix(r.URL.Path, "/"), ""
   158  		if i := strings.Index(elem, "/"); i >= 0 {
   159  			elem, rest = elem[:i], elem[i+1:]
   160  		}
   161  		if !validHosts[elem] {
   162  			u := "/farmer.golang.org" + r.URL.EscapedPath()
   163  			if r.URL.RawQuery != "" {
   164  				u += "?" + r.URL.RawQuery
   165  			}
   166  			http.Redirect(w, r, u, http.StatusTemporaryRedirect)
   167  			return
   168  		}
   169  
   170  		r.Host = elem
   171  		r.URL.Host = elem
   172  		r.URL.Path = "/" + rest
   173  		r.Header.Set("Accept-Encoding", "identity") // Disable compression for link rewriting.
   174  		lw := &linkRewriter{ResponseWriter: w, host: r.Host}
   175  		h.ServeHTTP(lw, r)
   176  		lw.Flush()
   177  	})
   178  }
   179  
// A linkRewriter is a ResponseWriter that rewrites links in HTML output.
// It rewrites relative links /foo to be /host/foo, and it rewrites any link
// https://h/foo or //h/foo, where h is in validHosts, to be /h/foo.
// This corrects the links to have the right form for the local development mode.
//
// HTML bodies are buffered in full and only sent, rewritten, when Flush is
// called; non-HTML bodies are passed straight through by Write.
type linkRewriter struct {
	http.ResponseWriter
	host string // host prepended to relative links and Location headers
	buf  []byte // HTML body accumulated by Write until Flush
	ct   string // content-type
}
   190  
   191  func (r *linkRewriter) WriteHeader(code int) {
   192  	if l := r.Header().Get("Location"); l != "" {
   193  		if u, err := url.Parse(l); err == nil {
   194  			if u.Host == "" {
   195  				u.Path = "/" + r.host + u.Path
   196  			} else if validHosts[u.Host] {
   197  				u.Path = "/" + u.Host + u.Path
   198  				u.Scheme, u.Host = "", ""
   199  			}
   200  			r.Header().Set("Location", u.String())
   201  		}
   202  	}
   203  	r.ResponseWriter.WriteHeader(code)
   204  }
   205  
   206  func (r *linkRewriter) Write(data []byte) (int, error) {
   207  	if r.ct == "" {
   208  		ct := r.Header().Get("Content-Type")
   209  		if ct == "" {
   210  			// Note: should use first 512 bytes, but first write is fine for our purposes.
   211  			ct = http.DetectContentType(data)
   212  		}
   213  		r.ct = ct
   214  	}
   215  	if !strings.HasPrefix(r.ct, "text/html") {
   216  		return r.ResponseWriter.Write(data)
   217  	}
   218  	r.buf = append(r.buf, data...)
   219  	return len(data), nil
   220  }
   221  
   222  func (r *linkRewriter) Flush() {
   223  	var repl []string
   224  	for host := range validHosts {
   225  		repl = append(repl, `href="https://`+host, `href="/`+host)
   226  		repl = append(repl, `href="//`+host, `href="/`+host) // Handle scheme-less URLs.
   227  	}
   228  	repl = append(repl, `href="/`, `href="/`+r.host+`/`)
   229  	strings.NewReplacer(repl...).WriteString(r.ResponseWriter, string(r.buf))
   230  	r.buf = nil
   231  }
   232  
// main starts the coordinator. In order, it parses flags, creates the secret
// client and master key cache, initializes the GCE (and optionally EC2)
// buildlet pools, resolves the run mode, registers the HTTP and gRPC
// handlers, starts the background work loops and the gomote SSH server, and
// finally blocks serving HTTPS.
func main() {
	https.RegisterFlags(flag.CommandLine)
	flag.Parse()

	pool.SetProcessMetadata(processID, processStartTime)

	if Version == "" && *mode == "dev" {
		Version = "dev"
	}
	log.Printf("coordinator version %q starting", Version)

	sc := mustCreateSecretClientOnGCE()
	if sc != nil {
		defer sc.Close()
	}

	mustInitMasterKeyCache(sc)

	// TODO(golang.org/issue/38337): remove package level variables where possible.
	// TODO(golang.org/issue/36841): remove after key functions are moved into
	// a shared package.
	pool.SetBuilderMasterKey(masterKey())
	sp := remote.NewSessionPool(context.Background())
	// An empty -mode is auto-detected from the outcome of GCE initialization:
	// failure selects "dev", success selects "prod". An explicitly set mode
	// is left untouched either way.
	err := pool.InitGCE(sc, &basePinErr, sp.IsSession, *buildEnvName, *mode)
	if err != nil {
		if *mode == "" {
			*mode = "dev"
		}
		log.Printf("VM support disabled due to error initializing GCE: %v", err)
	} else {
		if *mode == "" {
			*mode = "prod"
		}
	}

	gce := pool.NewGCEConfiguration()

	// The git mirror monitor is only started when the build environment
	// names a Kubernetes cluster.
	if gce.BuildEnv().KubeServices.Name != "" {
		goKubeClient, err := gke.NewClient(context.Background(),
			gce.BuildEnv().KubeServices.Name,
			gce.BuildEnv().KubeServices.Location(),
			gke.OptNamespace(gce.BuildEnv().KubeServices.Namespace),
			gke.OptProject(gce.BuildEnv().ProjectName),
			gke.OptTokenSource(gce.GCPCredentials().TokenSource))
		if err != nil {
			log.Fatalf("connecting to GKE failed: %v", err)
		}
		go monitorGitMirror(goKubeClient)
	} else {
		log.Println("Kubernetes services disabled due to empty KubeServices.Name")
	}

	if *mode == "prod" || (*mode == "dev" && *devEnableEC2) {
		// TODO(golang.org/issues/38337) the coordinator will use a package scoped pool
		// until the coordinator is refactored to not require them.
		ec2Pool := mustCreateEC2BuildletPool(sc, sp.IsSession)
		defer ec2Pool.Close()
	}

	if *mode == "dev" {
		// Replace linux-amd64 with a config using a -localdev reverse
		// buildlet so it is possible to run local builds by starting a
		// local reverse buildlet.
		dashboard.Builders["linux-amd64"] = &dashboard.BuildConfig{
			Name:     "linux-amd64",
			HostType: "host-linux-amd64-localdev",
		}
		dashboard.Builders["linux-amd64-localdev"] = &dashboard.BuildConfig{
			Name:     "linux-amd64",
			HostType: "host-linux-amd64-localdev",
		}
	}

	go pool.CoordinatorProcess().UpdateInstanceRecord()

	// From here on the mode is known to be either "dev" or "prod".
	switch *mode {
	case "dev", "prod":
		log.Printf("Running in %s mode", *mode)
	default:
		log.Fatalf("Unknown mode: %q", *mode)
	}

	mux := http.NewServeMux()

	if *mode == "dev" {
		// Serve a mock TryBot Status page at /try-dev.
		initTryDev(mux)
	}

	addHealthCheckers(context.Background(), mux, sc)

	// A GKEResource error is only logged when running on GCE; either way
	// gr is passed on to metrics.NewService.
	gr, err := metrics.GKEResource("coordinator-deployment")
	if err != nil && metadata.OnGCE() {
		log.Println("metrics.GKEResource:", err)
	}
	if ms, err := metrics.NewService(gr, views); err != nil {
		log.Println("failed to initialize metrics:", err)
	} else {
		mux.Handle("/metrics", ms)
		defer ms.Stop()
	}

	// Dial maintner, blocking for up to 10 seconds; failure is fatal.
	dialOpts := []grpc.DialOption{
		grpc.WithBlock(),
		grpc.WithTimeout(10 * time.Second),
		grpc.WithTransportCredentials(credentials.NewTLS(&tls.Config{NextProtos: []string{"h2"}})),
	}
	mServer := "maintner.golang.org:443"
	cc, err := grpc.Dial(mServer, dialOpts...)
	if err != nil {
		log.Fatalf("unable to grpc.Dial(%q) = _, %s", mServer, err)
	}
	maintnerClient = apipb.NewMaintnerServiceClient(cc)

	sshCA := mustRetrieveSSHCertificateAuthority()

	// The gomote transfer bucket and the IAP-authenticating gRPC
	// interceptors are only configured when no -env was given, the mode is
	// not dev, and the process is running on GCE. Otherwise gomoteBucket
	// stays empty and the gRPC server gets no interceptors.
	var gomoteBucket string
	var opts []grpc.ServerOption
	if *buildEnvName == "" && *mode != "dev" && metadata.OnGCE() {
		projectID, err := metadata.ProjectID()
		if err != nil {
			log.Fatalf("metadata.ProjectID() = %v", err)
		}
		env := buildenv.ByProjectID(projectID)
		gomoteBucket = env.GomoteTransferBucket
		// serviceID is only checked for presence; its value is not used
		// further in this function.
		var coordinatorBackend, serviceID = "coordinator-internal-iap", ""
		if serviceID = env.IAPServiceID(coordinatorBackend); serviceID == "" {
			log.Fatalf("unable to retrieve Service ID for backend service=%q", coordinatorBackend)
		}
		opts = append(opts, grpc.UnaryInterceptor(access.RequireIAPAuthUnaryInterceptor(access.IAPSkipAudienceValidation)))
		opts = append(opts, grpc.StreamInterceptor(access.RequireIAPAuthStreamInterceptor(access.IAPSkipAudienceValidation)))
	}
	// grpcServer is a shared gRPC server: it is handed to the legacy dashboard
	// handler and hosts both the coordinator and gomote services registered below.
	grpcServer := grpc.NewServer(opts...)

	// The mode was validated above, so exactly one of these cases runs and
	// luciHTTPClient is non-nil afterwards. The two modes differ only in the
	// auth options used.
	var luciHTTPClient *http.Client
	switch *mode {
	case "prod":
		var err error
		luciHTTPClient, err = auth.NewAuthenticator(context.Background(), auth.SilentLogin, auth.Options{GCEAllowAsDefault: true}).Client()
		if err != nil {
			log.Fatalln("luci/auth.NewAuthenticator:", err)
		}
	case "dev":
		var err error
		luciHTTPClient, err = auth.NewAuthenticator(context.Background(), auth.SilentLogin, chromeinfra.DefaultAuthOptions()).Client()
		if err != nil {
			log.Fatalln("luci/auth.NewAuthenticator:", err)
		}
	}
	buildersCl := buildbucketpb.NewBuildersClient(&prpc.Client{
		C:    luciHTTPClient,
		Host: "cr-buildbucket.appspot.com",
	})
	buildsCl := buildbucketpb.NewBuildsClient(&prpc.Client{
		C:    luciHTTPClient,
		Host: "cr-buildbucket.appspot.com",
	})
	luciPoll := lucipoll.NewService(maintnerClient, buildersCl, buildsCl)
	dashV1 := legacydash.Handler(gce.GoDSClient(), maintnerClient, luciPoll, string(masterKey()), grpcServer)
	dashV2 := &builddash.Handler{Datastore: gce.GoDSClient(), Maintner: maintnerClient, LUCI: luciPoll}
	gs := &gRPCServer{dashboardURL: "https://build.golang.org"}
	setSessionPool(sp)
	gomoteServer := gomote.New(sp, sched, sshCA, gomoteBucket, mustStorageClient())
	protos.RegisterCoordinatorServer(grpcServer, gs)
	gomoteprotos.RegisterGomoteServiceServer(grpcServer, gomoteServer)
	mux.HandleFunc("/", grpcHandlerFunc(grpcServer, handleStatus)) // Serve a status page at farmer.golang.org.
	mux.Handle("build.golang.org/", dashV1)                        // Serve a build dashboard at build.golang.org.
	mux.Handle("build-staging.golang.org/", dashV1)
	mux.HandleFunc("/builders", handleBuilders)
	mux.HandleFunc("/temporarylogs", handleLogs)
	mux.HandleFunc("/reverse", pool.HandleReverse)
	mux.Handle("/revdial", revdial.ConnHandler())
	mux.HandleFunc("/style.css", handleStyleCSS)
	mux.HandleFunc("/try", serveTryStatus(false))
	mux.HandleFunc("/try.json", serveTryStatus(true))
	mux.HandleFunc("/status/post-submit-active.json", handlePostSubmitActiveJSON)
	mux.Handle("/dashboard", dashV2)
	mux.HandleFunc("/queues", handleQueues)
	// Start the background loops. Dev mode only looks for post-submit work,
	// and only when a VM pool was explicitly enabled; every other mode also
	// runs VM cleanup, the module proxy, trybot discovery, and reverse
	// buildlet metrics.
	if *mode == "dev" {
		// TODO(crawshaw): do more in dev mode
		gce.BuildletPool().SetEnabled(*devEnableGCE)
		if *devEnableGCE || *devEnableEC2 {
			go findWorkLoop()
		}
	} else {
		go gce.BuildletPool().CleanUpOldVMs()

		if gce.InStaging() {
			dashboard.Builders = stagingClusterBuilders()
		}

		go listenAndServeInternalModuleProxy()
		go findWorkLoop()
		go findTryWorkLoop()
		go reportReverseCountMetrics()
		// TODO(cmang): gccgo will need its own findWorkLoop
	}

	ctx := context.Background()
	configureSSHServer := func() (*remote.SSHServer, error) {
		privateKey, publicKey, err := retrieveSSHKeys(ctx, sc, *mode)
		if err != nil {
			return nil, fmt.Errorf("unable to retrieve keys for SSH Server: %v", err)
		}
		return remote.NewSSHServer(*sshAddr, privateKey, publicKey, sshCA, sp)
	}
	// A failure to configure the SSH server is logged but not fatal; the
	// coordinator keeps running without it.
	sshServ, err := configureSSHServer()
	if err != nil {
		log.Printf("unable to configure SSH server: %s", err)
	} else {
		go func() {
			log.Printf("running SSH server on %s", *sshAddr)
			err := sshServ.ListenAndServe()
			log.Printf("SSH server ended with error: %v", err)
		}()
		defer func() {
			err := sshServ.Close()
			if err != nil {
				log.Printf("unable to close SSH server: %s", err)
			}
		}()
	}
	// NOTE(review): both exits below go through log.Fatalln, which terminates
	// the process with os.Exit, so the deferred cleanups registered above do
	// not run on these paths.
	if *mode == "dev" {
		// Use hostPathHandler in local development mode (only) to improve
		// convenience of testing multiple domains that coordinator serves.
		log.Fatalln(https.ListenAndServe(context.Background(), hostPathHandler(mux)))
	}
	log.Fatalln(https.ListenAndServe(context.Background(), mux))
}
   463  
// ignoreAllNewWork, when true, makes addWorkDetail return without starting
// a build (the test hook, if set, still runs first).
// It's sometimes set in staging mode when people are debugging
// certain paths.
var ignoreAllNewWork bool

// addWorkTestHook is optionally set by tests. When non-nil, addWorkDetail
// calls it with its arguments and does nothing else.
var addWorkTestHook func(buildgo.BuilderRev, commitDetail)
   471  
// commitDetail carries optional metadata about the commit(s) of a
// BuilderRev. addWorkDetail forwards it to newBuild together with the
// BuilderRev it describes.
type commitDetail struct {
	// RevCommitTime is always the git committer time of the associated
	// BuilderRev.Rev.
	RevCommitTime time.Time

	// SubRevCommitTime is always the git committer time of the associated
	// BuilderRev.SubRev, if it exists. Otherwise, it's the zero value.
	SubRevCommitTime time.Time

	// Branch for BuilderRev.Rev.
	RevBranch string

	// Branch for BuilderRev.SubRev, if it exists.
	SubRevBranch string

	// AuthorId is the gerrit-internal ID for the commit author, if
	// available. For sub-repo trybots, this is the author of the
	// commit from the trybot CL.
	AuthorId int64

	// AuthorEmail is the commit author from Gerrit, if available.
	// For sub-repo trybots, this is the author of the
	// commit from the trybot CL.
	AuthorEmail string
}
   497  
   498  // addWorkDetail adds some work to (maybe) do, if it's not already
   499  // enqueued and the builders are configured to run the given repo. The
   500  // detail argument is optional and used for scheduling. It's currently
   501  // only used for post-submit builds.
   502  func addWorkDetail(work buildgo.BuilderRev, detail commitDetail) {
   503  	if f := addWorkTestHook; f != nil {
   504  		f(work, detail)
   505  		return
   506  	}
   507  	if ignoreAllNewWork || isBuilding(work) {
   508  		return
   509  	}
   510  	if !mayBuildRev(work) {
   511  		if pool.NewGCEConfiguration().InStaging() {
   512  			if _, ok := dashboard.Builders[work.Name]; ok && logCantBuildStaging.Allow() {
   513  				log.Printf("may not build %v; skipping", work)
   514  			}
   515  		}
   516  		return
   517  	}
   518  	st, err := newBuild(work, detail)
   519  	if err != nil {
   520  		log.Printf("Bad build work params %v: %v", work, err)
   521  		return
   522  	}
   523  	st.start()
   524  }
   525  
   526  func stagingClusterBuilders() map[string]*dashboard.BuildConfig {
   527  	m := map[string]*dashboard.BuildConfig{}
   528  	for _, name := range []string{
   529  		"linux-amd64",
   530  		"linux-amd64-sid",
   531  		"linux-amd64-clang",
   532  		"js-wasm-node18",
   533  	} {
   534  		if c, ok := dashboard.Builders[name]; ok {
   535  			m[name] = c
   536  		} else {
   537  			panic(fmt.Sprintf("unknown builder %q", name))
   538  		}
   539  	}
   540  
   541  	// Also permit all the reverse buildlets:
   542  	for name, bc := range dashboard.Builders {
   543  		if bc.IsReverse() {
   544  			m[name] = bc
   545  		}
   546  	}
   547  	return m
   548  }
   549  
   550  func numCurrentBuilds() int {
   551  	statusMu.Lock()
   552  	defer statusMu.Unlock()
   553  	return len(status)
   554  }
   555  
   556  func isBuilding(work buildgo.BuilderRev) bool {
   557  	statusMu.Lock()
   558  	defer statusMu.Unlock()
   559  	_, building := status[work]
   560  	return building
   561  }
   562  
// Rate limiters for log messages that could otherwise be emitted very often.
// logUnknownBuilder (one event per 5 seconds, burst of 2) gates the
// "unknown builder" message in mayBuildRev; logCantBuildStaging (one event
// per second, burst of 2) gates the "may not build" message in addWorkDetail.
var (
	logUnknownBuilder   = rate.NewLimiter(rate.Every(5*time.Second), 2)
	logCantBuildStaging = rate.NewLimiter(rate.Every(1*time.Second), 2)
)
   567  
   568  // mayBuildRev reports whether the build type & revision should be started.
   569  // It returns true if it's not already building, and if a reverse buildlet is
   570  // required, if an appropriate machine is registered.
   571  func mayBuildRev(rev buildgo.BuilderRev) bool {
   572  	if isBuilding(rev) {
   573  		return false
   574  	}
   575  	if rev.SubName != "" {
   576  		// Don't build repos we don't know about,
   577  		// so importPathOfRepo won't panic later.
   578  		if r, ok := repos.ByGerritProject[rev.SubName]; !ok || r.ImportPath == "" || !r.CoordinatorCanBuild {
   579  			return false
   580  		}
   581  	}
   582  	buildConf, ok := dashboard.Builders[rev.Name]
   583  	if !ok {
   584  		if logUnknownBuilder.Allow() {
   585  			log.Printf("unknown builder %q", rev.Name)
   586  		}
   587  		return false
   588  	}
   589  	gceBuildEnv := pool.NewGCEConfiguration().BuildEnv()
   590  	if gceBuildEnv.MaxBuilds > 0 && numCurrentBuilds() >= gceBuildEnv.MaxBuilds {
   591  		return false
   592  	}
   593  	if buildConf.IsReverse() && !pool.ReversePool().CanBuild(buildConf.HostType) {
   594  		return false
   595  	}
   596  	return true
   597  }
   598  
   599  func setStatus(work buildgo.BuilderRev, st *buildStatus) {
   600  	statusMu.Lock()
   601  	defer statusMu.Unlock()
   602  	// TODO: panic if status[work] already exists. audit all callers.
   603  	// For instance, what if a trybot is running, and then the CL is merged
   604  	// and the findWork goroutine picks it up and it has the same commit,
   605  	// because it didn't need to be rebased in Gerrit's cherrypick?
   606  	// Could we then have two running with the same key?
   607  	status[work] = st
   608  }
   609  
   610  func markDone(work buildgo.BuilderRev) {
   611  	statusMu.Lock()
   612  	defer statusMu.Unlock()
   613  	st, ok := status[work]
   614  	if !ok {
   615  		return
   616  	}
   617  	delete(status, work)
   618  	if len(statusDone) == maxStatusDone {
   619  		copy(statusDone, statusDone[1:])
   620  		statusDone = statusDone[:len(statusDone)-1]
   621  	}
   622  	statusDone = append(statusDone, st)
   623  }
   624  
   625  // statusPtrStr disambiguates which status to return if there are
   626  // multiple in the history (e.g. recent failures where the build
   627  // didn't finish for reasons outside of all.bash failing)
   628  func getStatus(work buildgo.BuilderRev, statusPtrStr string) *buildStatus {
   629  	statusMu.Lock()
   630  	defer statusMu.Unlock()
   631  	match := func(st *buildStatus) bool {
   632  		return statusPtrStr == "" || fmt.Sprintf("%p", st) == statusPtrStr
   633  	}
   634  	if st, ok := status[work]; ok && match(st) {
   635  		return st
   636  	}
   637  	for _, st := range statusDone {
   638  		if st.BuilderRev == work && match(st) {
   639  			return st
   640  		}
   641  	}
   642  	for k, ts := range tries {
   643  		if k.Commit == work.Rev {
   644  			ts.mu.Lock()
   645  			for _, st := range ts.builds {
   646  				if st.BuilderRev == work && match(st) {
   647  					ts.mu.Unlock()
   648  					return st
   649  				}
   650  			}
   651  			ts.mu.Unlock()
   652  		}
   653  	}
   654  	return nil
   655  }
   656  
   657  // cancelOnePostSubmitBuildWithHostType tries to cancel one
   658  // post-submit (non trybot) build with the provided host type and
   659  // reports whether it did so.
   660  //
   661  // It currently selects the one that's been running the least amount
   662  // of time, but that's not guaranteed.
   663  func cancelOnePostSubmitBuildWithHostType(hostType string) bool {
   664  	statusMu.Lock()
   665  	defer statusMu.Unlock()
   666  	var best *buildStatus
   667  	for _, st := range status {
   668  		if st.isTry() || st.conf.HostType != hostType {
   669  			continue
   670  		}
   671  		if best == nil || st.startTime.After(best.startTime) {
   672  			best = st
   673  		}
   674  	}
   675  	if best != nil {
   676  		go best.cancelBuild()
   677  	}
   678  	return best != nil
   679  }
   680  
   681  type byAge []*buildStatus
   682  
   683  func (s byAge) Len() int           { return len(s) }
   684  func (s byAge) Less(i, j int) bool { return s[i].startTime.Before(s[j].startTime) }
   685  func (s byAge) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
   686  
   687  func serveTryStatus(json bool) http.HandlerFunc {
   688  	return func(w http.ResponseWriter, r *http.Request) {
   689  		ts := trySetOfCommitPrefix(r.FormValue("commit"))
   690  		var tss trySetState
   691  		if ts != nil {
   692  			ts.mu.Lock()
   693  			tss = ts.trySetState.clone()
   694  			ts.mu.Unlock()
   695  		}
   696  		if json {
   697  			serveTryStatusJSON(w, r, ts, tss)
   698  			return
   699  		}
   700  		serveTryStatusHTML(w, ts, tss)
   701  	}
   702  }
   703  
   704  // tss is a clone that does not require ts' lock.
   705  func serveTryStatusJSON(w http.ResponseWriter, r *http.Request, ts *trySet, tss trySetState) {
   706  	w.Header().Set("Access-Control-Allow-Origin", "*")
   707  	if r.Method == "OPTIONS" {
   708  		// This is likely a pre-flight CORS request.
   709  		return
   710  	}
   711  	var resp struct {
   712  		Success bool        `json:"success"`
   713  		Error   string      `json:"error,omitempty"`
   714  		Payload interface{} `json:"payload,omitempty"`
   715  	}
   716  	if ts == nil {
   717  		var buf bytes.Buffer
   718  		resp.Error = "TryBot result not found (already done, invalid, or not yet discovered from Gerrit). Check Gerrit for results."
   719  		if err := json.NewEncoder(&buf).Encode(resp); err != nil {
   720  			http.Error(w, err.Error(), http.StatusInternalServerError)
   721  			return
   722  		}
   723  		w.Header().Set("Content-Type", "application/json")
   724  		w.WriteHeader(http.StatusNotFound)
   725  		w.Write(buf.Bytes())
   726  		return
   727  	}
   728  	type litebuild struct {
   729  		Name      string    `json:"name"`
   730  		StartTime time.Time `json:"startTime"`
   731  		Done      bool      `json:"done"`
   732  		Succeeded bool      `json:"succeeded"`
   733  	}
   734  	var result struct {
   735  		ChangeID string      `json:"changeId"`
   736  		Commit   string      `json:"commit"`
   737  		Builds   []litebuild `json:"builds"`
   738  	}
   739  	result.Commit = ts.Commit
   740  	result.ChangeID = ts.ChangeID
   741  
   742  	for _, bs := range tss.builds {
   743  		var lb litebuild
   744  		bs.mu.Lock()
   745  		lb.Name = bs.Name
   746  		lb.StartTime = bs.startTime
   747  		if !bs.done.IsZero() {
   748  			lb.Done = true
   749  			lb.Succeeded = bs.succeeded
   750  		}
   751  		bs.mu.Unlock()
   752  		result.Builds = append(result.Builds, lb)
   753  	}
   754  	resp.Success = true
   755  	resp.Payload = result
   756  	var buf bytes.Buffer
   757  	if err := json.NewEncoder(&buf).Encode(resp); err != nil {
   758  		log.Printf("Could not encode JSON response: %v", err)
   759  		http.Error(w, "error encoding JSON", http.StatusInternalServerError)
   760  		return
   761  	}
   762  	w.Header().Set("Content-Type", "application/json")
   763  	w.Write(buf.Bytes())
   764  }
   765  
// Styles unique to the trybot status page.
// serveTryStatusHTML writes this block into the page's <head>, right after
// the link to the shared /style.css stylesheet.
const tryStatusCSS = `
<style>
p {
	line-height: 1.15em;
}

table {
	font-size: 11pt;
}

.nobr {
	white-space: nowrap;
}

</style>
`
   783  
   784  // tss is a clone that does not require ts' lock.
   785  func serveTryStatusHTML(w http.ResponseWriter, ts *trySet, tss trySetState) {
   786  	if ts == nil {
   787  		http.Error(w, "TryBot result not found (already done, invalid, or not yet discovered from Gerrit). Check Gerrit for results.", http.StatusNotFound)
   788  		return
   789  	}
   790  	buf := new(bytes.Buffer)
   791  	w.Header().Set("Content-Type", "text/html; charset=utf-8")
   792  	buf.WriteString("<!DOCTYPE html><head><title>trybot status</title>")
   793  	buf.WriteString(`<link rel="stylesheet" href="/style.css"/>`)
   794  	buf.WriteString(tryStatusCSS)
   795  	buf.WriteString("</head><body>")
   796  	fmt.Fprintf(buf, "[<a href='/'>homepage</a>] &gt; %s\n", ts.ChangeID)
   797  	fmt.Fprintf(buf, "<h1>Trybot Status</h1>")
   798  	fmt.Fprintf(buf, "<p>Change-ID: <a href='https://go-review.googlesource.com/#/q/%s'>%s</a><br />\n", ts.ChangeID, ts.ChangeID)
   799  	fmt.Fprintf(buf, "Commit: <a href='https://go-review.googlesource.com/#/q/%s'>%s</a></p>\n", ts.Commit, ts.Commit)
   800  	fmt.Fprintf(buf, "<p>Builds remaining: %d</p>\n", tss.remain)
   801  	fmt.Fprintf(buf, "<h4>Builds</h4>\n")
   802  	fmt.Fprintf(buf, "<table cellpadding=5 border=0>\n")
   803  	for _, bs := range tss.builds {
   804  		var status string
   805  		bs.mu.Lock()
   806  		if !bs.done.IsZero() {
   807  			if bs.succeeded {
   808  				status = "pass"
   809  			} else {
   810  				status = "<b>FAIL</b>"
   811  			}
   812  		} else {
   813  			status = fmt.Sprintf("<i>running</i> %s", time.Since(bs.startTime).Round(time.Second))
   814  		}
   815  		if u := bs.logURL; u != "" {
   816  			status = fmt.Sprintf(`<a href="%s">%s</a>`, html.EscapeString(u), status)
   817  		}
   818  		bs.mu.Unlock()
   819  		fmt.Fprintf(buf, "<tr><td class=\"nobr\">&#8226; %s</td><td>%s</td></tr>\n",
   820  			html.EscapeString(bs.NameAndBranch()), status)
   821  	}
   822  	fmt.Fprintf(buf, "</table>\n")
   823  	fmt.Fprintf(buf, "<h4>Full Detail</h4><table cellpadding=5 border=1>\n")
   824  	for _, bs := range tss.builds {
   825  		status := "<i>(running)</i>"
   826  		bs.mu.Lock()
   827  		if !bs.done.IsZero() {
   828  			if bs.succeeded {
   829  				status = "pass"
   830  			} else {
   831  				status = "<b>FAIL</b>"
   832  			}
   833  		}
   834  		bs.mu.Unlock()
   835  		fmt.Fprintf(buf, "<tr valign=top><td align=left>%s</td><td align=center>%s</td><td><pre>%s</pre></td></tr>\n",
   836  			html.EscapeString(bs.NameAndBranch()),
   837  			status,
   838  			bs.HTMLStatusTruncated())
   839  	}
   840  	fmt.Fprintf(buf, "</table>")
   841  	w.Write(buf.Bytes())
   842  }
   843  
   844  func trySetOfCommitPrefix(commitPrefix string) *trySet {
   845  	if commitPrefix == "" {
   846  		return nil
   847  	}
   848  	statusMu.Lock()
   849  	defer statusMu.Unlock()
   850  	for k, ts := range tries {
   851  		if strings.HasPrefix(k.Commit, commitPrefix) {
   852  			return ts
   853  		}
   854  	}
   855  	return nil
   856  }
   857  
   858  func handleLogs(w http.ResponseWriter, r *http.Request) {
   859  	br := buildgo.BuilderRev{
   860  		Name:    r.FormValue("name"),
   861  		Rev:     r.FormValue("rev"),
   862  		SubName: r.FormValue("subName"), // may be empty
   863  		SubRev:  r.FormValue("subRev"),  // may be empty
   864  	}
   865  	st := getStatus(br, r.FormValue("st"))
   866  	if st == nil {
   867  		http.NotFound(w, r)
   868  		return
   869  	}
   870  	w.Header().Set("Content-Type", "text/plain; charset=utf-8")
   871  	w.Header().Set("X-Content-Type-Options", "nosniff")
   872  	writeStatusHeader(w, st)
   873  
   874  	nostream := r.FormValue("nostream") != ""
   875  	if nostream || !st.isRunning() {
   876  		if nostream {
   877  			fmt.Fprintf(w, "\n\n(live streaming disabled; reload manually to see status)\n")
   878  		}
   879  		w.Write(st.output.Bytes())
   880  		return
   881  	}
   882  
   883  	if !st.hasEvent("make_and_test") && !st.hasEvent("make_cross_compile_kube") {
   884  		fmt.Fprintf(w, "\n\n(buildlet still starting; no live streaming. reload manually to see status)\n")
   885  		return
   886  	}
   887  
   888  	w.(http.Flusher).Flush()
   889  
   890  	output := st.output.Reader()
   891  	go func() {
   892  		<-r.Context().Done()
   893  		output.Close()
   894  	}()
   895  	buf := make([]byte, 65536)
   896  	for {
   897  		n, err := output.Read(buf)
   898  		if _, err2 := w.Write(buf[:n]); err2 != nil {
   899  			return
   900  		}
   901  		w.(http.Flusher).Flush()
   902  		if err != nil {
   903  			break
   904  		}
   905  	}
   906  }
   907  
   908  func writeStatusHeader(w http.ResponseWriter, st *buildStatus) {
   909  	st.mu.Lock()
   910  	defer st.mu.Unlock()
   911  	fmt.Fprintf(w, "  builder: %s\n", st.Name)
   912  	fmt.Fprintf(w, "      rev: %s\n", st.Rev)
   913  	workaroundFlush(w)
   914  	fmt.Fprintf(w, " buildlet: %s\n", st.bc)
   915  	fmt.Fprintf(w, "  started: %v\n", st.startTime)
   916  	done := !st.done.IsZero()
   917  	if done {
   918  		fmt.Fprintf(w, "    ended: %v\n", st.done)
   919  		fmt.Fprintf(w, "  success: %v\n", st.succeeded)
   920  	} else {
   921  		fmt.Fprintf(w, "   status: still running\n")
   922  	}
   923  	if len(st.events) > 0 {
   924  		io.WriteString(w, "\nEvents:\n")
   925  		st.writeEventsLocked(w, false, 0)
   926  	}
   927  	io.WriteString(w, "\nBuild log:\n")
   928  	workaroundFlush(w)
   929  }
   930  
   931  // workaroundFlush is an unnecessary flush to work around a bug in Chrome.
   932  // See https://code.google.com/p/chromium/issues/detail?id=2016 for the details.
   933  // In summary: a couple unnecessary chunk flushes bypass the content type
   934  // sniffing which happen (even if unused?), even if you set nosniff as we do
   935  // in func handleLogs.
   936  func workaroundFlush(w http.ResponseWriter) {
   937  	w.(http.Flusher).Flush()
   938  }
   939  
   940  // findWorkLoop polls https://build.golang.org/?mode=json looking for
   941  // new post-submit work for the main dashboard. It does not support
   942  // gccgo. This is separate from trybots, which populates its work from
   943  // findTryWorkLoop.
   944  func findWorkLoop() {
   945  	// TODO: remove this hard-coded 15 second ticker and instead
   946  	// do some new streaming gRPC call to maintnerd to subscribe
   947  	// to new commits.
   948  	ticker := time.NewTicker(15 * time.Second)
   949  	// We wait for the ticker first, before looking for work, to
   950  	// give findTryWork a head start. Because try work is more
   951  	// important and the scheduler can't (yet?) preempt an
   952  	// existing post-submit build to take it over for a trybot, we
   953  	// want to make sure that reverse buildlets get assigned to
   954  	// trybots/slowbots first on start-up.
   955  	for range ticker.C {
   956  		if err := findWork(); err != nil {
   957  			log.Printf("failed to find new work: %v", err)
   958  		}
   959  	}
   960  }
   961  
   962  // findWork polls the https://build.golang.org/ dashboard once to find
   963  // post-submit work to do. It's called in a loop by findWorkLoop.
   964  func findWork() error {
   965  	var bs types.BuildStatus
   966  	if err := dash("GET", "", url.Values{
   967  		"mode":   {"json"},
   968  		"branch": {"mixed"},
   969  	}, nil, &bs); err != nil {
   970  		return err
   971  	}
   972  	knownToDashboard := map[string]bool{} // keys are builder
   973  	for _, b := range bs.Builders {
   974  		knownToDashboard[b] = true
   975  	}
   976  
   977  	var goRevisions []string           // revisions of repo "go", branch "master"
   978  	var goRevisionsTypeParams []string // revisions of repo "go", branch "dev.typeparams" golang.org/issue/46786 and golang.org/issue/46864
   979  	seenSubrepo := make(map[string]bool)
   980  	commitTime := make(map[string]string)   // git rev => "2019-11-20T22:54:54Z" (time.RFC3339 from build.golang.org's JSON)
   981  	commitBranch := make(map[string]string) // git rev => "master"
   982  
   983  	add := func(br buildgo.BuilderRev) {
   984  		var d commitDetail
   985  		var err error
   986  		if revCommitTime := commitTime[br.Rev]; revCommitTime != "" {
   987  			d.RevCommitTime, err = time.Parse(time.RFC3339, revCommitTime)
   988  			if err != nil {
   989  				// Log the error, but ignore it. We can tolerate the lack of a commit time.
   990  				log.Printf("failure parsing commit time %q for %q: %v", revCommitTime, br.Rev, err)
   991  			}
   992  		}
   993  		d.RevBranch = commitBranch[br.Rev]
   994  		if br.SubRev != "" {
   995  			if subRevCommitTime := commitTime[br.SubRev]; subRevCommitTime != "" {
   996  				d.SubRevCommitTime, err = time.Parse(time.RFC3339, subRevCommitTime)
   997  				if err != nil {
   998  					// Log the error, but ignore it. We can tolerate the lack of a commit time.
   999  					log.Printf("failure parsing commit time %q for %q: %v", subRevCommitTime, br.SubRev, err)
  1000  				}
  1001  			}
  1002  			d.SubRevBranch = commitBranch[br.SubRev]
  1003  		}
  1004  		addWorkDetail(br, d)
  1005  	}
  1006  
  1007  	for _, br := range bs.Revisions {
  1008  		if r, ok := repos.ByGerritProject[br.Repo]; !ok || !r.CoordinatorCanBuild {
  1009  			continue
  1010  		}
  1011  		if br.Repo == "grpc-review" {
  1012  			// Skip the grpc repo. It's only for reviews
  1013  			// for now (using LetsUseGerrit).
  1014  			continue
  1015  		}
  1016  		commitTime[br.Revision] = br.Date
  1017  		commitBranch[br.Revision] = br.Branch
  1018  		awaitSnapshot := false
  1019  		if br.Repo == "go" {
  1020  			if br.Branch == "master" {
  1021  				goRevisions = append(goRevisions, br.Revision)
  1022  			} else if br.Branch == "dev.typeparams" {
  1023  				goRevisionsTypeParams = append(goRevisionsTypeParams, br.Revision)
  1024  			}
  1025  		} else {
  1026  			// If this is the first time we've seen this sub-repo
  1027  			// in this loop, then br.GoRevision is the go repo
  1028  			// HEAD.  To save resources, we only build subrepos
  1029  			// against HEAD once we have a snapshot.
  1030  			// The next time we see this sub-repo in this loop, the
  1031  			// GoRevision is one of the release branches, for which
  1032  			// we may not have a snapshot (if the release was made
  1033  			// a long time before this builder came up), so skip
  1034  			// the snapshot check.
  1035  			awaitSnapshot = !seenSubrepo[br.Repo]
  1036  			seenSubrepo[br.Repo] = true
  1037  		}
  1038  
  1039  		if len(br.Results) != len(bs.Builders) {
  1040  			return errors.New("bogus JSON response from dashboard: results is too long.")
  1041  		}
  1042  		for i, res := range br.Results {
  1043  			if res != "" {
  1044  				// It's either "ok" or a failure URL.
  1045  				continue
  1046  			}
  1047  			builder := bs.Builders[i]
  1048  			builderInfo, ok := dashboard.Builders[builder]
  1049  			if !ok {
  1050  				// Not managed by the coordinator.
  1051  				continue
  1052  			}
  1053  			if !builderInfo.BuildsRepoPostSubmit(br.Repo, br.Branch, br.GoBranch) {
  1054  				continue
  1055  			}
  1056  			var rev buildgo.BuilderRev
  1057  			if br.Repo == "go" {
  1058  				rev = buildgo.BuilderRev{
  1059  					Name: builder,
  1060  					Rev:  br.Revision,
  1061  				}
  1062  			} else {
  1063  				rev = buildgo.BuilderRev{
  1064  					Name:    builder,
  1065  					Rev:     br.GoRevision,
  1066  					SubName: br.Repo,
  1067  					SubRev:  br.Revision,
  1068  				}
  1069  				if awaitSnapshot &&
  1070  					// If this is a builder that snapshots after
  1071  					// make.bash but the snapshot doesn't yet exist,
  1072  					// then skip. But some builders on slow networks
  1073  					// don't snapshot, so don't wait for them. They'll
  1074  					// need to run make.bash first for x/ repos tests.
  1075  					!builderInfo.SkipSnapshot && !rev.SnapshotExists(context.TODO(), pool.NewGCEConfiguration().BuildEnv()) {
  1076  					continue
  1077  				}
  1078  			}
  1079  			add(rev)
  1080  		}
  1081  	}
  1082  
  1083  	// And to bootstrap new builders, see if we have any builders
  1084  	// that the dashboard doesn't know about.
  1085  	for b, builderInfo := range dashboard.Builders {
  1086  		if knownToDashboard[b] {
  1087  			// no need to bootstrap.
  1088  			continue
  1089  		}
  1090  		if builderInfo.BuildsRepoPostSubmit("go", "master", "master") {
  1091  			for _, rev := range goRevisions {
  1092  				add(buildgo.BuilderRev{Name: b, Rev: rev})
  1093  			}
  1094  		} else if builderInfo.BuildsRepoPostSubmit("go", "dev.typeparams", "dev.typeparams") {
  1095  			// schedule builds on dev.typeparams branch
  1096  			// golang.org/issue/46786 and golang.org/issue/46864
  1097  			for _, rev := range goRevisionsTypeParams {
  1098  				add(buildgo.BuilderRev{Name: b, Rev: rev})
  1099  			}
  1100  		}
  1101  	}
  1102  	return nil
  1103  }
  1104  
  1105  // findTryWorkLoop is a goroutine which loops periodically and queries
  1106  // Gerrit for TryBot work.
  1107  func findTryWorkLoop() {
  1108  	if pool.NewGCEConfiguration().TryDepsErr() != nil {
  1109  		return
  1110  	}
  1111  	ticker := time.NewTicker(1 * time.Second)
  1112  	for {
  1113  		if err := findTryWork(); err != nil {
  1114  			log.Printf("failed to find trybot work: %v", err)
  1115  		}
  1116  		<-ticker.C
  1117  	}
  1118  }
  1119  
  1120  func findTryWork() error {
  1121  	isStaging := pool.NewGCEConfiguration().InStaging()
  1122  	if isStaging && !stagingTryWork {
  1123  		return nil
  1124  	}
  1125  	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) // should be milliseconds
  1126  	defer cancel()
  1127  	tryRes, err := maintnerClient.GoFindTryWork(ctx, &apipb.GoFindTryWorkRequest{ForStaging: isStaging})
  1128  	if err != nil {
  1129  		return err
  1130  	}
  1131  
  1132  	now := time.Now()
  1133  
  1134  	statusMu.Lock()
  1135  	defer statusMu.Unlock()
  1136  
  1137  	tryList = tryList[:0]
  1138  	for _, work := range tryRes.Waiting {
  1139  		if work.ChangeId == "" || work.Commit == "" {
  1140  			log.Printf("Warning: skipping incomplete %#v", work)
  1141  			continue
  1142  		}
  1143  		if r, ok := repos.ByGerritProject[work.Project]; !ok || !r.CoordinatorCanBuild {
  1144  			continue
  1145  		}
  1146  		key := tryWorkItemKey(work)
  1147  		tryList = append(tryList, key)
  1148  		if ts, ok := tries[key]; ok {
  1149  			// already in progress
  1150  			ts.wantedAsOf = now
  1151  			continue
  1152  		} else {
  1153  			ts := newTrySet(work)
  1154  			ts.wantedAsOf = now
  1155  			tries[key] = ts
  1156  		}
  1157  	}
  1158  	for k, ts := range tries {
  1159  		if ts.wantedAsOf != now {
  1160  			delete(tries, k)
  1161  			go ts.cancelBuilds()
  1162  		}
  1163  	}
  1164  	return nil
  1165  }
  1166  
// tryKey identifies one trybot run: a specific commit of a Gerrit
// change on a given project and branch. It is comparable and is used as
// the key of the tries map.
type tryKey struct {
	Project  string // "go", "net", etc
	Branch   string // master
	ChangeID string // I1a27695838409259d1586a0adfa9f92bccf7ceba
	Commit   string // ecf3dffc81dc21408fb02159af352651882a8383
}
  1173  
  1174  // ChangeTriple returns the Gerrit (project, branch, change-ID) triple
  1175  // uniquely identifying this change. Several Gerrit APIs require this
  1176  // form of if there are multiple changes with the same Change-ID.
  1177  func (k *tryKey) ChangeTriple() string {
  1178  	return fmt.Sprintf("%s~%s~%s", k.Project, k.Branch, k.ChangeID)
  1179  }
  1180  
// trySet is the state of a set of builds of different
// configurations, all for the same (Change-ID, Commit) pair.  The
// sets which are still wanted (not already submitted or canceled) are
// stored in the global 'tries' map.
type trySet struct {
	// immutable
	tryKey
	tryID    string                   // "T" + 9 random hex
	slowBots []*dashboard.BuildConfig // any opt-in slower builders to run in a trybot run
	xrepos   []*buildStatus           // any opt-in x/ repo builds to run in a trybot run

	// wantedAsOf is guarded by statusMu and is used by
	// findTryWork. It records the last time this tryKey was still
	// wanted.
	wantedAsOf time.Time

	// mu guards the following fields.
	// See LOCK ORDER comment above.
	mu       sync.Mutex
	canceled bool // try run is no longer wanted and its builds were canceled
	// trySetState is the set's progress (remaining count, failures and
	// builds); the state method returns a copy of it taken under mu.
	trySetState
	errMsg bytes.Buffer
}
  1204  
// trySetState is the progress of a trySet: how many builds are still
// outstanding, which ones failed, and the builds themselves. It is
// embedded in trySet; clone produces a copy with its own slices.
type trySetState struct {
	remain int            // builds added to the set that have not been noted complete yet
	failed []string       // builder names, with optional " ($branch)" suffix
	builds []*buildStatus // the builds in the set, in the order they were added
}
  1210  
  1211  func (ts trySetState) clone() trySetState {
  1212  	return trySetState{
  1213  		remain: ts.remain,
  1214  		failed: append([]string(nil), ts.failed...),
  1215  		builds: append([]*buildStatus(nil), ts.builds...),
  1216  	}
  1217  }
  1218  
  1219  func tryWorkItemKey(work *apipb.GerritTryWorkItem) tryKey {
  1220  	return tryKey{
  1221  		Project:  work.Project,
  1222  		Branch:   work.Branch,
  1223  		ChangeID: work.ChangeId,
  1224  		Commit:   work.Commit,
  1225  	}
  1226  }
  1227  
// testingKnobSkipBuilds, when true, makes newTrySet record builds in
// the try set without starting their goroutines, skip the "beginning"
// notification to Gerrit, and skip adding x/ repo builds.
// NOTE(review): presumably set only by tests — confirm.
var testingKnobSkipBuilds bool
  1229  
// newTrySet creates a new trySet group of builders for a given
// work item, the (Project, Branch, Change-ID, Commit) tuple.
// It also starts goroutines for each build.
//
// The set can contain three kinds of builds:
//   - the main build, on every builder returned by
//     dashboard.TryBuildersForProject plus any requested slowbots;
//   - for non-"go" projects with two or more Go commits, an extra
//     linux-amd64 build against each Go commit after the first;
//   - for the "go" project on the master branch, x/ repo builds
//     returned by xReposFromComments plus a default x/tools build.
//
// Every build added to the set is also stored in the status map.
//
// Must hold statusMu.
func newTrySet(work *apipb.GerritTryWorkItem) *trySet {
	goBranch := work.Branch
	var subBranch string // branch of subrepository, empty for main Go repo.
	if work.Project != "go" && len(work.GoBranch) > 0 {
		// work.GoBranch is non-empty when work.Project != "go",
		// so prefer work.GoBranch[0] over work.Branch for goBranch.
		goBranch = work.GoBranch[0]
		subBranch = work.Branch
	}
	tryBots := dashboard.TryBuildersForProject(work.Project, work.Branch, goBranch)
	slowBots, invalidSlowBots := slowBotsFromComments(work)
	builders := joinBuilders(tryBots, slowBots)

	key := tryWorkItemKey(work)
	log.Printf("Starting new trybot set for %v (ignored invalid terms = %q)", key, invalidSlowBots)
	ts := &trySet{
		tryKey: key,
		tryID:  "T" + randHex(9),
		trySetState: trySetState{
			builds: make([]*buildStatus, 0, len(builders)),
		},
		slowBots: slowBots,
	}

	// Defensive check that the input is well-formed.
	// Each GoCommit should have a GoBranch and a GoVersion.
	// There should always be at least one GoVersion.
	// GoCommit is truncated so that the GoBranch[i] and GoVersion[i]
	// lookups made while iterating over GoCommit below stay in range.
	if len(work.GoBranch) < len(work.GoCommit) {
		log.Printf("WARNING: len(GoBranch) of %d != len(GoCommit) of %d", len(work.GoBranch), len(work.GoCommit))
		work.GoCommit = work.GoCommit[:len(work.GoBranch)]
	}
	if len(work.GoVersion) < len(work.GoCommit) {
		log.Printf("WARNING: len(GoVersion) of %d != len(GoCommit) of %d", len(work.GoVersion), len(work.GoCommit))
		work.GoCommit = work.GoCommit[:len(work.GoVersion)]
	}
	if len(work.GoVersion) == 0 {
		log.Print("WARNING: len(GoVersion) is zero, want at least one")
		work.GoVersion = []*apipb.MajorMinor{{}}
	}

	// addBuilderToSet registers bs in the status map and in the set, and
	// (unless builds are being skipped for testing) starts the build and
	// the goroutine that waits for it.
	addBuilderToSet := func(bs *buildStatus, brev buildgo.BuilderRev) {
		bs.trySet = ts
		status[brev] = bs

		// idx is the slot this build occupies in ts.builds; awaitTryBuild
		// uses it to swap in a replacement build on retry.
		idx := len(ts.builds)
		ts.builds = append(ts.builds, bs)
		ts.remain++
		if testingKnobSkipBuilds {
			return
		}
		go bs.start() // acquires statusMu itself, so in a goroutine
		go ts.awaitTryBuild(idx, bs, brev)
	}

	var mainBuildGoCommit string
	if key.Project != "go" && len(work.GoCommit) > 0 {
		// work.GoCommit is non-empty when work.Project != "go".
		// For the main build, use the first GoCommit, which represents Go tip (master branch).
		mainBuildGoCommit = work.GoCommit[0]
	}

	// Start the main TryBot build using the selected builders.
	// There may be additional builds, those are handled below.
	if !testingKnobSkipBuilds {
		go ts.notifyStarting(invalidSlowBots)
	}
	for _, bconf := range builders {
		// Skip builders that require a newer Go version than the one
		// being tested.
		goVersion := types.MajorMinor{Major: int(work.GoVersion[0].Major), Minor: int(work.GoVersion[0].Minor)}
		if goVersion.Less(bconf.MinimumGoVersion) {
			continue
		}
		brev := tryKeyToBuilderRev(bconf.Name, key, mainBuildGoCommit)
		bs, err := newBuild(brev, commitDetail{RevBranch: goBranch, SubRevBranch: subBranch, AuthorEmail: work.AuthorEmail})
		if err != nil {
			log.Printf("can't create build for %q: %v", brev, err)
			continue
		}
		addBuilderToSet(bs, brev)
	}

	// If this is a golang.org/x repo and there's more than one GoCommit,
	// that means we're testing against prior releases of Go too.
	// The version selection logic is currently in maintapi's GoFindTryWork implementation.
	if key.Project != "go" && len(work.GoCommit) >= 2 {
		// linuxBuilder is the standard builder for this purpose.
		linuxBuilder := dashboard.Builders["linux-amd64"]

		for i, goRev := range work.GoCommit {
			if i == 0 {
				// Skip the i==0 element, which was already handled above.
				continue
			}
			branch := work.GoBranch[i]
			if !linuxBuilder.BuildsRepoTryBot(key.Project, "master", branch) {
				continue
			}
			goVersion := types.MajorMinor{Major: int(work.GoVersion[i].Major), Minor: int(work.GoVersion[i].Minor)}
			if goVersion.Less(linuxBuilder.MinimumGoVersion) {
				continue
			}
			brev := tryKeyToBuilderRev(linuxBuilder.Name, key, goRev)
			bs, err := newBuild(brev, commitDetail{RevBranch: branch, SubRevBranch: subBranch, AuthorEmail: work.AuthorEmail})
			if err != nil {
				log.Printf("can't create build for %q: %v", brev, err)
				continue
			}
			addBuilderToSet(bs, brev)
		}
	}

	// For the Go project on the "master" branch,
	// use the TRY= syntax to test against x repos.
	if branch := key.Branch; key.Project == "go" && branch == "master" {
		// addXrepo adds a build of the x/ repo named project, at its
		// current head, against the Go commit under test. It returns
		// the new build, or nil if no build was added.
		//
		// customBuilder optionally specifies the builder to use for the build
		// (empty string means to use the default builder).
		addXrepo := func(project, customBuilder string) *buildStatus {
			// linux-amd64 is the default builder as it is the fastest and least
			// expensive.
			builder := dashboard.Builders["linux-amd64"]
			if customBuilder != "" {
				b, ok := dashboard.Builders[customBuilder]
				if !ok {
					log.Printf("can't resolve requested builder %q", customBuilder)
					return nil
				}
				builder = b
			}

			if testingKnobSkipBuilds {
				return nil
			}
			if !builder.BuildsRepoPostSubmit(project, branch, branch) {
				log.Printf("builder %q isn't configured to build %q@%q", builder.Name, project, branch)
				return nil
			}
			rev, err := getRepoHead(project)
			if err != nil {
				log.Printf("can't determine repo head for %q: %v", project, err)
				return nil
			}
			brev := buildgo.BuilderRev{
				Name:    builder.Name,
				Rev:     work.Commit,
				SubName: project,
				SubRev:  rev,
			}
			// getRepoHead always fetches master, so use that as the SubRevBranch.
			bs, err := newBuild(brev, commitDetail{RevBranch: branch, SubRevBranch: "master", AuthorEmail: work.AuthorEmail})
			if err != nil {
				log.Printf("can't create x/%s trybot build for go/master commit %s: %v", project, rev, err)
				return nil
			}
			addBuilderToSet(bs, brev)
			return bs
		}

		// First, add the opt-in x repos.
		repoBuilders := xReposFromComments(work)
		for rb := range repoBuilders {
			if bs := addXrepo(rb.Project, rb.Builder); bs != nil {
				ts.xrepos = append(ts.xrepos, bs)
			}
		}

		// Always include the default x/tools builder. See golang.org/issue/34348.
		// Do not add it to the trySet's list of opt-in x repos, however.
		if haveDefaultToolsBuild := repoBuilders[xRepoAndBuilder{Project: "tools"}]; !haveDefaultToolsBuild {
			addXrepo("tools", "")
		}
	}

	return ts
}
  1408  
  1409  // Note: called in some paths where statusMu is held; do not make RPCs.
  1410  func tryKeyToBuilderRev(builder string, key tryKey, goRev string) buildgo.BuilderRev {
  1411  	// This function is called from within newTrySet, holding statusMu, s
  1412  	if key.Project == "go" {
  1413  		return buildgo.BuilderRev{
  1414  			Name: builder,
  1415  			Rev:  key.Commit,
  1416  		}
  1417  	}
  1418  	return buildgo.BuilderRev{
  1419  		Name:    builder,
  1420  		Rev:     goRev,
  1421  		SubName: key.Project,
  1422  		SubRev:  key.Commit,
  1423  	}
  1424  }
  1425  
  1426  // joinBuilders joins sets of builders into one set.
  1427  // The resulting set contains unique builders sorted by name.
  1428  func joinBuilders(sets ...[]*dashboard.BuildConfig) []*dashboard.BuildConfig {
  1429  	byName := make(map[string]*dashboard.BuildConfig)
  1430  	for _, set := range sets {
  1431  		for _, bc := range set {
  1432  			byName[bc.Name] = bc
  1433  		}
  1434  	}
  1435  	var all []*dashboard.BuildConfig
  1436  	for _, bc := range byName {
  1437  		all = append(all, bc)
  1438  	}
  1439  	sort.Slice(all, func(i, j int) bool { return all[i].Name < all[j].Name })
  1440  	return all
  1441  }
  1442  
  1443  // state returns a copy of the trySet's state.
  1444  func (ts *trySet) state() trySetState {
  1445  	ts.mu.Lock()
  1446  	defer ts.mu.Unlock()
  1447  	return ts.trySetState.clone()
  1448  }
  1449  
  1450  // tryBotsTag returns a Gerrit tag for the TryBots state s. See Issue 39828 and
  1451  // https://gerrit-review.googlesource.com/Documentation/rest-api-changes.html#review-input.
  1452  func tryBotsTag(s string) string {
  1453  	return "autogenerated:trybots~" + s
  1454  }
  1455  
  1456  func isTryBotsTag(s string) bool {
  1457  	return strings.HasPrefix(s, "autogenerated:trybots~")
  1458  }
  1459  
// A commentThread is a thread of Gerrit comments: a root comment plus
// the replies that chain back to it. Threads are built by
// listPatchSetThreads.
type commentThread struct {
	// root is the first comment in the thread.
	root gerrit.CommentInfo
	// thread is a list of all the comments in the thread, including the root,
	// sorted chronologically.
	thread []gerrit.CommentInfo
	// unresolved is the thread unresolved state, based on the last comment
	// in the thread that specifies one.
	unresolved bool
}
  1470  
  1471  // listPatchSetThreads returns a list of PATCHSET_LEVEL comment threads, sorted
  1472  // by the time at which they were started.
  1473  func listPatchSetThreads(gerritClient *gerrit.Client, changeID string) ([]*commentThread, error) {
  1474  	comments, err := gerritClient.ListChangeComments(context.Background(), changeID)
  1475  	if err != nil {
  1476  		return nil, err
  1477  	}
  1478  	patchSetComments := comments["/PATCHSET_LEVEL"]
  1479  	if len(patchSetComments) == 0 {
  1480  		return nil, nil
  1481  	}
  1482  
  1483  	// The API doesn't sort comments chronologically, but "the state of
  1484  	// resolution of a comment thread is stored in the last comment in that
  1485  	// thread chronologically", so first of all sort them by time.
  1486  	sort.Slice(patchSetComments, func(i, j int) bool {
  1487  		return patchSetComments[i].Updated.Time().Before(patchSetComments[j].Updated.Time())
  1488  	})
  1489  
  1490  	// roots is a map of message IDs to their thread root.
  1491  	roots := make(map[string]string)
  1492  	threads := make(map[string]*commentThread)
  1493  	var result []*commentThread
  1494  	for _, c := range patchSetComments {
  1495  		if c.InReplyTo == "" {
  1496  			roots[c.ID] = c.ID
  1497  			threads[c.ID] = &commentThread{
  1498  				root:       c,
  1499  				thread:     []gerrit.CommentInfo{c},
  1500  				unresolved: *c.Unresolved,
  1501  			}
  1502  			if c.Unresolved != nil {
  1503  				threads[c.ID].unresolved = *c.Unresolved
  1504  			}
  1505  			result = append(result, threads[c.ID])
  1506  			continue
  1507  		}
  1508  
  1509  		root, ok := roots[c.InReplyTo]
  1510  		if !ok {
  1511  			return nil, fmt.Errorf("%s has no parent", c.ID)
  1512  		}
  1513  		roots[c.ID] = root
  1514  		threads[root].thread = append(threads[root].thread, c)
  1515  		if c.Unresolved != nil {
  1516  			threads[root].unresolved = *c.Unresolved
  1517  		}
  1518  	}
  1519  
  1520  	return result, nil
  1521  }
  1522  
  1523  func (ts *trySet) statusPage() string {
  1524  	return "https://farmer.golang.org/try?commit=" + ts.Commit[:8]
  1525  }
  1526  
  1527  // notifyStarting runs in its own goroutine and posts to Gerrit that
  1528  // the trybots have started on the user's CL with a link of where to watch.
  1529  func (ts *trySet) notifyStarting(invalidSlowBots []string) {
  1530  	name := "TryBots"
  1531  	if len(ts.slowBots) > 0 {
  1532  		name = "SlowBots"
  1533  	}
  1534  	msg := name + " beginning. Status page: " + ts.statusPage() + "\n"
  1535  
  1536  	if len(invalidSlowBots) > 0 {
  1537  		msg += fmt.Sprintf("Note that the following SlowBot terms didn't match any existing builder name or slowbot alias: %s.\n", strings.Join(invalidSlowBots, ", "))
  1538  	}
  1539  
  1540  	// If any of the requested SlowBot builders
  1541  	// have a known issue, give users a warning.
  1542  	for _, b := range ts.slowBots {
  1543  		if len(b.KnownIssues) > 0 {
  1544  			issueBlock := new(strings.Builder)
  1545  			fmt.Fprintf(issueBlock, "Note that builder %s has known issues:\n", b.Name)
  1546  			for _, i := range b.KnownIssues {
  1547  				fmt.Fprintf(issueBlock, "\thttps://go.dev/issue/%d\n", i)
  1548  			}
  1549  			msg += issueBlock.String()
  1550  		}
  1551  	}
  1552  
  1553  	unresolved := true
  1554  	ri := gerrit.ReviewInput{
  1555  		Tag: tryBotsTag("beginning"),
  1556  		Comments: map[string][]gerrit.CommentInput{
  1557  			"/PATCHSET_LEVEL": {{Message: msg, Unresolved: &unresolved}},
  1558  		},
  1559  	}
  1560  
  1561  	// Mark as resolved old TryBot threads that don't have human comments on them.
  1562  	gerritClient := pool.NewGCEConfiguration().GerritClient()
  1563  	if patchSetThreads, err := listPatchSetThreads(gerritClient, ts.ChangeTriple()); err == nil {
  1564  		for _, t := range patchSetThreads {
  1565  			if !t.unresolved {
  1566  				continue
  1567  			}
  1568  			hasHumanComments := false
  1569  			for _, c := range t.thread {
  1570  				if !isTryBotsTag(c.Tag) {
  1571  					hasHumanComments = true
  1572  					break
  1573  				}
  1574  			}
  1575  			if hasHumanComments {
  1576  				continue
  1577  			}
  1578  			unresolved := false
  1579  			ri.Comments["/PATCHSET_LEVEL"] = append(ri.Comments["/PATCHSET_LEVEL"], gerrit.CommentInput{
  1580  				InReplyTo:  t.root.ID,
  1581  				Message:    "Superseded.",
  1582  				Unresolved: &unresolved,
  1583  			})
  1584  		}
  1585  	} else {
  1586  		log.Printf("Error getting Gerrit threads on %s: %v", ts.ChangeTriple(), err)
  1587  	}
  1588  
  1589  	if err := gerritClient.SetReview(context.Background(), ts.ChangeTriple(), ts.Commit, ri); err != nil {
  1590  		log.Printf("Error leaving Gerrit comment on %s: %v", ts.Commit[:8], err)
  1591  	}
  1592  }
  1593  
// awaitTryBuild runs in its own goroutine and waits for a build in a
// trySet to complete.
//
// idx is the slot in ts.builds that bs occupies; brev is the builder and
// revision used to create a replacement build if bs has to be retried.
//
// If the build fails without getting to the end, it sleeps and
// reschedules it, as long as it's still wanted.
func (ts *trySet) awaitTryBuild(idx int, bs *buildStatus, brev buildgo.BuilderRev) {
	for {
		// Wait for the build's context to finish, waking every 10
		// minutes to check whether the trySet is still wanted.
	WaitCh:
		for {
			timeout := time.NewTimer(10 * time.Minute)
			select {
			case <-bs.ctx.Done():
				timeout.Stop()
				break WaitCh
			case <-timeout.C:
				if !ts.wanted() {
					// Build was canceled.
					return
				}
			}
		}

		// The build's context is done. If it recorded a terminal
		// event, report the result and stop.
		if bs.hasEvent(eventDone) || bs.hasEvent(eventSkipBuildMissingDep) {
			ts.noteBuildComplete(bs)
			return
		}

		// TODO(bradfitz): rethink this logic. we should only
		// start a new build if the old one appears dead or
		// hung.

		// Sleep a bit and retry.
		time.Sleep(30 * time.Second)
		if !ts.wanted() {
			return
		}
		// NOTE(review): the error from newBuild is discarded; if it can
		// return a nil *buildStatus alongside an error, the next line
		// would panic. Confirm against newBuild.
		bs, _ = newBuild(brev, bs.commitDetail)
		bs.trySet = ts
		go bs.start()
		// Replace the old build in the trySet with the retry.
		ts.mu.Lock()
		ts.builds[idx] = bs
		ts.mu.Unlock()
	}
}
  1638  
  1639  // wanted reports whether this trySet is still active.
  1640  //
  1641  // If the commit has been submitted, or change abandoned, or the
  1642  // checkbox unchecked, wanted returns false.
  1643  func (ts *trySet) wanted() bool {
  1644  	statusMu.Lock()
  1645  	defer statusMu.Unlock()
  1646  	_, ok := tries[ts.tryKey]
  1647  	return ok
  1648  }
  1649  
  1650  // cancelBuilds run in its own goroutine and cancels this trySet's
  1651  // currently-active builds because they're no longer wanted.
  1652  func (ts *trySet) cancelBuilds() {
  1653  	ts.mu.Lock()
  1654  	defer ts.mu.Unlock()
  1655  
  1656  	// Only cancel the builds once. And note that they're canceled so we
  1657  	// can avoid spamming Gerrit later if they come back as failed.
  1658  	if ts.canceled {
  1659  		return
  1660  	}
  1661  	ts.canceled = true
  1662  
  1663  	for _, bs := range ts.builds {
  1664  		go bs.cancelBuild()
  1665  	}
  1666  }
  1667  
// noteBuildComplete records that bs, one of the builds in ts, has
// finished. It updates the trySet's remaining/failed bookkeeping,
// uploads the build log via newBuildLogBlob, and, when appropriate,
// posts a patchset-level comment (and possibly a TryBot-Result vote)
// to Gerrit.
//
// A Gerrit message is posted in two situations: on the first failure
// while other builds are still running, and when the last build
// completes. Nothing is posted if the trySet was canceled.
func (ts *trySet) noteBuildComplete(bs *buildStatus) {
	// Snapshot the build's result and log under its own lock.
	bs.mu.Lock()
	var (
		succeeded = bs.succeeded
		buildLog  = bs.output.String()
	)
	bs.mu.Unlock()

	// Update the trySet counters and snapshot what is needed below so
	// the rest of the function can run without holding ts.mu.
	ts.mu.Lock()
	ts.remain--
	remain := ts.remain
	if !succeeded {
		ts.failed = append(ts.failed, bs.NameAndBranch())
	}
	numFail := len(ts.failed)
	canceled := ts.canceled
	ts.mu.Unlock()

	if canceled {
		// Be quiet and don't spam Gerrit.
		return
	}

	const failureFooter = "Consult https://build.golang.org/ to see whether they are new failures. Keep in mind that TryBots currently test *exactly* your git commit, without rebasing. If your commit's git parent is old, the failure might've already been fixed.\n"

	// Name the log object after the revision, the builder, and a short
	// SHA-1 prefix of the log contents, then upload the log.
	s1 := sha1.New()
	io.WriteString(s1, buildLog)
	objName := fmt.Sprintf("%s/%s_%x.log", bs.Rev[:8], bs.Name, s1.Sum(nil)[:4])
	wr, logURL := newBuildLogBlob(objName)
	// NOTE(review): on an upload failure this returns before any Gerrit
	// message is posted, including the final report.
	if _, err := io.WriteString(wr, buildLog); err != nil {
		log.Printf("Failed to write to GCS: %v", err)
		return
	}
	if err := wr.Close(); err != nil {
		log.Printf("Failed to write to GCS: %v", err)
		return
	}

	bs.mu.Lock()
	bs.logURL = logURL
	bs.mu.Unlock()

	// Accumulate one line per failed build for the final report.
	if !succeeded {
		ts.mu.Lock()
		fmt.Fprintf(&ts.errMsg, "Failed on %s: %s\n", bs.NameAndBranch(), logURL)
		ts.mu.Unlock()
	}

	// Post an interim message only for the first failure while builds
	// remain, and a final message once nothing remains.
	postInProgressMessage := !succeeded && numFail == 1 && remain > 0
	postFinishedMessage := remain == 0

	if !postInProgressMessage && !postFinishedMessage {
		return
	}

	var (
		gerritMsg   = &strings.Builder{}
		gerritTag   string
		gerritScore int
	)

	if postInProgressMessage {
		fmt.Fprintf(gerritMsg, "Build is still in progress... "+
			"Status page: https://farmer.golang.org/try?commit=%s\n"+
			"Failed on %s: %s\n"+
			"Other builds still in progress; subsequent failure notices suppressed until final report.\n\n"+
			failureFooter, ts.Commit[:8], bs.NameAndBranch(), logURL)
		gerritTag = tryBotsTag("progress")
	}

	if postFinishedMessage {
		// The report is labeled "SlowBots" when any slow bots were
		// part of this trySet.
		name := "TryBots"
		if len(ts.slowBots) > 0 {
			name = "SlowBots"
		}

		if numFail == 0 {
			gerritScore = 1
			fmt.Fprintf(gerritMsg, "%s are happy.\n", name)
			gerritTag = tryBotsTag("happy")
		} else {
			gerritScore = -1
			ts.mu.Lock()
			errMsg := ts.errMsg.String()
			ts.mu.Unlock()
			fmt.Fprintf(gerritMsg, "%d of %d %s failed.\n%s\n"+failureFooter,
				numFail, len(ts.builds), name, errMsg)
			gerritTag = tryBotsTag("failed")
		}
		fmt.Fprintln(gerritMsg)
		if len(ts.slowBots) > 0 {
			fmt.Fprintf(gerritMsg, "SlowBot builds that ran:\n")
			for _, c := range ts.slowBots {
				fmt.Fprintf(gerritMsg, "* %s\n", c.Name)
			}
		}
		if len(ts.xrepos) > 0 {
			fmt.Fprintf(gerritMsg, "Also tested the following repos:\n")
			for _, st := range ts.xrepos {
				fmt.Fprintf(gerritMsg, "* %s\n", st.NameAndBranch())
			}
		}
	}

	// Find the "beginning" comment for this trySet (matched by tag and
	// by containing this trySet's status page URL) so the new comment
	// can be posted as a reply to it. If several match, the last one
	// listed wins. Failure to list threads is logged and the comment
	// is posted without a parent.
	var inReplyTo string
	gerritClient := pool.NewGCEConfiguration().GerritClient()
	if patchSetThreads, err := listPatchSetThreads(gerritClient, ts.ChangeTriple()); err == nil {
		for _, t := range patchSetThreads {
			if t.root.Tag == tryBotsTag("beginning") && strings.Contains(t.root.Message, ts.statusPage()) {
				inReplyTo = t.root.ID
			}
		}
	} else {
		log.Printf("Error getting Gerrit threads on %s: %v", ts.ChangeTriple(), err)
	}

	// Mark resolved if TryBots are happy.
	unresolved := gerritScore != 1

	ri := gerrit.ReviewInput{
		Tag: gerritTag,
		Comments: map[string][]gerrit.CommentInput{
			"/PATCHSET_LEVEL": {{
				InReplyTo:  inReplyTo,
				Message:    gerritMsg.String(),
				Unresolved: &unresolved,
			}},
		},
	}
	// Only vote when there is a final result; an in-progress message
	// leaves gerritScore at zero and sets no label.
	if gerritScore != 0 {
		ri.Labels = map[string]int{
			"TryBot-Result": gerritScore,
		}
	}
	if err := gerritClient.SetReview(context.Background(), ts.ChangeTriple(), ts.Commit, ri); err != nil {
		log.Printf("Error leaving Gerrit comment on %s: %v", ts.Commit[:8], err)
	}
}
  1806  
  1807  // getBuildlets creates up to n buildlets and sends them on the returned channel
  1808  // before closing the channel.
  1809  func getBuildlets(ctx context.Context, n int, schedTmpl *queue.SchedItem, lg pool.Logger) <-chan buildlet.Client {
  1810  	ch := make(chan buildlet.Client) // NOT buffered
  1811  	var wg sync.WaitGroup
  1812  	wg.Add(n)
  1813  	for i := 0; i < n; i++ {
  1814  		go func(i int) {
  1815  			defer wg.Done()
  1816  			sp := lg.CreateSpan("get_helper", fmt.Sprintf("helper %d/%d", i+1, n))
  1817  			schedItem := *schedTmpl // copy; GetBuildlet takes ownership
  1818  			schedItem.IsHelper = i > 0
  1819  			bc, err := sched.GetBuildlet(ctx, &schedItem)
  1820  			sp.Done(err)
  1821  			if err != nil {
  1822  				if err != context.Canceled {
  1823  					log.Printf("failed to get a %s buildlet: %v", schedItem.HostType, err)
  1824  				}
  1825  				return
  1826  			}
  1827  			lg.LogEventTime("empty_helper_ready", bc.Name())
  1828  			select {
  1829  			case ch <- bc:
  1830  			case <-ctx.Done():
  1831  				lg.LogEventTime("helper_killed_before_use", bc.Name())
  1832  				bc.Close()
  1833  				return
  1834  			}
  1835  		}(i)
  1836  	}
  1837  	go func() {
  1838  		wg.Wait()
  1839  		close(ch)
  1840  	}()
  1841  	return ch
  1842  }
  1843  
// testSet is the collection of test items belonging to one build,
// together with lazily computed orderings in which to hand them out.
type testSet struct {
	st        *buildStatus         // the build these tests belong to
	items     []*testItem          // all tests in the set
	testStats *buildstats.TestStats // source of per-test durations used when partitioning

	// mu is held while inOrder and biggestFirst are initialized and read.
	mu           sync.Mutex
	inOrder      [][]*testItem // chunks in run order; built on first use by initInOrder
	biggestFirst [][]*testItem // single-item chunks, longest duration first; built on first use by initBiggestFirst
}
  1853  
  1854  // cancelAll cancels all pending tests.
  1855  func (s *testSet) cancelAll() {
  1856  	for _, ti := range s.items {
  1857  		ti.tryTake() // ignore return value
  1858  	}
  1859  }
  1860  
  1861  func (s *testSet) testsToRunInOrder() (chunk []*testItem, ok bool) {
  1862  	s.mu.Lock()
  1863  	defer s.mu.Unlock()
  1864  	if s.inOrder == nil {
  1865  		s.initInOrder()
  1866  	}
  1867  	return s.testsFromSlice(s.inOrder)
  1868  }
  1869  
  1870  func (s *testSet) testsToRunBiggestFirst() (chunk []*testItem, ok bool) {
  1871  	s.mu.Lock()
  1872  	defer s.mu.Unlock()
  1873  	if s.biggestFirst == nil {
  1874  		s.initBiggestFirst()
  1875  	}
  1876  	return s.testsFromSlice(s.biggestFirst)
  1877  }
  1878  
  1879  func (s *testSet) testsFromSlice(chunkList [][]*testItem) (chunk []*testItem, ok bool) {
  1880  	for _, candChunk := range chunkList {
  1881  		for _, ti := range candChunk {
  1882  			if ti.tryTake() {
  1883  				chunk = append(chunk, ti)
  1884  			}
  1885  		}
  1886  		if len(chunk) > 0 {
  1887  			return chunk, true
  1888  		}
  1889  	}
  1890  	return nil, false
  1891  }
  1892  
  1893  func (s *testSet) initInOrder() {
  1894  	names := make([]string, len(s.items))
  1895  	namedItem := map[string]*testItem{}
  1896  	for i, ti := range s.items {
  1897  		names[i] = ti.name.Old
  1898  		namedItem[ti.name.Old] = ti
  1899  	}
  1900  
  1901  	// First do the go_test:* ones. partitionGoTests
  1902  	// only returns those, which are the ones we merge together.
  1903  	stdSets := partitionGoTests(s.testStats.Duration, s.st.BuilderRev.Name, names)
  1904  	for _, set := range stdSets {
  1905  		tis := make([]*testItem, len(set))
  1906  		for i, name := range set {
  1907  			tis[i] = namedItem[name]
  1908  		}
  1909  		s.inOrder = append(s.inOrder, tis)
  1910  	}
  1911  
  1912  	// Then do the misc tests, which are always by themselves.
  1913  	// (No benefit to merging them)
  1914  	for _, ti := range s.items {
  1915  		if !strings.HasPrefix(ti.name.Old, "go_test:") {
  1916  			s.inOrder = append(s.inOrder, []*testItem{ti})
  1917  		}
  1918  	}
  1919  }
  1920  
  1921  func partitionGoTests(testDuration func(string, string) time.Duration, builderName string, tests []string) (sets [][]string) {
  1922  	var srcTests []string
  1923  	var cmdTests []string
  1924  	for _, name := range tests {
  1925  		if strings.HasPrefix(name, "go_test:cmd/") {
  1926  			cmdTests = append(cmdTests, name)
  1927  		} else if strings.HasPrefix(name, "go_test:") {
  1928  			srcTests = append(srcTests, name)
  1929  		}
  1930  	}
  1931  	sort.Strings(srcTests)
  1932  	sort.Strings(cmdTests)
  1933  	goTests := append(srcTests, cmdTests...)
  1934  
  1935  	const sizeThres = 10 * time.Second
  1936  
  1937  	var curSet []string
  1938  	var curDur time.Duration
  1939  
  1940  	flush := func() {
  1941  		if len(curSet) > 0 {
  1942  			sets = append(sets, curSet)
  1943  			curSet = nil
  1944  			curDur = 0
  1945  		}
  1946  	}
  1947  	for _, testName := range goTests {
  1948  		d := testDuration(builderName, testName)
  1949  		if curDur+d > sizeThres {
  1950  			flush() // no-op if empty
  1951  		}
  1952  		curSet = append(curSet, testName)
  1953  		curDur += d
  1954  	}
  1955  
  1956  	flush()
  1957  	return
  1958  }
  1959  
  1960  func (s *testSet) initBiggestFirst() {
  1961  	items := append([]*testItem(nil), s.items...)
  1962  	sort.Sort(sort.Reverse(byTestDuration(items)))
  1963  	for _, item := range items {
  1964  		s.biggestFirst = append(s.biggestFirst, []*testItem{item})
  1965  	}
  1966  }
  1967  
// testItem is one test within a testSet, along with the state used to
// claim it and to record its outcome.
type testItem struct {
	set      *testSet      // the set this item belongs to
	name     distTestName  // the test's name
	duration time.Duration // optional approximate size

	// take acts as an ownership semaphore: tryTake claims the item by
	// sending on it, and retry releases the item by receiving from it.
	take chan token // buffered size 1: sending takes ownership of rest of fields:

	done    chan token // closed when done; guards output & failed
	numFail int        // how many times it's failed to execute

	// groupSize is the number of tests which were run together
	// along with this one with "go dist test".
	// This is 1 for non-std/cmd tests, and usually >1 for std/cmd tests.
	groupSize   int
	shardIPPort string // buildlet's IPPort, for debugging

	// the following are only set for the first item in a group:
	output       []byte
	remoteErr    error         // real test failure (not a communications failure)
	execDuration time.Duration // actual time
}
  1989  
  1990  func (ti *testItem) tryTake() bool {
  1991  	select {
  1992  	case ti.take <- token{}:
  1993  		return true
  1994  	default:
  1995  		return false
  1996  	}
  1997  }
  1998  
// retry reschedules the test to run again, if a machine died before
// or during execution, so its results aren't yet known.
// The caller must own the 'take' semaphore.
//
// It works by receiving the token that a successful tryTake placed on
// ti.take, so that a later tryTake can succeed again.
func (ti *testItem) retry() {
	// release it to make it available for somebody else to try later:
	<-ti.take
}
  2006  
  2007  func (ti *testItem) failf(format string, args ...interface{}) {
  2008  	msg := fmt.Sprintf(format, args...)
  2009  	ti.output = []byte(msg)
  2010  	ti.remoteErr = errors.New(msg)
  2011  	close(ti.done)
  2012  }
  2013  
// distTestName is the name of a dist test as discovered from 'go tool dist test -list'.
// It carries the same test name in two spellings.
type distTestName struct {
	Old string // Old is dist test name converted to Go 1.20 format, like "go_test:sort" or "reboot".
	Raw string // Raw is unmodified name from dist, suitable as an argument back to 'go tool dist test'.
}
  2019  
  2020  type byTestDuration []*testItem
  2021  
  2022  func (s byTestDuration) Len() int           { return len(s) }
  2023  func (s byTestDuration) Less(i, j int) bool { return s[i].duration < s[j].duration }
  2024  func (s byTestDuration) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
  2025  
// eventAndTime pairs a named event with a timestamp and optional
// free-form detail.
type eventAndTime struct {
	t    time.Time // timestamp associated with the event
	evt  string    // "get_source", "make_and_test", "make", etc
	text string    // optional detail text
}
  2031  
// nl is a single newline, as a byte slice.
var nl = []byte("\n")
  2033  
  2034  // getRepoHead returns the commit hash of the latest master HEAD
  2035  // for the given repo ("go", "tools", "sys", etc).
  2036  func getRepoHead(repo string) (string, error) {
  2037  	// This gRPC call should only take a couple milliseconds, but set some timeout
  2038  	// to catch network problems. 5 seconds is overkill.
  2039  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
  2040  	defer cancel()
  2041  	res, err := maintnerClient.GetRef(ctx, &apipb.GetRefRequest{
  2042  		GerritServer:  "go.googlesource.com",
  2043  		GerritProject: repo,
  2044  		Ref:           "refs/heads/master",
  2045  	})
  2046  	if err != nil {
  2047  		return "", fmt.Errorf("looking up ref for %q: %v", repo, err)
  2048  	}
  2049  	if res.Value == "" {
  2050  		return "", fmt.Errorf("no master ref found for %q", repo)
  2051  	}
  2052  	return res.Value, nil
  2053  }
  2054  
  2055  // newBuildLogBlob creates a new object to record a public build log.
  2056  // The objName should be a Google Cloud Storage object name.
  2057  // When developing on localhost, the WriteCloser may be of a different type.
  2058  func newBuildLogBlob(objName string) (obj io.WriteCloser, url_ string) {
  2059  	if *mode == "dev" {
  2060  		// TODO(bradfitz): write to disk or something, or
  2061  		// something testable. Maybe memory.
  2062  		return struct {
  2063  			io.Writer
  2064  			io.Closer
  2065  		}{
  2066  			os.Stderr,
  2067  			io.NopCloser(nil),
  2068  		}, "devmode://build-log/" + objName
  2069  	}
  2070  	if pool.NewGCEConfiguration().StorageClient() == nil {
  2071  		panic("nil storageClient in newFailureBlob")
  2072  	}
  2073  	bucket := pool.NewGCEConfiguration().BuildEnv().LogBucket
  2074  
  2075  	wr := pool.NewGCEConfiguration().StorageClient().Bucket(bucket).Object(objName).NewWriter(context.Background())
  2076  	wr.ContentType = "text/plain; charset=utf-8"
  2077  
  2078  	return wr, fmt.Sprintf("https://storage.googleapis.com/%s/%s", bucket, objName)
  2079  }
  2080  
  2081  func randHex(n int) string {
  2082  	buf := make([]byte, n/2+1)
  2083  	if _, err := rand.Read(buf); err != nil {
  2084  		log.Fatalf("randHex: %v", err)
  2085  	}
  2086  	return fmt.Sprintf("%x", buf)[:n]
  2087  }
  2088  
  2089  // importPathOfRepo returns the Go import path corresponding to the
  2090  // root of the given non-"go" repo (Gerrit project). Because it's a Go
  2091  // import path, it always has forward slashes and no trailing slash.
  2092  //
  2093  // For example:
  2094  //
  2095  //	"net"    -> "golang.org/x/net"
  2096  //	"crypto" -> "golang.org/x/crypto"
  2097  //	"dl"     -> "golang.org/dl"
  2098  func importPathOfRepo(repo string) string {
  2099  	r := repos.ByGerritProject[repo]
  2100  	if r == nil {
  2101  		// mayBuildRev prevents adding work for repos we don't know about,
  2102  		// so this shouldn't happen. If it does, a panic will be useful.
  2103  		panic(fmt.Sprintf("importPathOfRepo(%q) on unknown repo %q", repo, repo))
  2104  	}
  2105  	if r.ImportPath == "" {
  2106  		// Likewise. This shouldn't happen.
  2107  		panic(fmt.Sprintf("importPathOfRepo(%q) doesn't have an ImportPath", repo))
  2108  	}
  2109  	return r.ImportPath
  2110  }
  2111  
  2112  // slowBotsFromComments looks at the Gerrit comments in work,
  2113  // and returns all build configurations that were explicitly
  2114  // requested to be tested as SlowBots via the TRY= syntax. It
  2115  // also returns any build terms that are not a valid builder
  2116  // or alias.
  2117  func slowBotsFromComments(work *apipb.GerritTryWorkItem) (builders []*dashboard.BuildConfig, invalidTryTerms []string) {
  2118  	tryTerms := latestTryTerms(work)
  2119  	invalidTryTerms = slices.Clone(tryTerms)
  2120  	for _, bc := range dashboard.Builders {
  2121  		for _, term := range tryTerms {
  2122  			if bc.MatchesSlowBotTerm(term) {
  2123  				invalidTryTerms = slices.DeleteFunc(invalidTryTerms, func(e string) bool {
  2124  					return e == term
  2125  				})
  2126  				builders = append(builders, bc)
  2127  				break
  2128  			}
  2129  		}
  2130  	}
  2131  	sort.Slice(builders, func(i, j int) bool {
  2132  		return builders[i].Name < builders[j].Name
  2133  	})
  2134  	return builders, invalidTryTerms
  2135  }
  2136  
  2137  type xRepoAndBuilder struct {
  2138  	Project string // "net", "tools", etc.
  2139  	Builder string // Builder to use. Empty string means default builder.
  2140  }
  2141  
  2142  func (rb xRepoAndBuilder) String() string {
  2143  	if rb.Builder == "" {
  2144  		return rb.Project
  2145  	}
  2146  	return rb.Project + "@" + rb.Builder
  2147  }
  2148  
  2149  // xReposFromComments looks at the TRY= comments from Gerrit (in work) and
  2150  // returns any additional subrepos that should be tested. The TRY= comments
  2151  // are expected to be of the format TRY=x/foo or TRY=x/foo@builder where foo is
  2152  // the name of the subrepo and builder is a builder name. If no builder is
  2153  // provided, a default builder is used.
  2154  func xReposFromComments(work *apipb.GerritTryWorkItem) map[xRepoAndBuilder]bool {
  2155  	xrepos := make(map[xRepoAndBuilder]bool)
  2156  	for _, term := range latestTryTerms(work) {
  2157  		if len(term) < len("x/_") || term[:2] != "x/" {
  2158  			continue
  2159  		}
  2160  		parts := strings.SplitN(term, "@", 2)
  2161  		xrepo := parts[0][2:]
  2162  		builder := "" // By convention, this means the default builder.
  2163  		if len(parts) > 1 {
  2164  			builder = parts[1]
  2165  		}
  2166  		xrepos[xRepoAndBuilder{
  2167  			Project: xrepo,
  2168  			Builder: builder,
  2169  		}] = true
  2170  	}
  2171  	return xrepos
  2172  }
  2173  
  2174  // latestTryTerms returns the terms that follow the TRY= syntax in Gerrit comments.
  2175  func latestTryTerms(work *apipb.GerritTryWorkItem) []string {
  2176  	tryMsg := latestTryMessage(work) // "aix, darwin, linux-386-387, arm64, x/tools"
  2177  	if tryMsg == "" {
  2178  		return nil
  2179  	}
  2180  	if len(tryMsg) > 1<<10 { // arbitrary sanity
  2181  		return nil
  2182  	}
  2183  	return strings.FieldsFunc(tryMsg, func(c rune) bool {
  2184  		return !unicode.IsLetter(c) && !unicode.IsNumber(c) && c != '-' && c != '_' && c != '/' && c != '@'
  2185  	})
  2186  }
  2187  
  2188  func latestTryMessage(work *apipb.GerritTryWorkItem) string {
  2189  	// Prioritize exact version matches first
  2190  	for i := len(work.TryMessage) - 1; i >= 0; i-- {
  2191  		m := work.TryMessage[i]
  2192  		if m.Version == work.Version {
  2193  			return m.Message
  2194  		}
  2195  	}
  2196  	// Otherwise the latest message at all
  2197  	for i := len(work.TryMessage) - 1; i >= 0; i-- {
  2198  		m := work.TryMessage[i]
  2199  		if m.Message != "" {
  2200  			return m.Message
  2201  		}
  2202  	}
  2203  	return ""
  2204  }
  2205  
  2206  // handlePostSubmitActiveJSON serves JSON with the info for which builds
  2207  // are currently building. The build.golang.org dashboard renders these as little
  2208  // blue gophers that link to the each build's status.
  2209  // TODO: this a transitional step on our way towards merging build.golang.org into
  2210  // this codebase; see https://github.com/golang/go/issues/34744#issuecomment-563398753.
  2211  func handlePostSubmitActiveJSON(w http.ResponseWriter, r *http.Request) {
  2212  	w.Header().Set("Content-Type", "application/json")
  2213  	json.NewEncoder(w).Encode(activePostSubmitBuilds())
  2214  }
  2215  
  2216  func activePostSubmitBuilds() []types.ActivePostSubmitBuild {
  2217  	var ret []types.ActivePostSubmitBuild
  2218  	statusMu.Lock()
  2219  	defer statusMu.Unlock()
  2220  	for _, st := range status {
  2221  		if st.isTry() || !st.HasBuildlet() {
  2222  			continue
  2223  		}
  2224  		st.mu.Lock()
  2225  		logsURL := st.logsURLLocked()
  2226  		st.mu.Unlock()
  2227  
  2228  		var commit, goCommit string
  2229  		if st.IsSubrepo() {
  2230  			commit, goCommit = st.SubRev, st.Rev
  2231  		} else {
  2232  			commit = st.Rev
  2233  		}
  2234  		ret = append(ret, types.ActivePostSubmitBuild{
  2235  			StatusURL: logsURL,
  2236  			Builder:   st.Name,
  2237  			Commit:    commit,
  2238  			GoCommit:  goCommit,
  2239  		})
  2240  	}
  2241  	return ret
  2242  }
  2243  
  2244  func mustCreateSecretClientOnGCE() *secret.Client {
  2245  	if !metadata.OnGCE() {
  2246  		return nil
  2247  	}
  2248  	return secret.MustNewClient()
  2249  }
  2250  
  2251  func mustCreateEC2BuildletPool(sc *secret.Client, isRemoteBuildlet func(instName string) bool) *pool.EC2Buildlet {
  2252  	awsKeyID, err := sc.Retrieve(context.Background(), secret.NameAWSKeyID)
  2253  	if err != nil {
  2254  		log.Fatalf("unable to retrieve secret %q: %s", secret.NameAWSKeyID, err)
  2255  	}
  2256  
  2257  	awsAccessKey, err := sc.Retrieve(context.Background(), secret.NameAWSAccessKey)
  2258  	if err != nil {
  2259  		log.Fatalf("unable to retrieve secret %q: %s", secret.NameAWSAccessKey, err)
  2260  	}
  2261  
  2262  	awsClient, err := cloud.NewAWSClient(buildenv.Production.AWSRegion, awsKeyID, awsAccessKey, cloud.WithRateLimiter(cloud.DefaultEC2LimitConfig))
  2263  	if err != nil {
  2264  		log.Fatalf("unable to create AWS client: %s", err)
  2265  	}
  2266  
  2267  	ec2Pool, err := pool.NewEC2Buildlet(awsClient, buildenv.Production, dashboard.Hosts, isRemoteBuildlet)
  2268  	if err != nil {
  2269  		log.Fatalf("unable to create EC2 buildlet pool: %s", err)
  2270  	}
  2271  	return ec2Pool
  2272  }
  2273  
  2274  func mustRetrieveSSHCertificateAuthority() (privateKey []byte) {
  2275  	privateKey, _, err := remote.SSHKeyPair()
  2276  	if err != nil {
  2277  		log.Fatalf("unable to create SSH CA cert: %s", err)
  2278  	}
  2279  	return
  2280  }
  2281  
  2282  func mustStorageClient() *storage.Client {
  2283  	if metadata.OnGCE() {
  2284  		return pool.NewGCEConfiguration().StorageClient()
  2285  	}
  2286  	storageClient, err := storage.NewClient(context.Background(), option.WithoutAuthentication())
  2287  	if err != nil {
  2288  		log.Fatalf("unable to create storage client: %s", err)
  2289  	}
  2290  	return storageClient
  2291  }
  2292  
  2293  func fromSecret(ctx context.Context, sc *secret.Client, secretName string) (string, error) {
  2294  	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
  2295  	defer cancel()
  2296  	return sc.Retrieve(ctx, secretName)
  2297  }
  2298  
  2299  func retrieveSSHKeys(ctx context.Context, sc *secret.Client, m string) (publicKey, privateKey []byte, err error) {
  2300  	if m == "dev" {
  2301  		return remote.SSHKeyPair()
  2302  	} else if metadata.OnGCE() {
  2303  		privateKeyS, err := fromSecret(ctx, sc, secret.NameGomoteSSHPrivateKey)
  2304  		if err != nil {
  2305  			return nil, nil, err
  2306  		}
  2307  		publicKeyS, err := fromSecret(ctx, sc, secret.NameGomoteSSHPublicKey)
  2308  		if err != nil {
  2309  			return nil, nil, err
  2310  		}
  2311  		return []byte(privateKeyS), []byte(publicKeyS), nil
  2312  	}
  2313  	return nil, nil, fmt.Errorf("unable to retrieve ssh keys")
  2314  }