golang.org/x/build@v0.0.0-20240506185731-218518f32b70/cmd/securitybot/main.go (about) 1 // Copyright 2022 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package main 6 7 import ( 8 "bytes" 9 "compress/gzip" 10 "context" 11 "crypto/rand" 12 "errors" 13 "flag" 14 "fmt" 15 "io" 16 "log" 17 "net/http" 18 "os" 19 "os/signal" 20 "path" 21 "strings" 22 "sync" 23 "syscall" 24 "text/tabwriter" 25 "time" 26 27 "golang.org/x/build/buildenv" 28 "golang.org/x/build/buildlet" 29 "golang.org/x/build/dashboard" 30 "golang.org/x/build/gerrit" 31 "golang.org/x/build/internal/gomote/protos" 32 "golang.org/x/build/internal/iapclient" 33 "golang.org/x/build/repos" 34 "golang.org/x/build/types" 35 "golang.org/x/oauth2" 36 "golang.org/x/oauth2/google" 37 38 "cloud.google.com/go/storage" 39 ) 40 41 type tester struct { 42 source string 43 repo string 44 45 coordinator *buildlet.GRPCCoordinatorClient 46 gcs *storage.Client 47 http *http.Client 48 gerrit *gerrit.Client 49 } 50 51 type builderResult struct { 52 builderType string 53 logURL string 54 passed bool 55 err error 56 } 57 58 type buildInfo struct { 59 revision string 60 branch string 61 changeArchive []byte 62 goArchive []byte 63 } 64 65 func (bi *buildInfo) isSubrepo() bool { 66 repo, _, _ := strings.Cut(bi.branch, ".") 67 return repos.ByGerritProject[repo] != nil 68 } 69 70 func createBuildletWithRetry(ctx context.Context, coordinator *buildlet.GRPCCoordinatorClient, builderType string) (buildlet.RemoteClient, error) { 71 const retries int = 5 72 var err error 73 for i := 0; i < retries; i++ { 74 var c buildlet.RemoteClient 75 c, err = coordinator.CreateBuildletWithStatus(ctx, builderType, func(status types.BuildletWaitStatus) {}) 76 if err == nil { 77 return c, nil 78 } 79 // TODO(roland): we currently only care about retrying when we hit this 80 // particular AWS error, but we may want to retry in other cases in the 81 // future? 82 if !strings.Contains(err.Error(), "ResourceNotReady: failed waiting for successful resource state") { 83 return nil, err 84 } 85 log.Printf("%s: failed to create buildlet (attempt %d): %s", builderType, retries, err) 86 time.Sleep(time.Second * 30) 87 } 88 return nil, fmt.Errorf("failed to create buildlet after %d attempts, last error: %s", retries, err) 89 } 90 91 // runTests creates a buildlet for the specified builderType, sends a copy of go1.4 and the change tarball to 92 // the buildlet, and then executes the platform specific 'all' script, streaming the output to a GCS bucket. 93 // The buildlet is destroyed on return. 94 func (t *tester) runTests(ctx context.Context, builderType string, info *buildInfo) builderResult { 95 log.Printf("%s: creating buildlet", builderType) 96 c, err := createBuildletWithRetry(ctx, t.coordinator, builderType) 97 if err != nil { 98 return builderResult{builderType: builderType, err: fmt.Errorf("failed to create buildlet: %s", err)} 99 } 100 buildletName := c.RemoteName() 101 log.Printf("%s: created buildlet (%s)", builderType, buildletName) 102 defer func() { 103 if err := c.Close(); err != nil { 104 log.Printf("%s: unable to close buildlet %q: %s", builderType, buildletName, err) 105 } else { 106 log.Printf("%s: destroyed buildlet", builderType) 107 } 108 }() 109 110 buildConfig, ok := dashboard.Builders[builderType] 111 if !ok { 112 log.Printf("%s: unknown builder type", builderType) 113 return builderResult{builderType: builderType, err: errors.New("unknown builder type")} 114 } 115 bootstrapURL := buildConfig.GoBootstrapURL(buildenv.Production) 116 // Assume if bootstrapURL == "" the buildlet is already bootstrapped 117 if bootstrapURL != "" { 118 if err := c.PutTarFromURL(ctx, bootstrapURL, "go1.4"); err != nil { 119 log.Printf("%s: failed to bootstrap buildlet: %s", builderType, err) 120 return builderResult{builderType: builderType, err: fmt.Errorf("failed to bootstrap buildlet: %s", err)} 121 } 122 } 123 124 suffix := make([]byte, 4) 125 rand.Read(suffix) 126 127 var output io.Writer 128 var logURL string 129 130 if t.gcs != nil { 131 gcsBucket, gcsObject := *gcsBucket, fmt.Sprintf("%s-%x/%s", info.revision, suffix, builderType) 132 gcsWriter, err := newLiveWriter(ctx, t.gcs.Bucket(gcsBucket).Object(gcsObject)) 133 if err != nil { 134 log.Printf("%s: failed to create log writer: %s", builderType, err) 135 return builderResult{builderType: builderType, err: fmt.Errorf("failed to create log writer: %s", err)} 136 } 137 defer func() { 138 if err := gcsWriter.Close(); err != nil { 139 log.Printf("%s: failed to flush GCS writer: %s", builderType, err) 140 } 141 }() 142 logURL = "https://storage.cloud.google.com/" + path.Join(gcsBucket, gcsObject) 143 output = gcsWriter 144 } else { 145 output = &localWriter{buildletName} 146 } 147 148 work, err := c.WorkDir(ctx) 149 if err != nil { 150 log.Printf("%s: failed to retrieve work dir: %s", builderType, err) 151 return builderResult{builderType: builderType, err: fmt.Errorf("failed to get work dir: %s", err)} 152 } 153 154 env := append(buildConfig.Env(), "GOPATH="+work+"/gopath", "GOROOT_FINAL="+dashboard.GorootFinal(buildConfig.GOOS()), "GOROOT="+work+"/go") 155 // Because we are unable to determine the internal GCE hostname of the 156 // coordinator, we cannot use the same GOPROXY proxy that the public TryBots 157 // use to get around the disabled network. Instead of using that proxy 158 // proxy, we instead wait to disable the network until right before we 159 // actually execute the tests, and manually download module dependencies 160 // using "go mod download" if we are testing a subrepo branch. 161 var disableNetwork bool 162 for i, v := range env { 163 if v == "GO_DISABLE_OUTBOUND_NETWORK=1" { 164 env = append(env[:i], env[i+1:]...) 165 disableNetwork = true 166 break 167 } 168 } 169 dirName := "go" 170 171 if info.isSubrepo() { 172 dirName = info.branch 173 174 // fetch and build go at master first 175 if err := c.PutTar(ctx, bytes.NewReader(info.goArchive), "go"); err != nil { 176 log.Printf("%s: failed to upload change archive: %s", builderType, err) 177 return builderResult{builderType: builderType, err: fmt.Errorf("failed to upload change archive: %s", err)} 178 } 179 if err := c.Put(ctx, strings.NewReader("devel "+info.revision), "go/VERSION", 0644); err != nil { 180 log.Printf("%s: failed to upload VERSION file: %s", builderType, err) 181 return builderResult{builderType: builderType, err: fmt.Errorf("failed to upload VERSION file: %s", err)} 182 } 183 184 cmd, args := "go/"+buildConfig.MakeScript(), buildConfig.MakeScriptArgs() 185 remoteErr, execErr := c.Exec(ctx, cmd, buildlet.ExecOpts{ 186 Output: output, 187 ExtraEnv: append(env, "GO_DISABLE_OUTBOUND_NETWORK=0"), 188 Args: args, 189 OnStartExec: func() { 190 log.Printf("%s: starting make.bash %s", builderType, logURL) 191 }, 192 }) 193 if execErr != nil { 194 log.Printf("%s: failed to execute make.bash: %s", builderType, execErr) 195 return builderResult{builderType: builderType, err: fmt.Errorf("failed to execute make.bash: %s", err)} 196 } 197 if remoteErr != nil { 198 log.Printf("%s: make.bash failed: %s", builderType, remoteErr) 199 return builderResult{builderType: builderType, err: fmt.Errorf("make.bash failed: %s", remoteErr)} 200 } 201 } 202 203 if err := c.PutTar(ctx, bytes.NewReader(info.changeArchive), dirName); err != nil { 204 log.Printf("%s: failed to upload change archive: %s", builderType, err) 205 return builderResult{builderType: builderType, err: fmt.Errorf("failed to upload change archive: %s", err)} 206 } 207 208 if !info.isSubrepo() { 209 if err := c.Put(ctx, strings.NewReader("devel "+info.revision), "go/VERSION", 0644); err != nil { 210 log.Printf("%s: failed to upload VERSION file: %s", builderType, err) 211 return builderResult{builderType: builderType, err: fmt.Errorf("failed to upload VERSION file: %s", err)} 212 } 213 } 214 215 var cmd string 216 var args []string 217 if info.isSubrepo() { 218 cmd, args = "go/bin/go", []string{"test", "./..."} 219 } else { 220 cmd, args = "go/"+buildConfig.AllScript(), buildConfig.AllScriptArgs() 221 } 222 opts := buildlet.ExecOpts{ 223 Output: output, 224 ExtraEnv: env, 225 Args: args, 226 OnStartExec: func() { 227 log.Printf("%s: starting tests %s", builderType, logURL) 228 }, 229 } 230 if info.isSubrepo() { 231 opts.Dir = dirName 232 233 remoteErr, execErr := c.Exec(ctx, "go/bin/go", buildlet.ExecOpts{ 234 Args: []string{"mod", "download"}, 235 ExtraEnv: append(env, "GO_DISABLE_OUTBOUND_NETWORK=0"), 236 Dir: dirName, 237 Output: output, 238 OnStartExec: func() { 239 log.Printf("%s: downloading modules %s", builderType, logURL) 240 }, 241 }) 242 if execErr != nil { 243 log.Printf("%s: failed to execute go mod download: %s", builderType, execErr) 244 return builderResult{builderType: builderType, err: fmt.Errorf("failed to execute go mod download: %s", err)} 245 } 246 if remoteErr != nil { 247 log.Printf("%s: go mod download failed: %s", builderType, remoteErr) 248 return builderResult{builderType: builderType, err: fmt.Errorf("go mod download failed: %s", remoteErr)} 249 } 250 } 251 if disableNetwork { 252 opts.ExtraEnv = append(opts.ExtraEnv, "GO_DISABLE_OUTBOUND_NETWORK=1") 253 } 254 remoteErr, execErr := c.Exec(ctx, cmd, opts) 255 if execErr != nil { 256 log.Printf("%s: failed to execute tests: %s", builderType, execErr) 257 return builderResult{builderType: builderType, err: fmt.Errorf("failed to execute all.bash: %s", err)} 258 } 259 if remoteErr != nil { 260 log.Printf("%s: tests failed: %s", builderType, remoteErr) 261 return builderResult{builderType: builderType, logURL: logURL, passed: false} 262 } 263 log.Printf("%s: tests succeeded", builderType) 264 return builderResult{builderType: builderType, logURL: logURL, passed: true} 265 } 266 267 // gcsLiveWriter is an extremely hacky way of getting live(ish) updating logs while 268 // using GCS. The buffer is written out to an object every 5 seconds. 269 type gcsLiveWriter struct { 270 obj *storage.ObjectHandle 271 buf *bytes.Buffer 272 mu *sync.Mutex 273 stop chan bool 274 err chan error 275 } 276 277 func newLiveWriter(ctx context.Context, obj *storage.ObjectHandle) (*gcsLiveWriter, error) { 278 stopCh, errCh := make(chan bool, 1), make(chan error, 1) 279 mu := new(sync.Mutex) 280 buf := new(bytes.Buffer) 281 write := func(b []byte) error { 282 w := obj.NewWriter(ctx) 283 w.Write(b) 284 if err := w.Close(); err != nil { 285 return err 286 } 287 return nil 288 } 289 if err := write([]byte{}); err != nil { 290 return nil, err 291 } 292 go func() { 293 t := time.NewTicker(time.Second * 5) 294 for { 295 select { 296 case <-stopCh: 297 mu.Lock() 298 errCh <- write(buf.Bytes()) 299 mu.Unlock() 300 case <-t.C: 301 mu.Lock() 302 if err := write(buf.Bytes()); err != nil { 303 log.Printf("GCS write to %q failed! %s", path.Join(obj.BucketName(), obj.ObjectName()), err) 304 errCh <- err 305 } 306 mu.Unlock() 307 } 308 } 309 }() 310 return &gcsLiveWriter{obj: obj, buf: buf, mu: mu, stop: stopCh, err: errCh}, nil 311 } 312 313 func (g *gcsLiveWriter) Write(b []byte) (int, error) { 314 g.mu.Lock() 315 g.buf.Write(b) 316 g.mu.Unlock() 317 return len(b), nil 318 } 319 320 func (g *gcsLiveWriter) Close() error { 321 g.stop <- true 322 return <-g.err 323 } 324 325 type localWriter struct { 326 buildlet string 327 } 328 329 func (lw *localWriter) Write(b []byte) (int, error) { 330 prefix := []byte(lw.buildlet + ": ") 331 var prefixed []byte 332 for _, l := range bytes.Split(b, []byte("\n")) { 333 prefixed = append(prefixed, append(prefix, append(l, byte('\n'))...)...) 334 } 335 336 return os.Stdout.Write(prefixed) 337 } 338 339 // getTar retrieves the tarball for a specific git revision from t.source and returns 340 // the bytes. 341 func (t *tester) getTar(revision string) ([]byte, error) { 342 tarURL := t.source + "/" + t.repo + "/+archive/" + revision + ".tar.gz" 343 req, err := http.NewRequest("GET", tarURL, nil) 344 if err != nil { 345 return nil, err 346 } 347 resp, err := t.http.Do(req) 348 if err != nil { 349 return nil, err 350 } 351 if resp.StatusCode != http.StatusOK { 352 return nil, fmt.Errorf("failed to fetch %q: %v", tarURL, resp.Status) 353 } 354 defer resp.Body.Close() 355 archive, err := io.ReadAll(resp.Body) 356 if err != nil { 357 return nil, err 358 } 359 360 // Check what we got back was actually the archive, since Google's SSO page will 361 // return 200. 362 _, err = gzip.NewReader(bytes.NewReader(archive)) 363 if err != nil { 364 return nil, err 365 } 366 367 return archive, nil 368 } 369 370 // run tests the specific revision on the builders specified. 371 func (t *tester) run(ctx context.Context, revision, branch string, builders []string) ([]builderResult, error) { 372 changeArchive, err := t.getTar(revision) 373 if err != nil { 374 return nil, fmt.Errorf("failed to retrieve change archive: %s", err) 375 } 376 377 info := &buildInfo{ 378 revision: revision, 379 branch: branch, 380 changeArchive: changeArchive, 381 } 382 383 if branch != "master" { 384 goArchive, err := t.getTar("master") 385 if err != nil { 386 return nil, fmt.Errorf("failed to retrieve go master archive: %s", err) 387 } 388 info.goArchive = goArchive 389 } 390 391 wg := new(sync.WaitGroup) 392 resultsCh := make(chan builderResult, len(builders)) 393 for _, bt := range builders { 394 wg.Add(1) 395 go func(bt string) { 396 defer wg.Done() 397 result := t.runTests(ctx, bt, info) // have a proper timeout 398 resultsCh <- result 399 }(bt) 400 } 401 wg.Wait() 402 close(resultsCh) 403 results := make([]builderResult, 0, len(builders)) 404 for result := range resultsCh { 405 results = append(results, result) 406 } 407 408 return results, nil 409 } 410 411 // commentBeginning send the review message indicating the trybots are beginning. 412 func (t *tester) commentBeginning(ctx context.Context, change *gerrit.ChangeInfo) error { 413 // It would be nice to do a similar thing to the coordinator, using comment 414 // threads that can be resolved, but that is slightly more complex than what 415 // we really need to start with. 416 // 417 // Similarly it would be nice to comment links to logs earlier. 418 return t.gerrit.SetReview(ctx, change.ID, change.CurrentRevision, gerrit.ReviewInput{ 419 Message: "TryBots beginning", 420 }) 421 } 422 423 // commentResults sends the review message containing the results for the change 424 // and applies the TryBot-Result label. 425 func (t *tester) commentResults(ctx context.Context, change *gerrit.ChangeInfo, results []builderResult) error { 426 state := "succeeded" 427 label := 1 428 buf := new(bytes.Buffer) 429 w := tabwriter.NewWriter(buf, 0, 0, 1, ' ', 0) 430 for _, res := range results { 431 s := "pass" 432 context := res.logURL 433 if res.err != nil { 434 s = "error" 435 state = "failed" 436 label = -1 437 context = res.err.Error() 438 } else if !res.passed { 439 s = "failed" 440 state = "failed" 441 label = -1 442 } 443 fmt.Fprintf(w, " %s\t[%s]\t%s\n", res.builderType, s, context) 444 } 445 w.Flush() 446 447 comment := fmt.Sprintf("Tests %s\n\n%s", state, buf.String()) 448 if err := t.gerrit.SetReview(ctx, change.ID, change.CurrentRevision, gerrit.ReviewInput{ 449 Message: comment, 450 Labels: map[string]int{"TryBot-Result": label}, 451 }); err != nil { 452 return err 453 } 454 455 return nil 456 } 457 458 // findChanges queries a gerrit instance for changes which should be tested, returning a 459 // slice of revisions for each change. 460 func (t *tester) findChanges(ctx context.Context) ([]*gerrit.ChangeInfo, error) { 461 return t.gerrit.QueryChanges( 462 ctx, 463 fmt.Sprintf("project:%s status:open label:Run-TryBot+1 -label:TryBot-Result-1 -label:TryBot-Result+1", t.repo), 464 gerrit.QueryChangesOpt{Fields: []string{"CURRENT_REVISION"}}, 465 ) 466 } 467 468 var ( 469 username = flag.String("user", "user-security", "Coordinator username") 470 471 gerritURL = flag.String("gerrit", "https://team-review.googlesource.com", "URL for the gerrit instance") 472 sourceURL = flag.String("source", "https://team.googlesource.com", "URL for the source instance") 473 repoName = flag.String("repo", "golang/go-private", "Gerrit repository name") 474 475 gcsBucket = flag.String("gcs", "", "GCS bucket path for logs") 476 477 revision = flag.String("revision", "", "Revision to test, when running in one-shot mode") 478 buildersStr = flag.String("builders", "", "Comma separated list of builder types to test against by default") 479 ) 480 481 // allowedBuilders contains the set of builders which are acceptable to use for testing 482 // PRIVATE track security changes. These builders should, generally, be controlled by 483 // Google. 484 var allowedBuilders = map[string]bool{ 485 "js-wasm": true, 486 487 "linux-386": true, 488 "linux-386-longtest": true, 489 "linux-amd64": true, 490 "linux-amd64-longtest": true, 491 492 "linux-amd64-bullseye": true, 493 494 "darwin-amd64-12_0": true, 495 "darwin-arm64-12": true, 496 497 "windows-386-2012": true, 498 "windows-amd64-2016": true, 499 "windows-arm64-11": true, 500 } 501 502 // firstClassBuilders is the default set of builders to test against, 503 // representing the first class ports as defined by the port policy. 504 var firstClassBuilders = []string{ 505 "linux-386", 506 "linux-amd64-longtest-race", 507 "linux-arm-aws", 508 "linux-arm64", 509 510 "darwin-amd64-12_0", 511 "darwin-arm64-12", 512 513 "windows-386-2012", 514 "windows-amd64-longtest", 515 } 516 517 func main() { 518 flag.Parse() 519 ctx, cancel := context.WithCancel(context.Background()) 520 521 // When kubernetes attempts to kill a workload (i.e. during a restart or 522 // rollout) it sends a SIGTERM, followed by a SIGKILL after a specified 523 // timeout. In order to cleanly shutdown the service, as well as destroying 524 // any created buildlets etc, cancel the global context we pass around, 525 // which should cascade down. 526 sigtermChan := make(chan os.Signal, 1) 527 signal.Notify(sigtermChan, syscall.SIGTERM) 528 go func() { 529 <-sigtermChan 530 // Cancelling the context should cause the program to exit, either via 531 // a error leading to a log.Fatalf, or the select loop hitting ctx.Done. 532 // TODO(roland): we may want to make the shutdown somewhat more graceful, 533 // perhaps commenting that the current run was aborted if we are in the 534 // middle of one, but for now just exiting cleanly is better than nothing. 535 cancel() 536 }() 537 538 creds, err := google.FindDefaultCredentials(ctx, gerrit.OAuth2Scopes...) 539 if err != nil { 540 log.Fatalf("reading GCP credentials: %v", err) 541 } 542 gerritClient := gerrit.NewClient(*gerritURL, gerrit.OAuth2Auth(creds.TokenSource)) 543 httpClient := oauth2.NewClient(ctx, creds.TokenSource) 544 545 var builders []string 546 if *buildersStr != "" { 547 for _, b := range strings.Split(*buildersStr, ",") { 548 if !allowedBuilders[b] { 549 log.Fatalf("builder type %q not allowed", b) 550 } 551 builders = append(builders, b) 552 } 553 554 } else { 555 builders = firstClassBuilders 556 } 557 558 var gcsClient *storage.Client 559 if *gcsBucket != "" { 560 gcsClient, err = storage.NewClient(ctx) 561 if err != nil { 562 log.Fatalf("Could not connect to GCS: %v", err) 563 } 564 } 565 566 cc, err := iapclient.GRPCClient(ctx, "build.golang.org:443") 567 if err != nil { 568 log.Fatalf("Could not connect to coordinator: %v", err) 569 } 570 b := buildlet.GRPCCoordinatorClient{ 571 Client: protos.NewGomoteServiceClient(cc), 572 } 573 574 t := &tester{ 575 source: strings.TrimSuffix(*sourceURL, "/"), 576 repo: *repoName, 577 coordinator: &b, 578 http: httpClient, 579 gcs: gcsClient, 580 gerrit: gerritClient, 581 } 582 583 if *revision != "" { 584 if _, err := t.run(ctx, *revision, "", builders); err != nil { 585 log.Fatal(err) 586 } 587 } else { 588 ticker := time.NewTicker(time.Minute) 589 for { 590 select { 591 case <-ticker.C: 592 case <-ctx.Done(): 593 return 594 } 595 changes, err := t.findChanges(ctx) 596 if err != nil { 597 log.Fatalf("findChanges failed: %v", err) 598 } 599 log.Printf("found %d changes", len(changes)) 600 601 for _, change := range changes { 602 log.Printf("testing CL %d patchset %d (%s)", change.ChangeNumber, change.Revisions[change.CurrentRevision].PatchSetNumber, change.CurrentRevision) 603 if err := t.commentBeginning(ctx, change); err != nil { 604 log.Fatalf("commentBeginning failed: %v", err) 605 } 606 results, err := t.run(ctx, change.CurrentRevision, change.Branch, builders) 607 if err != nil { 608 log.Fatalf("run failed: %v", err) 609 } 610 if err := t.commentResults(ctx, change, results); err != nil { 611 log.Fatalf("commentResults failed: %v", err) 612 } 613 } 614 } 615 } 616 }