go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/milo/internal/buildsource/swarming/build.go (about) 1 // Copyright 2015 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package swarming 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 "net/http" 22 "net/url" 23 "regexp" 24 "strconv" 25 "strings" 26 "time" 27 28 "google.golang.org/api/googleapi" 29 "google.golang.org/protobuf/proto" 30 "google.golang.org/protobuf/types/known/timestamppb" 31 32 buildbucketpb "go.chromium.org/luci/buildbucket/proto" 33 "go.chromium.org/luci/buildbucket/protoutil" 34 bbv1 "go.chromium.org/luci/common/api/buildbucket/buildbucket/v1" 35 swarming "go.chromium.org/luci/common/api/swarming/swarming/v1" 36 "go.chromium.org/luci/common/data/strpair" 37 "go.chromium.org/luci/common/errors" 38 "go.chromium.org/luci/common/logging" 39 "go.chromium.org/luci/common/sync/parallel" 40 "go.chromium.org/luci/grpc/grpcutil" 41 "go.chromium.org/luci/logdog/client/butlerlib/streamclient" 42 "go.chromium.org/luci/logdog/client/coordinator" 43 "go.chromium.org/luci/logdog/common/types" 44 "go.chromium.org/luci/luciexe/legacy/annotee" 45 annopb "go.chromium.org/luci/luciexe/legacy/annotee/proto" 46 "go.chromium.org/luci/milo/frontend/ui" 47 "go.chromium.org/luci/milo/internal/buildsource/rawpresentation" 48 "go.chromium.org/luci/milo/internal/config" 49 "go.chromium.org/luci/milo/internal/model/milostatus" 50 "go.chromium.org/luci/milo/internal/projectconfig" 51 "go.chromium.org/luci/server/auth" 52 ) 53 54 // swarmingService is an interface that fetches data from Swarming. 55 // 56 // In production, this is fetched from a Swarming host. For testing, this can 57 // be replaced with a mock. 58 type swarmingService interface { 59 GetHost() string 60 GetSwarmingResult(c context.Context, taskID string) (*swarming.SwarmingRpcsTaskResult, error) 61 GetSwarmingRequest(c context.Context, taskID string) (*swarming.SwarmingRpcsTaskRequest, error) 62 GetTaskOutput(c context.Context, taskID string) (string, error) 63 Close() 64 } 65 66 // ErrNotMiloJob is returned if a Swarming task is fetched that does not self- 67 // identify as a Milo job. 68 var ErrNotMiloJob = errors.New("Not a Milo Job or access denied", grpcutil.PermissionDeniedTag) 69 70 // SwarmingTimeLayout is time layout used by swarming. 71 const SwarmingTimeLayout = "2006-01-02T15:04:05.999999999" 72 73 // Swarming task states.. 74 const ( 75 // TaskRunning means task is running. 76 TaskRunning = "RUNNING" 77 // TaskPending means task didn't start yet. 78 TaskPending = "PENDING" 79 // TaskExpired means task expired and did not start. 80 TaskExpired = "EXPIRED" 81 // TaskTimedOut means task started, but took too long. 82 TaskTimedOut = "TIMED_OUT" 83 // TaskBotDied means task started but bot died. 84 TaskBotDied = "BOT_DIED" 85 // TaskCanceled means the task was canceled. See CompletedTs to determine whether it was started. 86 TaskCanceled = "CANCELED" 87 // TaskKill means the task was canceled. See CompletedTs to determine whether it was started. 88 TaskKilled = "KILLED" 89 // TaskCompleted means task is complete. 90 TaskCompleted = "COMPLETED" 91 // TaskNoResource means there was not enough capacity when scheduled, so the 92 // task failed immediately. 93 TaskNoResource = "NO_RESOURCE" 94 // TaskClientError means that the client has caused a task to error 95 TaskClientError = "CLIENT_ERROR" 96 ) 97 98 func getSwarmingClient(c context.Context, host string) (*swarming.Service, error) { 99 t, err := auth.GetRPCTransport(c, auth.AsSelf) 100 if err != nil { 101 return nil, err 102 } 103 sc, err := swarming.New(&http.Client{Transport: t}) 104 if err != nil { 105 return nil, err 106 } 107 sc.BasePath = fmt.Sprintf("https://%s/_ah/api/swarming/v1/", host) 108 return sc, nil 109 } 110 111 type prodSwarmingService struct { 112 host string 113 client *swarming.Service 114 cancel func() 115 } 116 117 func newProdService(c context.Context, host string) (*prodSwarmingService, error) { 118 host, err := getSwarmingHost(c, host) 119 if err != nil { 120 return nil, err 121 } 122 123 c, cancel := context.WithTimeout(c, 60*time.Second) 124 125 client, err := getSwarmingClient(c, host) 126 if err != nil { 127 cancel() 128 return nil, err 129 } 130 131 return &prodSwarmingService{ 132 host: host, 133 client: client, 134 cancel: cancel, 135 }, nil 136 } 137 138 func (svc *prodSwarmingService) GetHost() string { return svc.host } 139 140 func (svc *prodSwarmingService) GetSwarmingResult(c context.Context, taskID string) (*swarming.SwarmingRpcsTaskResult, error) { 141 return svc.client.Task.Result(taskID).Context(c).Do() 142 } 143 144 func (svc *prodSwarmingService) GetTaskOutput(c context.Context, taskID string) (string, error) { 145 stdout, err := svc.client.Task.Stdout(taskID).Context(c).Do() 146 if err != nil { 147 return "", err 148 } 149 return stdout.Output, nil 150 } 151 152 func (svc *prodSwarmingService) GetSwarmingRequest(c context.Context, taskID string) (*swarming.SwarmingRpcsTaskRequest, error) { 153 return svc.client.Task.Request(taskID).Context(c).Do() 154 } 155 156 func (svc *prodSwarmingService) Close() { 157 svc.cancel() 158 } 159 160 type swarmingFetchParams struct { 161 fetchLog bool 162 163 // taskResCallback, if not nil, is a callback that will be invoked after 164 // fetching the result. It will be passed a key/value map 165 // of the Swarming result's tags. 166 // 167 // If taskResCallback returns true, any pending log fetch will be canceled 168 // without error. 169 taskResCallback func(*swarming.SwarmingRpcsTaskResult) bool 170 } 171 172 type swarmingFetchResult struct { 173 res *swarming.SwarmingRpcsTaskResult 174 175 // log is the log data content. If no log data was fetched, this will empty. 176 // If the log fetch was canceled, this is undefined. 177 log string 178 } 179 180 // swarmingFetch fetches (in parallel) the components that it is configured to 181 // fetch. 182 // 183 // After fetching, an ACL check is performed to confirm that the user is 184 // permitted to view the resulting data. If this check fails, get returns 185 // errNotMiloJob. 186 func swarmingFetch(c context.Context, svc swarmingService, taskID string, req swarmingFetchParams) ( 187 *swarmingFetchResult, error) { 188 189 // logErr is managed separately from other fetch errors, since in some 190 // situations it's acceptable to not have a log stream. 191 var logErr error 192 var fr swarmingFetchResult 193 194 // Special Context to enable the cancellation of log fetching. 195 logsCanceled := false 196 logCtx, cancelLogs := context.WithCancel(c) 197 defer cancelLogs() 198 199 err := parallel.FanOutIn(func(workC chan<- func() error) { 200 workC <- func() (err error) { 201 if fr.res, err = svc.GetSwarmingResult(c, taskID); err == nil { 202 if req.taskResCallback != nil && req.taskResCallback(fr.res) { 203 logsCanceled = true 204 cancelLogs() 205 } 206 } else if ierr, ok := err.(*googleapi.Error); ok { 207 switch ierr.Code { 208 case http.StatusNotFound: 209 err = errors.Annotate(ierr, "not found on swarming").Tag(grpcutil.NotFoundTag).Err() 210 case http.StatusBadRequest: 211 err = errors.Annotate(ierr, "bad request").Tag(grpcutil.InvalidArgumentTag).Err() 212 } 213 } 214 return 215 } 216 217 if req.fetchLog { 218 workC <- func() error { 219 // Note: we're using the log Context here so we can cancel log fetch 220 // explicitly. 221 fr.log, logErr = svc.GetTaskOutput(logCtx, taskID) 222 return nil 223 } 224 } 225 }) 226 if err != nil { 227 return nil, err 228 } 229 230 // Current ACL implementation: 231 // If allow_milo:1 is present, it is a public job. Don't bother with ACL check. 232 // If it is not present, check the luci_project tag, and see if user is allowed 233 // to access said project. 234 if !isAllowed(c, fr.res.Tags) { 235 return nil, ErrNotMiloJob 236 } 237 238 if logErr != nil { 239 switch fr.res.State { 240 case TaskCompleted, TaskRunning, TaskCanceled, TaskKilled, TaskNoResource: 241 default: 242 // Ignore log errors if the task might be pending, timed out, expired, etc. 243 if err != nil { 244 fr.log = "" 245 logErr = nil 246 } 247 } 248 } 249 250 // If we explicitly canceled logs, everything is OK. 251 if logErr == context.Canceled && logsCanceled { 252 logErr = nil 253 } 254 return &fr, logErr 255 } 256 257 func taskProperties(sr *swarming.SwarmingRpcsTaskResult) *ui.PropertyGroup { 258 props := &ui.PropertyGroup{GroupName: "Swarming"} 259 if len(sr.CostsUsd) == 1 { 260 props.Property = append(props.Property, &ui.Property{ 261 Key: "Cost of job (USD)", 262 Value: fmt.Sprintf("$%.2f", sr.CostsUsd[0]), 263 }) 264 } 265 if sr.State == TaskCompleted || sr.State == TaskTimedOut { 266 props.Property = append(props.Property, &ui.Property{ 267 Key: "Exit Code", 268 Value: fmt.Sprintf("%d", sr.ExitCode), 269 }) 270 } 271 return props 272 } 273 274 // addBuilderLink adds a link to the buildbucket builder view. 275 func addBuilderLink(c context.Context, build *ui.MiloBuildLegacy, tags strpair.Map) { 276 bucket := tags.Get("buildbucket_bucket") 277 builder := tags.Get("builder") 278 project := tags.Get("luci_project") 279 if bucket != "" && builder != "" { 280 builderParts := strings.Split(builder, "/") 281 builder = builderParts[len(builderParts)-1] 282 build.Summary.ParentLabel = ui.NewLink( 283 builder, fmt.Sprintf("/p/%s/builders/%s/%s", project, bucket, builder), 284 fmt.Sprintf("buildbucket builder %s on bucket %s", builder, bucket)) 285 } 286 } 287 288 // AddBanner adds an OS banner derived from "os" swarming tag, if present. 289 func AddBanner(build *ui.MiloBuildLegacy, tags strpair.Map) { 290 os := tags.Get("os") 291 parts := strings.SplitN(os, "-", 2) 292 var ver string 293 if len(parts) == 2 { 294 os = parts[0] 295 ver = parts[1] 296 } 297 298 var base ui.LogoBase 299 switch os { 300 case "Ubuntu": 301 base = ui.Ubuntu 302 case "Windows": 303 base = ui.Windows 304 case "Mac": 305 base = ui.OSX 306 case "Android": 307 base = ui.Android 308 default: 309 return 310 } 311 build.Summary.Banner = &ui.LogoBanner{ 312 OS: []ui.Logo{{ 313 LogoBase: base, 314 Subtitle: ver, 315 Count: 1, 316 }}, 317 } 318 } 319 320 // addTaskToMiloStep augments a Milo Annotation Protobuf with state from the 321 // Swarming task. 322 func addTaskToMiloStep(c context.Context, host string, sr *swarming.SwarmingRpcsTaskResult, step *annopb.Step) error { 323 step.Link = &annopb.AnnotationLink{ 324 Label: "Task " + sr.TaskId, 325 Value: &annopb.AnnotationLink_Url{ 326 Url: TaskPageURL(host, sr.TaskId).String(), 327 }, 328 } 329 330 switch sr.State { 331 case TaskRunning: 332 step.Status = annopb.Status_RUNNING 333 334 case TaskPending: 335 step.Status = annopb.Status_PENDING 336 337 case TaskExpired, TaskTimedOut, TaskBotDied, TaskClientError: 338 step.Status = annopb.Status_FAILURE 339 340 switch sr.State { 341 case TaskExpired: 342 step.FailureDetails = &annopb.FailureDetails{ 343 Type: annopb.FailureDetails_EXPIRED, 344 Text: "Task expired", 345 } 346 case TaskTimedOut: 347 step.FailureDetails = &annopb.FailureDetails{ 348 Type: annopb.FailureDetails_INFRA, 349 Text: "Task timed out", 350 } 351 case TaskBotDied: 352 step.FailureDetails = &annopb.FailureDetails{ 353 Type: annopb.FailureDetails_INFRA, 354 Text: "Bot died", 355 } 356 case TaskClientError: 357 step.FailureDetails = &annopb.FailureDetails{ 358 Type: annopb.FailureDetails_INFRA, 359 Text: "Client error", 360 } 361 } 362 363 case TaskCanceled, TaskKilled: 364 // Canceled build is user action, so it is not an infra failure. 365 step.Status = annopb.Status_FAILURE 366 step.FailureDetails = &annopb.FailureDetails{ 367 Type: annopb.FailureDetails_CANCELLED, 368 Text: "Task canceled by user", 369 } 370 371 case TaskNoResource: 372 step.Status = annopb.Status_FAILURE 373 step.FailureDetails = &annopb.FailureDetails{ 374 Type: annopb.FailureDetails_EXPIRED, 375 Text: "No resource available on Swarming", 376 } 377 378 case TaskCompleted: 379 380 switch { 381 case sr.InternalFailure: 382 step.Status = annopb.Status_FAILURE 383 step.FailureDetails = &annopb.FailureDetails{ 384 Type: annopb.FailureDetails_INFRA, 385 } 386 387 case sr.Failure: 388 step.Status = annopb.Status_FAILURE 389 390 default: 391 step.Status = annopb.Status_SUCCESS 392 } 393 394 default: 395 return fmt.Errorf("unknown swarming task state %q", sr.State) 396 } 397 398 // Compute start and finished times. 399 if sr.StartedTs != "" { 400 ts, err := time.Parse(SwarmingTimeLayout, sr.StartedTs) 401 if err != nil { 402 return fmt.Errorf("invalid task StartedTs: %s", err) 403 } 404 step.Started = timestamppb.New(ts) 405 } 406 if sr.CompletedTs != "" { 407 ts, err := time.Parse(SwarmingTimeLayout, sr.CompletedTs) 408 if err != nil { 409 return fmt.Errorf("invalid task CompletedTs: %s", err) 410 } 411 step.Ended = timestamppb.New(ts) 412 } 413 414 return nil 415 } 416 417 func addBuildsetInfo(build *ui.MiloBuildLegacy, tags strpair.Map) { 418 for _, bs := range tags[bbv1.TagBuildSet] { 419 if cl, ok := protoutil.ParseBuildSet(bs).(*buildbucketpb.GerritChange); ok { 420 if build.Trigger == nil { 421 build.Trigger = &ui.Trigger{} 422 } 423 build.Trigger.Changelist = ui.NewPatchLink(cl) 424 break 425 } 426 } 427 } 428 429 var regexRepoFromRecipeBundle = regexp.MustCompile(`/[^/]+\.googlesource\.com/.+$`) 430 431 // AddRecipeLink adds links to the recipe to the build. 432 func AddRecipeLink(build *ui.MiloBuildLegacy, tags strpair.Map) { 433 name := tags.Get("recipe_name") 434 repoURL := tags.Get("recipe_repository") 435 switch { 436 case name == "": 437 return 438 case repoURL == "": 439 // Was recipe_bundler-created CIPD package used? 440 repoURL = regexRepoFromRecipeBundle.FindString(tags.Get("recipe_package")) 441 if repoURL == "" { 442 return 443 } 444 // note that regex match will start with a slash, e.g., 445 // "/chromium.googlesource.com/infra/infra" 446 repoURL = "https:/" + repoURL // make it valid URL. 447 } 448 449 // We don't know location of recipes within the repo and getting that 450 // information is not trivial, so use code search, which is precise enough. 451 // TODO(nodir): load location from infra/config/recipes.cfg of the 452 // recipe_repository. 453 csHost := "cs.chromium.org" 454 repoURLParsed, _ := url.Parse(repoURL) 455 if repoURLParsed != nil && strings.Contains(repoURLParsed.Host, "internal") { 456 csHost = "cs.corp.google.com" 457 } 458 recipeURL := fmt.Sprintf("https://%s/search/?q=file:recipes/%s.py", csHost, name) 459 build.Summary.Recipe = ui.NewLink(name, recipeURL, fmt.Sprintf("recipe %s", name)) 460 } 461 462 // AddProjectInfo adds the luci_project swarming tag to the build. 463 func AddProjectInfo(build *ui.MiloBuildLegacy, tags strpair.Map) { 464 if proj := tags.Get("luci_project"); proj != "" { 465 if build.Trigger == nil { 466 build.Trigger = &ui.Trigger{} 467 } 468 build.Trigger.Project = proj 469 } 470 } 471 472 // addPendingTiming adds pending timing information to the build. 473 func addPendingTiming(c context.Context, build *ui.MiloBuildLegacy, sr *swarming.SwarmingRpcsTaskResult) { 474 created, err := time.Parse(SwarmingTimeLayout, sr.CreatedTs) 475 if err != nil { 476 return 477 } 478 build.Summary.PendingTime = ui.NewInterval(c, created, build.Summary.ExecutionTime.Started) 479 } 480 481 func addTaskToBuild(c context.Context, host string, sr *swarming.SwarmingRpcsTaskResult, build *ui.MiloBuildLegacy) error { 482 build.Summary.Label = ui.NewEmptyLink(sr.TaskId) 483 build.Summary.Type = ui.Recipe 484 build.Summary.Source = ui.NewLink( 485 "Task "+sr.TaskId, TaskPageURL(host, sr.TaskId).String(), 486 fmt.Sprintf("swarming task %s", sr.TaskId)) 487 488 // Extract more swarming specific information into the properties. 489 if props := taskProperties(sr); len(props.Property) > 0 { 490 build.PropertyGroup = append(build.PropertyGroup, props) 491 } 492 tags := strpair.ParseMap(sr.Tags) 493 494 addBuildsetInfo(build, tags) 495 AddBanner(build, tags) 496 addBuilderLink(c, build, tags) 497 AddRecipeLink(build, tags) 498 AddProjectInfo(build, tags) 499 addPendingTiming(c, build, sr) 500 501 // Add a link to the bot. 502 if sr.BotId != "" { 503 build.Summary.Bot = ui.NewLink(sr.BotId, botPageURL(host, sr.BotId), 504 fmt.Sprintf("swarming bot %s", sr.BotId)) 505 } 506 507 return nil 508 } 509 510 // streamsFromAnnotatedLog takes in an annotated log and returns a fully 511 // populated set of logdog streams 512 func streamsFromAnnotatedLog(ctx context.Context, log string) (*rawpresentation.Streams, error) { 513 scFake, c := streamclient.NewUnregisteredFake("") 514 p := annotee.New(ctx, annotee.Options{ 515 Client: c, 516 MetadataUpdateInterval: -1, // Neverrrrrr send incr updates. 517 Offline: true, 518 }) 519 520 is := annotee.Stream{ 521 Reader: bytes.NewBufferString(log), 522 Name: types.StreamName("stdout"), 523 Annotate: true, 524 StripAnnotations: true, 525 } 526 // If this ever has more than one stream then memoryClient needs to become 527 // goroutine safe 528 if err := p.RunStreams([]*annotee.Stream{&is}); err != nil { 529 return nil, err 530 } 531 p.Finish() 532 return parseAnnotations(scFake) 533 } 534 535 // failedToStart is called in the case where logdog-only mode is on but the 536 // stream doesn't exist and the swarming job is complete. It modifies the build 537 // to add information that would've otherwise been in the annotation stream. 538 func failedToStart(c context.Context, build *ui.MiloBuildLegacy, res *swarming.SwarmingRpcsTaskResult, host string) error { 539 build.Summary.Status = milostatus.InfraFailure 540 started, err := time.Parse(SwarmingTimeLayout, res.StartedTs) 541 if err != nil { 542 return err 543 } 544 ended, err := time.Parse(SwarmingTimeLayout, res.CompletedTs) 545 if err != nil { 546 return err 547 } 548 build.Summary.ExecutionTime = ui.NewInterval(c, started, ended) 549 infoComp := infoComponent(milostatus.InfraFailure, 550 "LogDog stream not found", "Job likely failed to start.") 551 infoComp.ExecutionTime = build.Summary.ExecutionTime 552 build.Components = append(build.Components, infoComp) 553 return addTaskToBuild(c, host, res, build) 554 } 555 556 // swarmingFetchMaybeLogs fetches the swarming task result. It also fetches 557 // the log iff the task is not a logdog enabled task. 558 func swarmingFetchMaybeLogs(c context.Context, svc swarmingService, taskID string) ( 559 *swarmingFetchResult, *types.StreamAddr, error) { 560 // Fetch the data from Swarming 561 var logDogStreamAddr *types.StreamAddr 562 563 fetchParams := swarmingFetchParams{ 564 fetchLog: true, 565 566 // Cancel if LogDog annotation stream parameters are present in the tag set. 567 taskResCallback: func(res *swarming.SwarmingRpcsTaskResult) (cancelLogs bool) { 568 // If the build hasn't started yet, then there is no LogDog log stream to 569 // render. 570 switch res.State { 571 case TaskPending, TaskExpired: 572 return false 573 574 case TaskCanceled, TaskKilled: 575 // If the task wasn't created, then it wasn't started. 576 if res.CreatedTs == "" { 577 return false 578 } 579 } 580 581 // The task started ... is it using LogDog for logging? 582 tags := swarmingTags(res.Tags) 583 584 var err error 585 if logDogStreamAddr, err = resolveLogDogStreamAddrFromTags(tags); err != nil { 586 logging.WithError(err).Debugf(c, "Not using LogDog annotation stream.") 587 return false 588 } 589 return true 590 }, 591 } 592 fr, err := swarmingFetch(c, svc, taskID, fetchParams) 593 return fr, logDogStreamAddr, err 594 } 595 596 // resolveLogDogStreamAddrFromTags returns a configured AnnotationStream given 597 // the tags swarming task's tags. 598 func resolveLogDogStreamAddrFromTags(tags map[string]string) (*types.StreamAddr, error) { 599 // If we don't have a LUCI project, abort. 600 luciProject, logLocation := tags["luci_project"], tags["log_location"] 601 switch { 602 case luciProject == "": 603 return nil, errors.New("no 'luci_project' tag") 604 case logLocation == "": 605 return nil, errors.New("no 'log_location' tag") 606 } 607 608 addr, err := types.ParseURL(logLocation) 609 if err != nil { 610 return nil, errors.Annotate(err, "could not parse LogDog stream from location").Err() 611 } 612 613 // The LogDog stream's project should match the LUCI project. 614 if string(addr.Project) != luciProject { 615 return nil, errors.Reason("stream project %q doesn't match LUCI project %q", addr.Project, luciProject).Err() 616 } 617 618 return addr, nil 619 } 620 621 // buildFromLogs returns a milo build from just the swarming log and result data. 622 // TODO(hinoka): Remove this once skia moves logging to logdog/kitchen. 623 func buildFromLogs(c context.Context, taskURL *url.URL, fr *swarmingFetchResult) (*ui.MiloBuildLegacy, error) { 624 var build ui.MiloBuildLegacy 625 var step *annopb.Step 626 627 // Decode the data using annotee. The logdog stream returned here is assumed 628 // to be consistent, which is why the following block of code are not 629 // expected to ever err out. 630 if fr.log != "" { 631 lds, err := streamsFromAnnotatedLog(c, fr.log) 632 if err != nil { 633 comp := infoComponent(milostatus.InfraFailure, "Milo annotation parser", err.Error()) 634 comp.SubLink = append(comp.SubLink, ui.LinkSet{ 635 ui.NewLink("swarming task", taskURL.String(), ""), 636 }) 637 build.Components = append(build.Components, comp) 638 } else if lds.MainStream != nil { 639 step = proto.Clone(lds.MainStream.Data).(*annopb.Step) 640 } 641 } 642 643 if err := addTaskToMiloStep(c, taskURL.Host, fr.res, step); err != nil { 644 return nil, err 645 } 646 647 // Log links are built relative to swarming URLs 648 id := taskURL.Query().Get("id") 649 ub := swarmingURLBuilder(id) 650 rawpresentation.AddLogDogToBuild(c, ub, step, &build) 651 652 addFailureSummary(&build) 653 654 err := addTaskToBuild(c, taskURL.Host, fr.res, &build) 655 return &build, err 656 } 657 658 // addFailureSummary adds failure summary information to the main status, 659 // derivied from individual steps. 660 func addFailureSummary(b *ui.MiloBuildLegacy) { 661 for _, comp := range b.Components { 662 // Add interesting information into the main summary text. 663 if comp.Status != milostatus.Success { 664 b.Summary.Text = append( 665 b.Summary.Text, fmt.Sprintf("%s %s", comp.Status, comp.Label)) 666 } 667 } 668 } 669 670 // SwarmingBuildImpl fetches data from Swarming and LogDog and produces a resp.MiloBuildLegacy 671 // representation of a build state given a Swarming TaskID. 672 func SwarmingBuildImpl(c context.Context, svc swarmingService, taskID string) (*ui.MiloBuildLegacy, error) { 673 // First, get the task result from swarming, and maybe the logs. 674 fr, logDogStreamAddr, err := swarmingFetchMaybeLogs(c, svc, taskID) 675 if err != nil { 676 return nil, err 677 } 678 swarmingResult := fr.res 679 680 // Legacy codepath - Annotations are encoded in the swarming log instead of LogDog. 681 // TODO(hinoka): Remove this once skia moves logging to logdog/kitchen. 682 if logDogStreamAddr == nil { 683 taskURL := TaskPageURL(svc.GetHost(), taskID) 684 return buildFromLogs(c, taskURL, fr) 685 } 686 687 // Create an empty build here first because we might want to add some 688 // system-level messages. 689 var build ui.MiloBuildLegacy 690 691 // Load the build from the LogDog service. For known classes of errors, add 692 // steps in the build presentation to explain what may be going on. 693 step, err := rawpresentation.ReadAnnotations(c, logDogStreamAddr) 694 switch errors.Unwrap(err) { 695 case coordinator.ErrNoSuchStream: 696 // The stream was not found. This could be due to one of two things: 697 // 1. The step just started and we're just waiting for the logs 698 // to propogage to logdog. 699 // 2. The bootstrap on the client failed, and never sent data to logdog. 700 // This would be evident because the swarming result would be a failure. 701 if swarmingResult.State == TaskCompleted { 702 err = failedToStart(c, &build, swarmingResult, svc.GetHost()) 703 return &build, err 704 } 705 logging.WithError(err).Errorf(c, "User cannot access stream.") 706 build.Components = append(build.Components, infoComponent(milostatus.Running, 707 "Waiting...", "waiting for annotation stream")) 708 709 case coordinator.ErrNoAccess: 710 logging.WithError(err).Errorf(c, "User cannot access stream.") 711 build.Components = append(build.Components, infoComponent(milostatus.Failure, 712 "No Access", "no access to annotation stream")) 713 case nil: 714 // continue 715 716 default: 717 logging.WithError(err).Errorf(c, "Failed to load LogDog annotation stream.") 718 build.Components = append(build.Components, infoComponent(milostatus.InfraFailure, 719 "Error", "failed to load annotation stream: "+err.Error())) 720 } 721 722 // Skip these steps if the LogDog stream doesn't exist. 723 // i.e. when the stream isn't ready yet, or errored out. 724 if step != nil { 725 // Milo Step Proto += Swarming Result Data 726 if err := addTaskToMiloStep(c, svc.GetHost(), swarmingResult, step); err != nil { 727 return nil, err 728 } 729 // Log links are linked directly to the logdog service. This is used when 730 // converting proto step data to resp build structs 731 ub := rawpresentation.NewURLBuilder(logDogStreamAddr) 732 rawpresentation.AddLogDogToBuild(c, ub, step, &build) 733 } 734 addFailureSummary(&build) 735 736 // Milo Resp Build += Swarming Result Data 737 // This is done for things in resp but not in step like the banner, buildset, 738 // recipe link, bot info, title, etc. 739 err = addTaskToBuild(c, svc.GetHost(), swarmingResult, &build) 740 return &build, err 741 } 742 743 // infoComponent is a helper function to return a resp build step with the 744 // given status, label, and step text. 745 func infoComponent(st milostatus.Status, label, text string) *ui.BuildComponent { 746 return &ui.BuildComponent{ 747 Type: ui.Summary, 748 Label: ui.NewEmptyLink(label), 749 Text: []string{text}, 750 Status: st, 751 } 752 } 753 754 // isAllowed checks if: 755 // 1. allow_milo:1 is present. If so, it's a public job. 756 // 2. luci_project is present, and if the logged in user has access to that project. 757 func isAllowed(c context.Context, tags []string) bool { 758 for _, t := range tags { 759 if t == "allow_milo:1" { 760 return true 761 } 762 } 763 for _, t := range tags { 764 if strings.HasPrefix(t, "luci_project:") { 765 sp := strings.SplitN(t, ":", 2) 766 if len(sp) != 2 { 767 return false 768 } 769 logging.Debugf(c, "Checking if user has access to %s", sp[1]) 770 // sp[1] is the project ID. 771 allowed, err := projectconfig.IsAllowed(c, sp[1]) 772 if err != nil { 773 logging.WithError(err).Errorf(c, "could not perform acl check") 774 return false 775 } 776 return allowed 777 } 778 } 779 return false 780 } 781 782 // TaskPageURL returns a URL to a human-consumable page of a swarming task. 783 // Supports host aliases. 784 func TaskPageURL(swarmingHostname, taskID string) *url.URL { 785 val := url.Values{} 786 val.Set("id", taskID) 787 val.Set("show_raw", "1") 788 val.Set("wide_logs", "true") 789 return &url.URL{ 790 Scheme: "https", 791 Host: swarmingHostname, 792 Path: "task", 793 RawQuery: val.Encode(), 794 } 795 } 796 797 // botPageURL returns a URL to a human-consumable page of a swarming bot. 798 // Supports host aliases. 799 func botPageURL(swarmingHostname, botID string) string { 800 return fmt.Sprintf("https://%s/restricted/bot/%s", swarmingHostname, botID) 801 } 802 803 // URLBase is the routing prefix for swarming endpoints. It's here so that it 804 // can be a constant between the swarmingURLBuilder and the frontend. 805 const URLBase = "/swarming/task" 806 807 // swarmingURLBuilder is a logdog.URLBuilder that builds Milo swarming log 808 // links. 809 // 810 // It should be the swarming task id. 811 type swarmingURLBuilder string 812 813 func (b swarmingURLBuilder) BuildLink(l *annopb.AnnotationLink) *ui.Link { 814 switch t := l.Value.(type) { 815 case *annopb.AnnotationLink_LogdogStream: 816 ls := t.LogdogStream 817 818 link := ui.NewLink(l.Label, fmt.Sprintf("%s/%s/%s", URLBase, b, ls.Name), "") 819 if link.Label == "" { 820 link.Label = ls.Name 821 } 822 link.AriaLabel = fmt.Sprintf("log link for %s", link.Label) 823 return link 824 825 case *annopb.AnnotationLink_Url: 826 return ui.NewLink(l.Label, t.Url, fmt.Sprintf("step link for %s", l.Label)) 827 828 default: 829 return nil 830 } 831 } 832 833 func swarmingTags(v []string) map[string]string { 834 res := make(map[string]string, len(v)) 835 for _, tag := range v { 836 var value string 837 parts := strings.SplitN(tag, ":", 2) 838 if len(parts) == 2 { 839 value = parts[1] 840 } 841 res[parts[0]] = value 842 } 843 return res 844 } 845 846 // BuildID is swarming's notion of a Build. See buildsource.ID. 847 type BuildID struct { 848 // (Required) The Swarming TaskID. 849 TaskID string 850 851 // (Optional) The Swarming host. If empty, will use the 852 // milo-instance-configured swarming host. 853 Host string 854 } 855 856 // getSwarmingHost returns default hostname if host is empty. 857 // If host is not empty and not allowed, returns an error. 858 func getSwarmingHost(c context.Context, host string) (string, error) { 859 settings := config.GetSettings(c) 860 if settings.Swarming == nil { 861 err := errors.New("swarming not in settings") 862 logging.WithError(err).Errorf(c, "Go configure swarming in the settings page.") 863 return "", err 864 } 865 866 if host == "" || host == settings.Swarming.DefaultHost { 867 return settings.Swarming.DefaultHost, nil 868 } 869 // If it is specified, validate the hostname. 870 for _, allowed := range settings.Swarming.AllowedHosts { 871 if host == allowed { 872 return host, nil 873 } 874 } 875 return "", errors.New("unknown swarming host", grpcutil.InvalidArgumentTag) 876 } 877 878 // GetBuild returns a milo build from a swarming task id. 879 func GetBuild(c context.Context, host, taskID string) (*ui.MiloBuildLegacy, error) { 880 if taskID == "" { 881 return nil, errors.New("no swarming task id", grpcutil.InvalidArgumentTag) 882 } 883 884 sf, err := newProdService(c, host) 885 if err != nil { 886 return nil, err 887 } 888 defer sf.Close() 889 890 return SwarmingBuildImpl(c, sf, taskID) 891 } 892 893 // RedirectsFromTask returns either 894 // - The ID of the buildbucket build corresponding to this task. OR 895 // - The build.proto logdog stream from this swarming task. 896 // 897 // If the task does not represent a buildbucket build, returns (0, "", nil). 898 func RedirectsFromTask(c context.Context, host, taskID string) (int64, string, error) { 899 sf, err := newProdService(c, host) 900 if err != nil { 901 return 0, "", err 902 } 903 defer sf.Close() 904 905 res, err := sf.client.Task.Request(taskID).Context(c).Do() 906 switch err := err.(type) { 907 case *googleapi.Error: 908 switch err.Code { 909 case http.StatusNotFound: 910 return 0, "", errors.Annotate(err, "task %s/%s not found", host, taskID).Tag(grpcutil.NotFoundTag).Err() 911 case http.StatusBadRequest: 912 return 0, "", errors.Annotate(err, "bad request").Tag(grpcutil.InvalidArgumentTag).Err() 913 } 914 case error: 915 return 0, "", err 916 } 917 918 for _, t := range res.Tags { 919 const bbPrefix = "buildbucket_build_id:" 920 if strings.HasPrefix(t, bbPrefix) { 921 value := t[len(bbPrefix):] 922 id, err := strconv.ParseInt(value, 10, 64) 923 if err != nil { 924 logging.Errorf(c, "failed to parse buildbucket_build_id tag %q as int64: %s", value, err) 925 return 0, "", nil 926 } 927 return id, "", nil 928 } 929 930 const ldPrefix = "log_location:" 931 if strings.HasPrefix(t, ldPrefix) { 932 url := t[len(ldPrefix):] 933 url = strings.TrimPrefix(url, "logdog://") 934 return 0, url, nil 935 } 936 } 937 return 0, "", nil 938 }