go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/milo/internal/buildsource/swarming/build.go (about)

     1  // Copyright 2015 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package swarming
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"net/http"
    22  	"net/url"
    23  	"regexp"
    24  	"strconv"
    25  	"strings"
    26  	"time"
    27  
    28  	"google.golang.org/api/googleapi"
    29  	"google.golang.org/protobuf/proto"
    30  	"google.golang.org/protobuf/types/known/timestamppb"
    31  
    32  	buildbucketpb "go.chromium.org/luci/buildbucket/proto"
    33  	"go.chromium.org/luci/buildbucket/protoutil"
    34  	bbv1 "go.chromium.org/luci/common/api/buildbucket/buildbucket/v1"
    35  	swarming "go.chromium.org/luci/common/api/swarming/swarming/v1"
    36  	"go.chromium.org/luci/common/data/strpair"
    37  	"go.chromium.org/luci/common/errors"
    38  	"go.chromium.org/luci/common/logging"
    39  	"go.chromium.org/luci/common/sync/parallel"
    40  	"go.chromium.org/luci/grpc/grpcutil"
    41  	"go.chromium.org/luci/logdog/client/butlerlib/streamclient"
    42  	"go.chromium.org/luci/logdog/client/coordinator"
    43  	"go.chromium.org/luci/logdog/common/types"
    44  	"go.chromium.org/luci/luciexe/legacy/annotee"
    45  	annopb "go.chromium.org/luci/luciexe/legacy/annotee/proto"
    46  	"go.chromium.org/luci/milo/frontend/ui"
    47  	"go.chromium.org/luci/milo/internal/buildsource/rawpresentation"
    48  	"go.chromium.org/luci/milo/internal/config"
    49  	"go.chromium.org/luci/milo/internal/model/milostatus"
    50  	"go.chromium.org/luci/milo/internal/projectconfig"
    51  	"go.chromium.org/luci/server/auth"
    52  )
    53  
// swarmingService is an interface that fetches data from Swarming.
//
// In production, this is fetched from a Swarming host. For testing, this can
// be replaced with a mock.
type swarmingService interface {
	// GetHost returns the Swarming host name this service is bound to.
	GetHost() string
	// GetSwarmingResult fetches the task result for taskID.
	GetSwarmingResult(c context.Context, taskID string) (*swarming.SwarmingRpcsTaskResult, error)
	// GetSwarmingRequest fetches the task request for taskID.
	GetSwarmingRequest(c context.Context, taskID string) (*swarming.SwarmingRpcsTaskRequest, error)
	// GetTaskOutput fetches the task's output (its log) for taskID.
	GetTaskOutput(c context.Context, taskID string) (string, error)
	// Close releases whatever the implementation holds; callers should invoke
	// it once they are done with the service.
	Close()
}
    65  
// ErrNotMiloJob is returned if a Swarming task is fetched that does not self-
// identify as a Milo job, or that the current user is not allowed to see.
//
// It carries grpcutil.PermissionDeniedTag.
var ErrNotMiloJob = errors.New("Not a Milo Job or access denied", grpcutil.PermissionDeniedTag)
    69  
// SwarmingTimeLayout is the time layout used by swarming timestamps, for use
// with time.Parse. The layout carries no time zone offset.
const SwarmingTimeLayout = "2006-01-02T15:04:05.999999999"
    72  
// Swarming task states.
const (
	// TaskRunning means task is running.
	TaskRunning = "RUNNING"
	// TaskPending means task didn't start yet.
	TaskPending = "PENDING"
	// TaskExpired means task expired and did not start.
	TaskExpired = "EXPIRED"
	// TaskTimedOut means task started, but took too long.
	TaskTimedOut = "TIMED_OUT"
	// TaskBotDied means task started but bot died.
	TaskBotDied = "BOT_DIED"
	// TaskCanceled means the task was canceled. See CompletedTs to determine whether it was started.
	TaskCanceled = "CANCELED"
	// TaskKilled means the task was canceled. See CompletedTs to determine whether it was started.
	TaskKilled = "KILLED"
	// TaskCompleted means task is complete.
	TaskCompleted = "COMPLETED"
	// TaskNoResource means there was not enough capacity when scheduled, so the
	// task failed immediately.
	TaskNoResource = "NO_RESOURCE"
	// TaskClientError means that the client has caused a task to error.
	TaskClientError = "CLIENT_ERROR"
)
    97  
    98  func getSwarmingClient(c context.Context, host string) (*swarming.Service, error) {
    99  	t, err := auth.GetRPCTransport(c, auth.AsSelf)
   100  	if err != nil {
   101  		return nil, err
   102  	}
   103  	sc, err := swarming.New(&http.Client{Transport: t})
   104  	if err != nil {
   105  		return nil, err
   106  	}
   107  	sc.BasePath = fmt.Sprintf("https://%s/_ah/api/swarming/v1/", host)
   108  	return sc, nil
   109  }
   110  
// prodSwarmingService is the swarmingService implementation that talks to a
// real Swarming host.
type prodSwarmingService struct {
	// host is the Swarming host name, as returned by GetHost.
	host string
	// client is the Swarming API client used for all fetches.
	client *swarming.Service
	// cancel cancels the context created in newProdService; Close calls it.
	cancel func()
}
   116  
   117  func newProdService(c context.Context, host string) (*prodSwarmingService, error) {
   118  	host, err := getSwarmingHost(c, host)
   119  	if err != nil {
   120  		return nil, err
   121  	}
   122  
   123  	c, cancel := context.WithTimeout(c, 60*time.Second)
   124  
   125  	client, err := getSwarmingClient(c, host)
   126  	if err != nil {
   127  		cancel()
   128  		return nil, err
   129  	}
   130  
   131  	return &prodSwarmingService{
   132  		host:   host,
   133  		client: client,
   134  		cancel: cancel,
   135  	}, nil
   136  }
   137  
   138  func (svc *prodSwarmingService) GetHost() string { return svc.host }
   139  
   140  func (svc *prodSwarmingService) GetSwarmingResult(c context.Context, taskID string) (*swarming.SwarmingRpcsTaskResult, error) {
   141  	return svc.client.Task.Result(taskID).Context(c).Do()
   142  }
   143  
   144  func (svc *prodSwarmingService) GetTaskOutput(c context.Context, taskID string) (string, error) {
   145  	stdout, err := svc.client.Task.Stdout(taskID).Context(c).Do()
   146  	if err != nil {
   147  		return "", err
   148  	}
   149  	return stdout.Output, nil
   150  }
   151  
   152  func (svc *prodSwarmingService) GetSwarmingRequest(c context.Context, taskID string) (*swarming.SwarmingRpcsTaskRequest, error) {
   153  	return svc.client.Task.Request(taskID).Context(c).Do()
   154  }
   155  
   156  func (svc *prodSwarmingService) Close() {
   157  	svc.cancel()
   158  }
   159  
// swarmingFetchParams configures what swarmingFetch retrieves.
type swarmingFetchParams struct {
	// fetchLog, if true, makes swarmingFetch also fetch the task output (log)
	// in parallel with the task result.
	fetchLog bool

	// taskResCallback, if not nil, is a callback that will be invoked after
	// successfully fetching the result. It is passed the fetched Swarming task
	// result.
	//
	// If taskResCallback returns true, any pending log fetch will be canceled
	// without error.
	taskResCallback func(*swarming.SwarmingRpcsTaskResult) bool
}
   171  
// swarmingFetchResult holds the data retrieved by swarmingFetch.
type swarmingFetchResult struct {
	// res is the fetched Swarming task result.
	res *swarming.SwarmingRpcsTaskResult

	// log is the log data content. If no log data was fetched, this will be
	// empty. If the log fetch was canceled, this is undefined.
	log string
}
   179  
   180  // swarmingFetch fetches (in parallel) the components that it is configured to
   181  // fetch.
   182  //
   183  // After fetching, an ACL check is performed to confirm that the user is
   184  // permitted to view the resulting data. If this check fails, get returns
   185  // errNotMiloJob.
   186  func swarmingFetch(c context.Context, svc swarmingService, taskID string, req swarmingFetchParams) (
   187  	*swarmingFetchResult, error) {
   188  
   189  	// logErr is managed separately from other fetch errors, since in some
   190  	// situations it's acceptable to not have a log stream.
   191  	var logErr error
   192  	var fr swarmingFetchResult
   193  
   194  	// Special Context to enable the cancellation of log fetching.
   195  	logsCanceled := false
   196  	logCtx, cancelLogs := context.WithCancel(c)
   197  	defer cancelLogs()
   198  
   199  	err := parallel.FanOutIn(func(workC chan<- func() error) {
   200  		workC <- func() (err error) {
   201  			if fr.res, err = svc.GetSwarmingResult(c, taskID); err == nil {
   202  				if req.taskResCallback != nil && req.taskResCallback(fr.res) {
   203  					logsCanceled = true
   204  					cancelLogs()
   205  				}
   206  			} else if ierr, ok := err.(*googleapi.Error); ok {
   207  				switch ierr.Code {
   208  				case http.StatusNotFound:
   209  					err = errors.Annotate(ierr, "not found on swarming").Tag(grpcutil.NotFoundTag).Err()
   210  				case http.StatusBadRequest:
   211  					err = errors.Annotate(ierr, "bad request").Tag(grpcutil.InvalidArgumentTag).Err()
   212  				}
   213  			}
   214  			return
   215  		}
   216  
   217  		if req.fetchLog {
   218  			workC <- func() error {
   219  				// Note: we're using the log Context here so we can cancel log fetch
   220  				// explicitly.
   221  				fr.log, logErr = svc.GetTaskOutput(logCtx, taskID)
   222  				return nil
   223  			}
   224  		}
   225  	})
   226  	if err != nil {
   227  		return nil, err
   228  	}
   229  
   230  	// Current ACL implementation:
   231  	// If allow_milo:1 is present, it is a public job.  Don't bother with ACL check.
   232  	// If it is not present, check the luci_project tag, and see if user is allowed
   233  	// to access said project.
   234  	if !isAllowed(c, fr.res.Tags) {
   235  		return nil, ErrNotMiloJob
   236  	}
   237  
   238  	if logErr != nil {
   239  		switch fr.res.State {
   240  		case TaskCompleted, TaskRunning, TaskCanceled, TaskKilled, TaskNoResource:
   241  		default:
   242  			//  Ignore log errors if the task might be pending, timed out, expired, etc.
   243  			if err != nil {
   244  				fr.log = ""
   245  				logErr = nil
   246  			}
   247  		}
   248  	}
   249  
   250  	// If we explicitly canceled logs, everything is OK.
   251  	if logErr == context.Canceled && logsCanceled {
   252  		logErr = nil
   253  	}
   254  	return &fr, logErr
   255  }
   256  
   257  func taskProperties(sr *swarming.SwarmingRpcsTaskResult) *ui.PropertyGroup {
   258  	props := &ui.PropertyGroup{GroupName: "Swarming"}
   259  	if len(sr.CostsUsd) == 1 {
   260  		props.Property = append(props.Property, &ui.Property{
   261  			Key:   "Cost of job (USD)",
   262  			Value: fmt.Sprintf("$%.2f", sr.CostsUsd[0]),
   263  		})
   264  	}
   265  	if sr.State == TaskCompleted || sr.State == TaskTimedOut {
   266  		props.Property = append(props.Property, &ui.Property{
   267  			Key:   "Exit Code",
   268  			Value: fmt.Sprintf("%d", sr.ExitCode),
   269  		})
   270  	}
   271  	return props
   272  }
   273  
   274  // addBuilderLink adds a link to the buildbucket builder view.
   275  func addBuilderLink(c context.Context, build *ui.MiloBuildLegacy, tags strpair.Map) {
   276  	bucket := tags.Get("buildbucket_bucket")
   277  	builder := tags.Get("builder")
   278  	project := tags.Get("luci_project")
   279  	if bucket != "" && builder != "" {
   280  		builderParts := strings.Split(builder, "/")
   281  		builder = builderParts[len(builderParts)-1]
   282  		build.Summary.ParentLabel = ui.NewLink(
   283  			builder, fmt.Sprintf("/p/%s/builders/%s/%s", project, bucket, builder),
   284  			fmt.Sprintf("buildbucket builder %s on bucket %s", builder, bucket))
   285  	}
   286  }
   287  
   288  // AddBanner adds an OS banner derived from "os" swarming tag, if present.
   289  func AddBanner(build *ui.MiloBuildLegacy, tags strpair.Map) {
   290  	os := tags.Get("os")
   291  	parts := strings.SplitN(os, "-", 2)
   292  	var ver string
   293  	if len(parts) == 2 {
   294  		os = parts[0]
   295  		ver = parts[1]
   296  	}
   297  
   298  	var base ui.LogoBase
   299  	switch os {
   300  	case "Ubuntu":
   301  		base = ui.Ubuntu
   302  	case "Windows":
   303  		base = ui.Windows
   304  	case "Mac":
   305  		base = ui.OSX
   306  	case "Android":
   307  		base = ui.Android
   308  	default:
   309  		return
   310  	}
   311  	build.Summary.Banner = &ui.LogoBanner{
   312  		OS: []ui.Logo{{
   313  			LogoBase: base,
   314  			Subtitle: ver,
   315  			Count:    1,
   316  		}},
   317  	}
   318  }
   319  
   320  // addTaskToMiloStep augments a Milo Annotation Protobuf with state from the
   321  // Swarming task.
   322  func addTaskToMiloStep(c context.Context, host string, sr *swarming.SwarmingRpcsTaskResult, step *annopb.Step) error {
   323  	step.Link = &annopb.AnnotationLink{
   324  		Label: "Task " + sr.TaskId,
   325  		Value: &annopb.AnnotationLink_Url{
   326  			Url: TaskPageURL(host, sr.TaskId).String(),
   327  		},
   328  	}
   329  
   330  	switch sr.State {
   331  	case TaskRunning:
   332  		step.Status = annopb.Status_RUNNING
   333  
   334  	case TaskPending:
   335  		step.Status = annopb.Status_PENDING
   336  
   337  	case TaskExpired, TaskTimedOut, TaskBotDied, TaskClientError:
   338  		step.Status = annopb.Status_FAILURE
   339  
   340  		switch sr.State {
   341  		case TaskExpired:
   342  			step.FailureDetails = &annopb.FailureDetails{
   343  				Type: annopb.FailureDetails_EXPIRED,
   344  				Text: "Task expired",
   345  			}
   346  		case TaskTimedOut:
   347  			step.FailureDetails = &annopb.FailureDetails{
   348  				Type: annopb.FailureDetails_INFRA,
   349  				Text: "Task timed out",
   350  			}
   351  		case TaskBotDied:
   352  			step.FailureDetails = &annopb.FailureDetails{
   353  				Type: annopb.FailureDetails_INFRA,
   354  				Text: "Bot died",
   355  			}
   356  		case TaskClientError:
   357  			step.FailureDetails = &annopb.FailureDetails{
   358  				Type: annopb.FailureDetails_INFRA,
   359  				Text: "Client error",
   360  			}
   361  		}
   362  
   363  	case TaskCanceled, TaskKilled:
   364  		// Canceled build is user action, so it is not an infra failure.
   365  		step.Status = annopb.Status_FAILURE
   366  		step.FailureDetails = &annopb.FailureDetails{
   367  			Type: annopb.FailureDetails_CANCELLED,
   368  			Text: "Task canceled by user",
   369  		}
   370  
   371  	case TaskNoResource:
   372  		step.Status = annopb.Status_FAILURE
   373  		step.FailureDetails = &annopb.FailureDetails{
   374  			Type: annopb.FailureDetails_EXPIRED,
   375  			Text: "No resource available on Swarming",
   376  		}
   377  
   378  	case TaskCompleted:
   379  
   380  		switch {
   381  		case sr.InternalFailure:
   382  			step.Status = annopb.Status_FAILURE
   383  			step.FailureDetails = &annopb.FailureDetails{
   384  				Type: annopb.FailureDetails_INFRA,
   385  			}
   386  
   387  		case sr.Failure:
   388  			step.Status = annopb.Status_FAILURE
   389  
   390  		default:
   391  			step.Status = annopb.Status_SUCCESS
   392  		}
   393  
   394  	default:
   395  		return fmt.Errorf("unknown swarming task state %q", sr.State)
   396  	}
   397  
   398  	// Compute start and finished times.
   399  	if sr.StartedTs != "" {
   400  		ts, err := time.Parse(SwarmingTimeLayout, sr.StartedTs)
   401  		if err != nil {
   402  			return fmt.Errorf("invalid task StartedTs: %s", err)
   403  		}
   404  		step.Started = timestamppb.New(ts)
   405  	}
   406  	if sr.CompletedTs != "" {
   407  		ts, err := time.Parse(SwarmingTimeLayout, sr.CompletedTs)
   408  		if err != nil {
   409  			return fmt.Errorf("invalid task CompletedTs: %s", err)
   410  		}
   411  		step.Ended = timestamppb.New(ts)
   412  	}
   413  
   414  	return nil
   415  }
   416  
   417  func addBuildsetInfo(build *ui.MiloBuildLegacy, tags strpair.Map) {
   418  	for _, bs := range tags[bbv1.TagBuildSet] {
   419  		if cl, ok := protoutil.ParseBuildSet(bs).(*buildbucketpb.GerritChange); ok {
   420  			if build.Trigger == nil {
   421  				build.Trigger = &ui.Trigger{}
   422  			}
   423  			build.Trigger.Changelist = ui.NewPatchLink(cl)
   424  			break
   425  		}
   426  	}
   427  }
   428  
   429  var regexRepoFromRecipeBundle = regexp.MustCompile(`/[^/]+\.googlesource\.com/.+$`)
   430  
   431  // AddRecipeLink adds links to the recipe to the build.
   432  func AddRecipeLink(build *ui.MiloBuildLegacy, tags strpair.Map) {
   433  	name := tags.Get("recipe_name")
   434  	repoURL := tags.Get("recipe_repository")
   435  	switch {
   436  	case name == "":
   437  		return
   438  	case repoURL == "":
   439  		// Was recipe_bundler-created CIPD package used?
   440  		repoURL = regexRepoFromRecipeBundle.FindString(tags.Get("recipe_package"))
   441  		if repoURL == "" {
   442  			return
   443  		}
   444  		// note that regex match will start with a slash, e.g.,
   445  		// "/chromium.googlesource.com/infra/infra"
   446  		repoURL = "https:/" + repoURL // make it valid URL.
   447  	}
   448  
   449  	// We don't know location of recipes within the repo and getting that
   450  	// information is not trivial, so use code search, which is precise enough.
   451  	// TODO(nodir): load location from infra/config/recipes.cfg of the
   452  	// recipe_repository.
   453  	csHost := "cs.chromium.org"
   454  	repoURLParsed, _ := url.Parse(repoURL)
   455  	if repoURLParsed != nil && strings.Contains(repoURLParsed.Host, "internal") {
   456  		csHost = "cs.corp.google.com"
   457  	}
   458  	recipeURL := fmt.Sprintf("https://%s/search/?q=file:recipes/%s.py", csHost, name)
   459  	build.Summary.Recipe = ui.NewLink(name, recipeURL, fmt.Sprintf("recipe %s", name))
   460  }
   461  
   462  // AddProjectInfo adds the luci_project swarming tag to the build.
   463  func AddProjectInfo(build *ui.MiloBuildLegacy, tags strpair.Map) {
   464  	if proj := tags.Get("luci_project"); proj != "" {
   465  		if build.Trigger == nil {
   466  			build.Trigger = &ui.Trigger{}
   467  		}
   468  		build.Trigger.Project = proj
   469  	}
   470  }
   471  
   472  // addPendingTiming adds pending timing information to the build.
   473  func addPendingTiming(c context.Context, build *ui.MiloBuildLegacy, sr *swarming.SwarmingRpcsTaskResult) {
   474  	created, err := time.Parse(SwarmingTimeLayout, sr.CreatedTs)
   475  	if err != nil {
   476  		return
   477  	}
   478  	build.Summary.PendingTime = ui.NewInterval(c, created, build.Summary.ExecutionTime.Started)
   479  }
   480  
   481  func addTaskToBuild(c context.Context, host string, sr *swarming.SwarmingRpcsTaskResult, build *ui.MiloBuildLegacy) error {
   482  	build.Summary.Label = ui.NewEmptyLink(sr.TaskId)
   483  	build.Summary.Type = ui.Recipe
   484  	build.Summary.Source = ui.NewLink(
   485  		"Task "+sr.TaskId, TaskPageURL(host, sr.TaskId).String(),
   486  		fmt.Sprintf("swarming task %s", sr.TaskId))
   487  
   488  	// Extract more swarming specific information into the properties.
   489  	if props := taskProperties(sr); len(props.Property) > 0 {
   490  		build.PropertyGroup = append(build.PropertyGroup, props)
   491  	}
   492  	tags := strpair.ParseMap(sr.Tags)
   493  
   494  	addBuildsetInfo(build, tags)
   495  	AddBanner(build, tags)
   496  	addBuilderLink(c, build, tags)
   497  	AddRecipeLink(build, tags)
   498  	AddProjectInfo(build, tags)
   499  	addPendingTiming(c, build, sr)
   500  
   501  	// Add a link to the bot.
   502  	if sr.BotId != "" {
   503  		build.Summary.Bot = ui.NewLink(sr.BotId, botPageURL(host, sr.BotId),
   504  			fmt.Sprintf("swarming bot %s", sr.BotId))
   505  	}
   506  
   507  	return nil
   508  }
   509  
// streamsFromAnnotatedLog takes in an annotated log and returns a fully
// populated set of logdog streams.
//
// The log is fed as a single annotated "stdout" stream through an offline
// annotee processor that writes to a fake stream client; once the processor
// has finished, the fake client's contents are handed to parseAnnotations.
// An error from running the stream is returned as-is.
func streamsFromAnnotatedLog(ctx context.Context, log string) (*rawpresentation.Streams, error) {
	scFake, c := streamclient.NewUnregisteredFake("")
	p := annotee.New(ctx, annotee.Options{
		Client:                 c,
		MetadataUpdateInterval: -1, // Never send incremental updates.
		Offline:                true,
	})

	is := annotee.Stream{
		Reader:           bytes.NewBufferString(log),
		Name:             types.StreamName("stdout"),
		Annotate:         true,
		StripAnnotations: true,
	}
	// If this ever has more than one stream then memoryClient needs to become
	// goroutine safe
	if err := p.RunStreams([]*annotee.Stream{&is}); err != nil {
		// NOTE(review): p.Finish is not called on this path — confirm the
		// processor needs no cleanup after a failed run.
		return nil, err
	}
	// Finish the processor before reading what it wrote to the fake client.
	p.Finish()
	return parseAnnotations(scFake)
}
   534  
   535  // failedToStart is called in the case where logdog-only mode is on but the
   536  // stream doesn't exist and the swarming job is complete.  It modifies the build
   537  // to add information that would've otherwise been in the annotation stream.
   538  func failedToStart(c context.Context, build *ui.MiloBuildLegacy, res *swarming.SwarmingRpcsTaskResult, host string) error {
   539  	build.Summary.Status = milostatus.InfraFailure
   540  	started, err := time.Parse(SwarmingTimeLayout, res.StartedTs)
   541  	if err != nil {
   542  		return err
   543  	}
   544  	ended, err := time.Parse(SwarmingTimeLayout, res.CompletedTs)
   545  	if err != nil {
   546  		return err
   547  	}
   548  	build.Summary.ExecutionTime = ui.NewInterval(c, started, ended)
   549  	infoComp := infoComponent(milostatus.InfraFailure,
   550  		"LogDog stream not found", "Job likely failed to start.")
   551  	infoComp.ExecutionTime = build.Summary.ExecutionTime
   552  	build.Components = append(build.Components, infoComp)
   553  	return addTaskToBuild(c, host, res, build)
   554  }
   555  
   556  // swarmingFetchMaybeLogs fetches the swarming task result.  It also fetches
   557  // the log iff the task is not a logdog enabled task.
   558  func swarmingFetchMaybeLogs(c context.Context, svc swarmingService, taskID string) (
   559  	*swarmingFetchResult, *types.StreamAddr, error) {
   560  	// Fetch the data from Swarming
   561  	var logDogStreamAddr *types.StreamAddr
   562  
   563  	fetchParams := swarmingFetchParams{
   564  		fetchLog: true,
   565  
   566  		// Cancel if LogDog annotation stream parameters are present in the tag set.
   567  		taskResCallback: func(res *swarming.SwarmingRpcsTaskResult) (cancelLogs bool) {
   568  			// If the build hasn't started yet, then there is no LogDog log stream to
   569  			// render.
   570  			switch res.State {
   571  			case TaskPending, TaskExpired:
   572  				return false
   573  
   574  			case TaskCanceled, TaskKilled:
   575  				// If the task wasn't created, then it wasn't started.
   576  				if res.CreatedTs == "" {
   577  					return false
   578  				}
   579  			}
   580  
   581  			// The task started ... is it using LogDog for logging?
   582  			tags := swarmingTags(res.Tags)
   583  
   584  			var err error
   585  			if logDogStreamAddr, err = resolveLogDogStreamAddrFromTags(tags); err != nil {
   586  				logging.WithError(err).Debugf(c, "Not using LogDog annotation stream.")
   587  				return false
   588  			}
   589  			return true
   590  		},
   591  	}
   592  	fr, err := swarmingFetch(c, svc, taskID, fetchParams)
   593  	return fr, logDogStreamAddr, err
   594  }
   595  
   596  // resolveLogDogStreamAddrFromTags returns a configured AnnotationStream given
   597  // the tags swarming task's tags.
   598  func resolveLogDogStreamAddrFromTags(tags map[string]string) (*types.StreamAddr, error) {
   599  	// If we don't have a LUCI project, abort.
   600  	luciProject, logLocation := tags["luci_project"], tags["log_location"]
   601  	switch {
   602  	case luciProject == "":
   603  		return nil, errors.New("no 'luci_project' tag")
   604  	case logLocation == "":
   605  		return nil, errors.New("no 'log_location' tag")
   606  	}
   607  
   608  	addr, err := types.ParseURL(logLocation)
   609  	if err != nil {
   610  		return nil, errors.Annotate(err, "could not parse LogDog stream from location").Err()
   611  	}
   612  
   613  	// The LogDog stream's project should match the LUCI project.
   614  	if string(addr.Project) != luciProject {
   615  		return nil, errors.Reason("stream project %q doesn't match LUCI project %q", addr.Project, luciProject).Err()
   616  	}
   617  
   618  	return addr, nil
   619  }
   620  
   621  // buildFromLogs returns a milo build from just the swarming log and result data.
   622  // TODO(hinoka): Remove this once skia moves logging to logdog/kitchen.
   623  func buildFromLogs(c context.Context, taskURL *url.URL, fr *swarmingFetchResult) (*ui.MiloBuildLegacy, error) {
   624  	var build ui.MiloBuildLegacy
   625  	var step *annopb.Step
   626  
   627  	// Decode the data using annotee. The logdog stream returned here is assumed
   628  	// to be consistent, which is why the following block of code are not
   629  	// expected to ever err out.
   630  	if fr.log != "" {
   631  		lds, err := streamsFromAnnotatedLog(c, fr.log)
   632  		if err != nil {
   633  			comp := infoComponent(milostatus.InfraFailure, "Milo annotation parser", err.Error())
   634  			comp.SubLink = append(comp.SubLink, ui.LinkSet{
   635  				ui.NewLink("swarming task", taskURL.String(), ""),
   636  			})
   637  			build.Components = append(build.Components, comp)
   638  		} else if lds.MainStream != nil {
   639  			step = proto.Clone(lds.MainStream.Data).(*annopb.Step)
   640  		}
   641  	}
   642  
   643  	if err := addTaskToMiloStep(c, taskURL.Host, fr.res, step); err != nil {
   644  		return nil, err
   645  	}
   646  
   647  	// Log links are built relative to swarming URLs
   648  	id := taskURL.Query().Get("id")
   649  	ub := swarmingURLBuilder(id)
   650  	rawpresentation.AddLogDogToBuild(c, ub, step, &build)
   651  
   652  	addFailureSummary(&build)
   653  
   654  	err := addTaskToBuild(c, taskURL.Host, fr.res, &build)
   655  	return &build, err
   656  }
   657  
   658  // addFailureSummary adds failure summary information to the main status,
   659  // derivied from individual steps.
   660  func addFailureSummary(b *ui.MiloBuildLegacy) {
   661  	for _, comp := range b.Components {
   662  		// Add interesting information into the main summary text.
   663  		if comp.Status != milostatus.Success {
   664  			b.Summary.Text = append(
   665  				b.Summary.Text, fmt.Sprintf("%s %s", comp.Status, comp.Label))
   666  		}
   667  	}
   668  }
   669  
   670  // SwarmingBuildImpl fetches data from Swarming and LogDog and produces a resp.MiloBuildLegacy
   671  // representation of a build state given a Swarming TaskID.
   672  func SwarmingBuildImpl(c context.Context, svc swarmingService, taskID string) (*ui.MiloBuildLegacy, error) {
   673  	// First, get the task result from swarming, and maybe the logs.
   674  	fr, logDogStreamAddr, err := swarmingFetchMaybeLogs(c, svc, taskID)
   675  	if err != nil {
   676  		return nil, err
   677  	}
   678  	swarmingResult := fr.res
   679  
   680  	// Legacy codepath - Annotations are encoded in the swarming log instead of LogDog.
   681  	// TODO(hinoka): Remove this once skia moves logging to logdog/kitchen.
   682  	if logDogStreamAddr == nil {
   683  		taskURL := TaskPageURL(svc.GetHost(), taskID)
   684  		return buildFromLogs(c, taskURL, fr)
   685  	}
   686  
   687  	// Create an empty build here first because we might want to add some
   688  	// system-level messages.
   689  	var build ui.MiloBuildLegacy
   690  
   691  	// Load the build from the LogDog service.  For known classes of errors, add
   692  	// steps in the build presentation to explain what may be going on.
   693  	step, err := rawpresentation.ReadAnnotations(c, logDogStreamAddr)
   694  	switch errors.Unwrap(err) {
   695  	case coordinator.ErrNoSuchStream:
   696  		// The stream was not found.  This could be due to one of two things:
   697  		// 1. The step just started and we're just waiting for the logs
   698  		// to propogage to logdog.
   699  		// 2. The bootstrap on the client failed, and never sent data to logdog.
   700  		// This would be evident because the swarming result would be a failure.
   701  		if swarmingResult.State == TaskCompleted {
   702  			err = failedToStart(c, &build, swarmingResult, svc.GetHost())
   703  			return &build, err
   704  		}
   705  		logging.WithError(err).Errorf(c, "User cannot access stream.")
   706  		build.Components = append(build.Components, infoComponent(milostatus.Running,
   707  			"Waiting...", "waiting for annotation stream"))
   708  
   709  	case coordinator.ErrNoAccess:
   710  		logging.WithError(err).Errorf(c, "User cannot access stream.")
   711  		build.Components = append(build.Components, infoComponent(milostatus.Failure,
   712  			"No Access", "no access to annotation stream"))
   713  	case nil:
   714  		// continue
   715  
   716  	default:
   717  		logging.WithError(err).Errorf(c, "Failed to load LogDog annotation stream.")
   718  		build.Components = append(build.Components, infoComponent(milostatus.InfraFailure,
   719  			"Error", "failed to load annotation stream: "+err.Error()))
   720  	}
   721  
   722  	// Skip these steps if the LogDog stream doesn't exist.
   723  	// i.e. when the stream isn't ready yet, or errored out.
   724  	if step != nil {
   725  		// Milo Step Proto += Swarming Result Data
   726  		if err := addTaskToMiloStep(c, svc.GetHost(), swarmingResult, step); err != nil {
   727  			return nil, err
   728  		}
   729  		// Log links are linked directly to the logdog service.  This is used when
   730  		// converting proto step data to resp build structs
   731  		ub := rawpresentation.NewURLBuilder(logDogStreamAddr)
   732  		rawpresentation.AddLogDogToBuild(c, ub, step, &build)
   733  	}
   734  	addFailureSummary(&build)
   735  
   736  	// Milo Resp Build += Swarming Result Data
   737  	// This is done for things in resp but not in step like the banner, buildset,
   738  	// recipe link, bot info, title, etc.
   739  	err = addTaskToBuild(c, svc.GetHost(), swarmingResult, &build)
   740  	return &build, err
   741  }
   742  
   743  // infoComponent is a helper function to return a resp build step with the
   744  // given status, label, and step text.
   745  func infoComponent(st milostatus.Status, label, text string) *ui.BuildComponent {
   746  	return &ui.BuildComponent{
   747  		Type:   ui.Summary,
   748  		Label:  ui.NewEmptyLink(label),
   749  		Text:   []string{text},
   750  		Status: st,
   751  	}
   752  }
   753  
   754  // isAllowed checks if:
   755  // 1. allow_milo:1 is present.  If so, it's a public job.
   756  // 2. luci_project is present, and if the logged in user has access to that project.
   757  func isAllowed(c context.Context, tags []string) bool {
   758  	for _, t := range tags {
   759  		if t == "allow_milo:1" {
   760  			return true
   761  		}
   762  	}
   763  	for _, t := range tags {
   764  		if strings.HasPrefix(t, "luci_project:") {
   765  			sp := strings.SplitN(t, ":", 2)
   766  			if len(sp) != 2 {
   767  				return false
   768  			}
   769  			logging.Debugf(c, "Checking if user has access to %s", sp[1])
   770  			// sp[1] is the project ID.
   771  			allowed, err := projectconfig.IsAllowed(c, sp[1])
   772  			if err != nil {
   773  				logging.WithError(err).Errorf(c, "could not perform acl check")
   774  				return false
   775  			}
   776  			return allowed
   777  		}
   778  	}
   779  	return false
   780  }
   781  
   782  // TaskPageURL returns a URL to a human-consumable page of a swarming task.
   783  // Supports host aliases.
   784  func TaskPageURL(swarmingHostname, taskID string) *url.URL {
   785  	val := url.Values{}
   786  	val.Set("id", taskID)
   787  	val.Set("show_raw", "1")
   788  	val.Set("wide_logs", "true")
   789  	return &url.URL{
   790  		Scheme:   "https",
   791  		Host:     swarmingHostname,
   792  		Path:     "task",
   793  		RawQuery: val.Encode(),
   794  	}
   795  }
   796  
   797  // botPageURL returns a URL to a human-consumable page of a swarming bot.
   798  // Supports host aliases.
   799  func botPageURL(swarmingHostname, botID string) string {
   800  	return fmt.Sprintf("https://%s/restricted/bot/%s", swarmingHostname, botID)
   801  }
   802  
// URLBase is the routing prefix for swarming endpoints. It is declared here
// so that swarmingURLBuilder and the frontend share one constant instead of
// each spelling out the path.
const URLBase = "/swarming/task"
   806  
// swarmingURLBuilder is a logdog.URLBuilder that builds Milo swarming log
// links.
//
// Its string value should be the swarming task id; BuildLink places it right
// after URLBase in the path of every log link it generates.
type swarmingURLBuilder string
   812  
   813  func (b swarmingURLBuilder) BuildLink(l *annopb.AnnotationLink) *ui.Link {
   814  	switch t := l.Value.(type) {
   815  	case *annopb.AnnotationLink_LogdogStream:
   816  		ls := t.LogdogStream
   817  
   818  		link := ui.NewLink(l.Label, fmt.Sprintf("%s/%s/%s", URLBase, b, ls.Name), "")
   819  		if link.Label == "" {
   820  			link.Label = ls.Name
   821  		}
   822  		link.AriaLabel = fmt.Sprintf("log link for %s", link.Label)
   823  		return link
   824  
   825  	case *annopb.AnnotationLink_Url:
   826  		return ui.NewLink(l.Label, t.Url, fmt.Sprintf("step link for %s", l.Label))
   827  
   828  	default:
   829  		return nil
   830  	}
   831  }
   832  
   833  func swarmingTags(v []string) map[string]string {
   834  	res := make(map[string]string, len(v))
   835  	for _, tag := range v {
   836  		var value string
   837  		parts := strings.SplitN(tag, ":", 2)
   838  		if len(parts) == 2 {
   839  			value = parts[1]
   840  		}
   841  		res[parts[0]] = value
   842  	}
   843  	return res
   844  }
   845  
// BuildID is swarming's notion of a Build: a swarming task, optionally
// qualified by the swarming host it lives on. See buildsource.ID.
type BuildID struct {
	// (Required) The Swarming TaskID.
	TaskID string

	// (Optional) The Swarming host. If empty, will use the
	// milo-instance-configured swarming host.
	Host string
}
   855  
   856  // getSwarmingHost returns default hostname if host is empty.
   857  // If host is not empty and not allowed, returns an error.
   858  func getSwarmingHost(c context.Context, host string) (string, error) {
   859  	settings := config.GetSettings(c)
   860  	if settings.Swarming == nil {
   861  		err := errors.New("swarming not in settings")
   862  		logging.WithError(err).Errorf(c, "Go configure swarming in the settings page.")
   863  		return "", err
   864  	}
   865  
   866  	if host == "" || host == settings.Swarming.DefaultHost {
   867  		return settings.Swarming.DefaultHost, nil
   868  	}
   869  	// If it is specified, validate the hostname.
   870  	for _, allowed := range settings.Swarming.AllowedHosts {
   871  		if host == allowed {
   872  			return host, nil
   873  		}
   874  	}
   875  	return "", errors.New("unknown swarming host", grpcutil.InvalidArgumentTag)
   876  }
   877  
   878  // GetBuild returns a milo build from a swarming task id.
   879  func GetBuild(c context.Context, host, taskID string) (*ui.MiloBuildLegacy, error) {
   880  	if taskID == "" {
   881  		return nil, errors.New("no swarming task id", grpcutil.InvalidArgumentTag)
   882  	}
   883  
   884  	sf, err := newProdService(c, host)
   885  	if err != nil {
   886  		return nil, err
   887  	}
   888  	defer sf.Close()
   889  
   890  	return SwarmingBuildImpl(c, sf, taskID)
   891  }
   892  
   893  // RedirectsFromTask returns either
   894  //   - The ID of the buildbucket build corresponding to this task. OR
   895  //   - The build.proto logdog stream from this swarming task.
   896  //
   897  // If the task does not represent a buildbucket build, returns (0, "", nil).
   898  func RedirectsFromTask(c context.Context, host, taskID string) (int64, string, error) {
   899  	sf, err := newProdService(c, host)
   900  	if err != nil {
   901  		return 0, "", err
   902  	}
   903  	defer sf.Close()
   904  
   905  	res, err := sf.client.Task.Request(taskID).Context(c).Do()
   906  	switch err := err.(type) {
   907  	case *googleapi.Error:
   908  		switch err.Code {
   909  		case http.StatusNotFound:
   910  			return 0, "", errors.Annotate(err, "task %s/%s not found", host, taskID).Tag(grpcutil.NotFoundTag).Err()
   911  		case http.StatusBadRequest:
   912  			return 0, "", errors.Annotate(err, "bad request").Tag(grpcutil.InvalidArgumentTag).Err()
   913  		}
   914  	case error:
   915  		return 0, "", err
   916  	}
   917  
   918  	for _, t := range res.Tags {
   919  		const bbPrefix = "buildbucket_build_id:"
   920  		if strings.HasPrefix(t, bbPrefix) {
   921  			value := t[len(bbPrefix):]
   922  			id, err := strconv.ParseInt(value, 10, 64)
   923  			if err != nil {
   924  				logging.Errorf(c, "failed to parse buildbucket_build_id tag %q as int64: %s", value, err)
   925  				return 0, "", nil
   926  			}
   927  			return id, "", nil
   928  		}
   929  
   930  		const ldPrefix = "log_location:"
   931  		if strings.HasPrefix(t, ldPrefix) {
   932  			url := t[len(ldPrefix):]
   933  			url = strings.TrimPrefix(url, "logdog://")
   934  			return 0, url, nil
   935  		}
   936  	}
   937  	return 0, "", nil
   938  }