go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/led/job/job.go (about)

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package job
    16  
    17  import (
    18  	"context"
    19  	"encoding/hex"
    20  	"fmt"
    21  	"path"
    22  	"sort"
    23  	"strings"
    24  	"time"
    25  
    26  	"github.com/golang/protobuf/ptypes"
    27  	"google.golang.org/protobuf/proto"
    28  	durpb "google.golang.org/protobuf/types/known/durationpb"
    29  
    30  	"go.chromium.org/luci/buildbucket/cmd/bbagent/bbinput"
    31  	"go.chromium.org/luci/common/clock"
    32  	"go.chromium.org/luci/common/data/rand/cryptorand"
    33  	"go.chromium.org/luci/common/data/stringset"
    34  	"go.chromium.org/luci/common/errors"
    35  	"go.chromium.org/luci/common/flag/flagenum"
    36  	"go.chromium.org/luci/led/job/experiments"
    37  	logdog_types "go.chromium.org/luci/logdog/common/types"
    38  	swarmingpb "go.chromium.org/luci/swarming/proto/api_v2"
    39  )
    40  
// cipdInput names a CIPD package at a specific version. It is serialized as the
// "cipd_input" field of ledProperties.
type cipdInput struct {
	Package string `json:"package"` // CIPD package name
	Version string `json:"version"` // version of that package
}
    45  
// ledProperties is the value addLedProperties writes to the
// "$recipe_engine/led" build property.
type ledProperties struct {
	// LedRunID is the logdog project joined with the job's logdog prefix.
	LedRunID string `json:"led_run_id"`

	// RbeCasInput is a copy of the job's CAS user payload. It is only populated
	// when the build's exe names no CIPD package and the payload has a digest.
	RbeCasInput *swarmingpb.CASReference `json:"rbe_cas_input,omitempty"`

	// CIPDInput is populated when the build's exe names a CIPD package.
	CIPDInput *cipdInput `json:"cipd_input,omitempty"`

	// ShadowedBucket is always serialized (no omitempty).
	// NOTE(review): nothing in the visible part of this file assigns it;
	// presumably it is filled in elsewhere — confirm.
	ShadowedBucket string `json:"shadowed_bucket"`
}
    55  
    56  // For accepting the "-resultdb" flag of "led launch".
    57  type RDBEnablement string
    58  
    59  func (r *RDBEnablement) String() string {
    60  	return string(*r)
    61  }
    62  
    63  func (r *RDBEnablement) Set(v string) error {
    64  	return RdbChoices.FlagSet(r, v)
    65  }
    66  
// Values accepted for RDBEnablement.
const (
	// RDBOn forcefully enables Swarming/ResultDB integration.
	RDBOn RDBEnablement = "on"
	// RDBOff forcefully disables Swarming/ResultDB integration.
	RDBOff RDBEnablement = "off"
)
    73  
// RdbChoices maps the strings accepted by the "-resultdb" flag to their
// RDBEnablement values. RDBEnablement.Set uses it to parse the flag.
var RdbChoices = flagenum.Enum{
	"on":  RDBOn,
	"off": RDBOff,
}
    78  
    79  func (jd *Definition) addLedProperties(ctx context.Context, uid string) (err error) {
    80  	// Set the "$recipe_engine/led" recipe properties.
    81  	bb := jd.GetBuildbucket()
    82  	if bb == nil {
    83  		panic("impossible: Buildbucket is nil while flattening to swarming")
    84  	}
    85  	bb.EnsureBasics()
    86  
    87  	bb.BbagentArgs.Build.CreateTime, err = ptypes.TimestampProto(clock.Now(ctx))
    88  	if err != nil {
    89  		return errors.Annotate(err, "populating creation time").Err()
    90  	}
    91  
    92  	buf := make([]byte, 32)
    93  	if _, err := cryptorand.Read(ctx, buf); err != nil {
    94  		return errors.Annotate(err, "generating random token").Err()
    95  	}
    96  	logdogPrefixSN, err := logdog_types.MakeStreamName("", "led", uid, hex.EncodeToString(buf))
    97  	if err != nil {
    98  		return errors.Annotate(err, "generating logdog token").Err()
    99  	}
   100  	logdogPrefix := string(logdogPrefixSN)
   101  	logdogProjectPrefix := path.Join(bb.BbagentArgs.Build.Infra.Logdog.Project, logdogPrefix)
   102  
   103  	// TODO(iannucci): change logdog project to something reserved to 'led' tasks.
   104  	// Though if we merge logdog into resultdb, this hopefully becomes moot.
   105  	bb.BbagentArgs.Build.Infra.Logdog.Prefix = logdogPrefix
   106  
   107  	// Pass the CIPD package or isolate containing the recipes code into
   108  	// the led recipe module. This gives the build the information it needs
   109  	// to launch child builds using the same version of the recipes code.
   110  	//
   111  	// The logdog prefix is unique to each led job, so it can be used as an
   112  	// ID for the job.
   113  	props := ledProperties{LedRunID: logdogProjectPrefix}
   114  	casUserPayload, err := jd.Info().CurrentIsolated()
   115  	if err != nil {
   116  		return errors.Annotate(err, "failed to get CAS user payload for the build").Err()
   117  	}
   118  	if exe := bb.GetBbagentArgs().GetBuild().GetExe(); exe.GetCipdPackage() != "" {
   119  		props.CIPDInput = &cipdInput{
   120  			Package: exe.CipdPackage,
   121  			Version: exe.CipdVersion,
   122  		}
   123  	} else if casUserPayload.GetDigest() != nil {
   124  		props.RbeCasInput = proto.Clone(casUserPayload).(*swarmingpb.CASReference)
   125  	}
   126  
   127  	// in case both isolate and rbe-cas properties are set in "$recipe_engine/led".
   128  	bb.WriteProperties(map[string]any{
   129  		"$recipe_engine/led": nil,
   130  	})
   131  	bb.WriteProperties(map[string]any{
   132  		"$recipe_engine/led": props,
   133  	})
   134  
   135  	streamName := "build.proto"
   136  	if bb.LegacyKitchen {
   137  		streamName = "annotations"
   138  	}
   139  
   140  	logdogHost := "logs.chromium.org"
   141  	if strings.Contains(jd.Info().SwarmingHostname(), "-dev") {
   142  		logdogHost = "luci-logdog-dev.appspot.com"
   143  	}
   144  
   145  	logdogTag := "log_location:logdog://" + path.Join(
   146  		logdogHost, logdogProjectPrefix, "+", streamName)
   147  
   148  	return jd.Edit(func(je Editor) {
   149  		je.Tags([]string{logdogTag, "allow_milo:1"})
   150  	})
   151  }
   152  
// expiringDims holds the dimensions for one task slice along with that slice's
// expiration, expressed both from task scheduling and from the previous slice.
type expiringDims struct {
	absolute time.Duration // expiration measured from when the task is scheduled
	relative time.Duration // expiration measured from the previous slice's expiration

	// dimensions maps a dimension key to the set of values required for it.
	dimensions map[string]stringset.Set
}
   160  
   161  func (ed *expiringDims) addDimVals(key string, values ...string) {
   162  	if ed.dimensions == nil {
   163  		ed.dimensions = map[string]stringset.Set{}
   164  	}
   165  	if set, ok := ed.dimensions[key]; !ok {
   166  		ed.dimensions[key] = stringset.NewFromSlice(values...)
   167  	} else {
   168  		set.AddAll(values)
   169  	}
   170  }
   171  
   172  func (ed *expiringDims) updateFrom(other *expiringDims) {
   173  	for key, values := range other.dimensions {
   174  		ed.addDimVals(key, values.ToSlice()...)
   175  	}
   176  }
   177  
   178  func (ed *expiringDims) createWith(template *swarmingpb.TaskProperties) *swarmingpb.TaskProperties {
   179  	if len(template.Dimensions) != 0 {
   180  		panic("impossible; createWith called with dimensions already set")
   181  	}
   182  
   183  	ret := proto.Clone(template).(*swarmingpb.TaskProperties)
   184  
   185  	newDims := make([]*swarmingpb.StringPair, 0, len(ed.dimensions))
   186  	for _, key := range keysOf(ed.dimensions) {
   187  		for _, value := range ed.dimensions[key].ToSortedSlice() {
   188  			newDims = append(newDims, &swarmingpb.StringPair{
   189  				Key: key, Value: value})
   190  		}
   191  	}
   192  	ret.Dimensions = newDims
   193  
   194  	return ret
   195  }
   196  
// makeExpiringSliceData groups the build's swarming caches and task dimensions
// by expiration and returns one expiringDims per distinct expiration, ordered
// by increasing absolute expiration.
//
// Each returned entry carries the dimensions of its own expiration plus those
// of every later entry. An error is returned only when a cache or dimension
// expiration cannot be converted to a time.Duration.
func (jd *Definition) makeExpiringSliceData() (ret []*expiringDims, err error) {
	bb := jd.GetBuildbucket()
	// expirationSet holds one slot per distinct positive expiration.
	expirationSet := map[time.Duration]*expiringDims{}
	// nonExpiring collects dimensions whose expiration is unset or <= 0.
	nonExpiring := &expiringDims{}
	// getExpiringSlot returns (creating it if needed) the slot for the given
	// expiration. It returns (nil, nil) when protoDuration is nil or not
	// positive; callers decide what that means for their kind of entry.
	getExpiringSlot := func(dimType, name string, protoDuration *durpb.Duration) (*expiringDims, error) {
		var dur time.Duration
		if protoDuration != nil {
			var err error
			if dur, err = ptypes.Duration(protoDuration); err != nil {
				return nil, errors.Annotate(err, "parsing %s %q expiration", dimType, name).Err()
			}
		}
		if dur > 0 {
			data, ok := expirationSet[dur]
			if !ok {
				data = &expiringDims{absolute: dur}
				expirationSet[dur] = data
			}
			return data, nil
		}
		return nil, nil
	}
	// Cache and dimension expiration have opposite defaults for 0 or negative
	// times.
	//
	// Cache entries with WaitForWarmCache <= 0 mean that the dimension for the
	// cache essentially expires at 0.
	//
	// Dimension entries with Expiration <= 0 mean that the dimension expires at
	// 'infinity'
	for _, cache := range bb.BbagentArgs.GetBuild().GetInfra().GetSwarming().GetCaches() {
		slot, err := getExpiringSlot("cache", cache.Name, cache.WaitForWarmCache)
		if err != nil {
			return nil, err
		}
		// A nil slot means the cache never becomes a required dimension.
		if slot != nil {
			slot.addDimVals("caches", cache.Name)
		}
	}
	for _, dim := range bb.BbagentArgs.GetBuild().GetInfra().GetSwarming().GetTaskDimensions() {
		slot, err := getExpiringSlot("dimension", dim.Key, dim.Expiration)
		if err != nil {
			return nil, err
		}
		// A nil slot means the dimension never expires.
		if slot == nil {
			slot = nonExpiring
		}
		slot.addDimVals(dim.Key, dim.Value)
	}

	ret = make([]*expiringDims, 0, len(expirationSet))
	if len(expirationSet) > 0 {
		for _, data := range expirationSet {
			ret = append(ret, data)
		}
		// Map iteration order is random; order the slots by absolute expiration.
		sort.Slice(ret, func(i, j int) bool {
			return ret[i].absolute < ret[j].absolute
		})
		// Derive each slot's expiration relative to the slot before it.
		ret[0].relative = ret[0].absolute
		for i := range ret[1:] {
			ret[i+1].relative = ret[i+1].absolute - ret[i].absolute
		}
	}
	// NOTE(review): when SchedulingTimeout cannot be converted (err != nil), this
	// whole step is skipped and the dimensions collected in nonExpiring are not
	// added to ret at all. Confirm whether silently ignoring the error is
	// intended.
	if total, err := ptypes.Duration(bb.BbagentArgs.Build.SchedulingTimeout); err == nil {
		if len(ret) == 0 || ret[len(ret)-1].absolute < total {
			// if the task's total expiration time is greater than the last slice's
			// expiration, then use nonExpiring as the last slice.
			nonExpiring.absolute = total
			if len(ret) > 0 {
				nonExpiring.relative = total - ret[len(ret)-1].absolute
			} else {
				nonExpiring.relative = total
			}
			ret = append(ret, nonExpiring)
		} else {
			// otherwise, add all of nonExpiring's guts to the last slice.
			ret[len(ret)-1].updateFrom(nonExpiring)
		}
	}

	// Ret now looks like:
	//   rel @ 20s - caches:[a b c]
	//   rel @ 40s - caches:[d e]
	//   rel @ inf - caches:[f]
	//
	// We need to transform this into:
	//   rel @ 20s - caches:[a b c d e f]
	//   rel @ 40s - caches:[d e f]
	//   rel @ inf - caches:[f]
	//
	// Since a slice expiring at 20s includes all the caches (and dimensions) of
	// all slices expiring after it.
	//
	// Walk backwards so every slice absorbs the already-accumulated slice after it.
	for i := len(ret) - 2; i >= 0; i-- {
		ret[i].updateFrom(ret[i+1])
	}

	return
}
   295  
   296  func (jd *Definition) generateCommand(ctx context.Context, ks KitchenSupport) ([]string, error) {
   297  	bb := jd.GetBuildbucket()
   298  
   299  	if bb.LegacyKitchen {
   300  		return ks.GenerateCommand(ctx, bb)
   301  	}
   302  
   303  	ret := []string{"bbagent${EXECUTABLE_SUFFIX}"}
   304  	if bb.FinalBuildProtoPath != "" {
   305  		ret = append(ret, "--output", path.Join("${ISOLATED_OUTDIR}", bb.FinalBuildProtoPath))
   306  	}
   307  	bb.BbagentArgs.Build.Infra.Buildbucket.Hostname = ""
   308  	if bb.BbagentArgs.CacheDir == "" {
   309  		bb.BbagentArgs.CacheDir = bb.BbagentArgs.Build.GetInfra().GetBbagent().GetCacheDir()
   310  	}
   311  	if bb.BbagentArgs.PayloadPath == "" {
   312  		bb.BbagentArgs.PayloadPath = "kitchen-checkout"
   313  	}
   314  	return append(ret, bbinput.Encode(bb.BbagentArgs)), nil
   315  }
   316  
   317  func (jd *Definition) generateCIPDPackages() (cipdPackages []*swarmingpb.CipdPackage) {
   318  	cipdPackages = ([]*swarmingpb.CipdPackage)(nil)
   319  	bb := jd.GetBuildbucket()
   320  	if !bb.BbagentDownloadCIPDPkgs() {
   321  		cipdPackages = append(cipdPackages, bb.CipdPackages...)
   322  		return
   323  	}
   324  
   325  	if agentSrc := bb.BbagentArgs.GetBuild().GetInfra().GetBuildbucket().GetAgent().GetSource(); agentSrc != nil {
   326  		if cipdSource := agentSrc.GetCipd(); cipdSource != nil {
   327  			cipdPackages = append(cipdPackages, &swarmingpb.CipdPackage{
   328  				Path:        ".",
   329  				PackageName: cipdSource.Package,
   330  				Version:     cipdSource.Version,
   331  			})
   332  		}
   333  	}
   334  	return
   335  }
   336  
   337  // FlattenToSwarming modifies this Definition to populate the Swarming field
   338  // from the Buildbucket field.
   339  //
   340  // After flattening, HighLevelEdit functionality will no longer work on this
   341  // Definition.
   342  //
   343  // `uid` and `parentTaskId`, if specified, override the user and parentTaskId
   344  // fields, respectively.
   345  func (jd *Definition) FlattenToSwarming(ctx context.Context, uid, parentTaskId string, ks KitchenSupport, resultdb RDBEnablement) error {
   346  	if sw := jd.GetSwarming(); sw != nil {
   347  		if uid != "" {
   348  			sw.Task.User = uid
   349  		}
   350  		if parentTaskId != "" {
   351  			sw.Task.ParentTaskId = parentTaskId
   352  		}
   353  		switch resultdb {
   354  		case RDBOff:
   355  			sw.Task.Resultdb = nil
   356  		case RDBOn:
   357  			if sw.Task.Realm != "" {
   358  				sw.Task.Resultdb = &swarmingpb.ResultDBCfg{
   359  					Enable: true,
   360  				}
   361  			} else {
   362  				return errors.Reason("ResultDB cannot be enabled on raw swarming tasks if the realm field is unset").Err()
   363  			}
   364  		default:
   365  		}
   366  		return nil
   367  	}
   368  	err := jd.addLedProperties(ctx, uid)
   369  	if err != nil {
   370  		return errors.Annotate(err, "adding led properties").Err()
   371  	}
   372  
   373  	expiringDims, err := jd.makeExpiringSliceData()
   374  	if err != nil {
   375  		return errors.Annotate(err, "calculating expirations").Err()
   376  	}
   377  
   378  	bb := jd.GetBuildbucket()
   379  	bbi := bb.GetBbagentArgs().GetBuild().GetInfra()
   380  	project := bb.GetBbagentArgs().GetBuild().GetBuilder().GetProject()
   381  	bucket := bb.GetBbagentArgs().GetBuild().GetBuilder().GetBucket()
   382  	if project == "" || bucket == "" {
   383  		return errors.Reason("incomplete Builder ID, need both `project` and `bucket` set").Err()
   384  	}
   385  	sw := &Swarming{
   386  		Hostname: jd.Info().SwarmingHostname(),
   387  		Task: &swarmingpb.NewTaskRequest{
   388  			Name:           jd.Info().TaskName(),
   389  			Realm:          fmt.Sprintf("%s:%s", project, bucket),
   390  			ParentTaskId:   parentTaskId,
   391  			Priority:       jd.Info().Priority(),
   392  			ServiceAccount: bbi.GetSwarming().GetTaskServiceAccount(),
   393  			Tags:           jd.Info().Tags(),
   394  			User:           uid,
   395  			TaskSlices:     make([]*swarmingpb.TaskSlice, len(expiringDims)),
   396  		},
   397  	}
   398  
   399  	// Enable swarming/resultdb integration.
   400  	enableRDB := (resultdb == RDBOn || (resultdb == "" && bbi.GetResultdb().GetInvocation() != ""))
   401  	if enableRDB {
   402  		// Clear the original build's ResultDB invocation.
   403  		bbi.Resultdb.Invocation = ""
   404  		sw.Task.Resultdb = &swarmingpb.ResultDBCfg{
   405  			Enable: true,
   406  		}
   407  	}
   408  
   409  	var casUserPayload *swarmingpb.CASReference
   410  	// Do not set CAS input to task slices if bbagent handles downloading packages.
   411  	if !bb.BbagentDownloadCIPDPkgs() {
   412  		casUserPayload, err = jd.Info().CurrentIsolated()
   413  		if err != nil {
   414  			return errors.Annotate(err, "failed to get CAS user payload for the build").Err()
   415  		}
   416  	}
   417  	baseProperties := &swarmingpb.TaskProperties{
   418  		CipdInput: &swarmingpb.CipdInput{
   419  			Packages: jd.generateCIPDPackages(),
   420  		},
   421  		CasInputRoot: casUserPayload,
   422  
   423  		EnvPrefixes:          bb.EnvPrefixes,
   424  		ExecutionTimeoutSecs: int32(bb.BbagentArgs.Build.ExecutionTimeout.GetSeconds()),
   425  
   426  		// TODO(iannucci): When build creation is done in Go, share this 3 minute
   427  		// constant between here and there.  Or, better, implement CreateBuild so we
   428  		// don't have to do this at all.
   429  		GracePeriodSecs: int32(bb.BbagentArgs.Build.GracePeriod.GetSeconds()) + 180,
   430  	}
   431  
   432  	if bb.Containment.GetContainmentType() != swarmingpb.ContainmentType_NOT_SPECIFIED {
   433  		baseProperties.Containment = bb.Containment
   434  	}
   435  
   436  	baseProperties.Env = make([]*swarmingpb.StringPair, len(bb.EnvVars)+1)
   437  	copy(baseProperties.Env, bb.EnvVars)
   438  	expEnvValue := "FALSE"
   439  	if bb.BbagentArgs.Build.Input.Experimental {
   440  		expEnvValue = "TRUE"
   441  	}
   442  	baseProperties.Env[len(baseProperties.Env)-1] = &swarmingpb.StringPair{
   443  		Key:   "BUILDBUCKET_EXPERIMENTAL",
   444  		Value: expEnvValue,
   445  	}
   446  
   447  	if caches := bb.BbagentArgs.Build.Infra.Swarming.GetCaches(); len(caches) > 0 {
   448  		baseProperties.Caches = make([]*swarmingpb.CacheEntry, len(caches))
   449  		for i, cache := range caches {
   450  			baseProperties.Caches[i] = &swarmingpb.CacheEntry{
   451  				Name: cache.Name,
   452  				Path: path.Join(bb.CacheDir(), cache.Path),
   453  			}
   454  		}
   455  	}
   456  
   457  	baseProperties.Command, err = jd.generateCommand(ctx, ks)
   458  	if err != nil {
   459  		return errors.Annotate(err, "generating Command").Err()
   460  	}
   461  
   462  	if exe := bb.BbagentArgs.Build.Exe; exe.GetCipdPackage() != "" && !bb.BbagentDownloadCIPDPkgs() {
   463  		baseProperties.CipdInput.Packages = append(baseProperties.CipdInput.Packages, &swarmingpb.CipdPackage{
   464  			PackageName: exe.CipdPackage,
   465  			Version:     exe.CipdVersion,
   466  			Path:        bb.PayloadPath(),
   467  		})
   468  	}
   469  
   470  	for i, dat := range expiringDims {
   471  		sw.Task.TaskSlices[i] = &swarmingpb.TaskSlice{
   472  			ExpirationSecs: int32(dat.relative.Seconds()),
   473  			Properties:     dat.createWith(baseProperties),
   474  		}
   475  	}
   476  
   477  	if err := experiments.Apply(ctx, bb.BbagentArgs.Build, sw.Task); err != nil {
   478  		return errors.Annotate(err, "applying experiments").Err()
   479  	}
   480  
   481  	jd.JobType = &Definition_Swarming{Swarming: sw}
   482  	return nil
   483  }