// Copyright 2020 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package job

import (
	"context"
	"encoding/hex"
	"fmt"
	"path"
	"sort"
	"strings"
	"time"

	"github.com/golang/protobuf/ptypes"
	"google.golang.org/protobuf/proto"
	durpb "google.golang.org/protobuf/types/known/durationpb"

	"go.chromium.org/luci/buildbucket/cmd/bbagent/bbinput"
	"go.chromium.org/luci/common/clock"
	"go.chromium.org/luci/common/data/rand/cryptorand"
	"go.chromium.org/luci/common/data/stringset"
	"go.chromium.org/luci/common/errors"
	"go.chromium.org/luci/common/flag/flagenum"
	"go.chromium.org/luci/led/job/experiments"
	logdog_types "go.chromium.org/luci/logdog/common/types"
	swarmingpb "go.chromium.org/luci/swarming/proto/api_v2"
)

// cipdInput is the JSON shape of the CIPD package/version pair passed to the
// "$recipe_engine/led" recipe module property (see ledProperties).
type cipdInput struct {
	Package string `json:"package"`
	Version string `json:"version"`
}

// ledProperties is the JSON value written under the "$recipe_engine/led"
// property key by addLedProperties. It tells a led-launched build which
// payload (CIPD package or RBE-CAS input) carries the recipes code, so the
// build can launch child builds with the same version.
type ledProperties struct {
	// LedRunID uniquely identifies this led job (derived from the per-job
	// logdog prefix; see addLedProperties).
	LedRunID string `json:"led_run_id"`

	// RbeCasInput is set when the payload comes from RBE-CAS.
	RbeCasInput *swarmingpb.CASReference `json:"rbe_cas_input,omitempty"`

	// CIPDInput is set when the payload comes from a CIPD package.
	CIPDInput *cipdInput `json:"cipd_input,omitempty"`

	ShadowedBucket string `json:"shadowed_bucket"`
}

// For accepting the "-resultdb" flag of "led launch".
// RDBEnablement is a tri-state flag value ("on", "off", or "" for default)
// controlling Swarming/ResultDB integration of a led-launched task.
type RDBEnablement string

// String implements flag.Value.
func (r *RDBEnablement) String() string {
	return string(*r)
}

// Set implements flag.Value; only values in RdbChoices are accepted.
func (r *RDBEnablement) Set(v string) error {
	return RdbChoices.FlagSet(r, v)
}

const (
	// Swarming/ResultDB integration will be forcefully enabled.
	RDBOn RDBEnablement = "on"
	// Swarming/ResultDB integration will be forcefully disabled.
	RDBOff RDBEnablement = "off"
)

// RdbChoices enumerates the legal values for the "-resultdb" flag.
var RdbChoices = flagenum.Enum{
	"on":  RDBOn,
	"off": RDBOff,
}

// addLedProperties stamps led-specific metadata onto the Buildbucket job:
// it sets the build's creation time, generates a unique logdog prefix for
// this led run, writes the "$recipe_engine/led" property (payload info +
// run ID), and tags the job with its log location.
//
// Panics if jd is not a Buildbucket job. Mutates jd in place.
func (jd *Definition) addLedProperties(ctx context.Context, uid string) (err error) {
	// Set the "$recipe_engine/led" recipe properties.
	bb := jd.GetBuildbucket()
	if bb == nil {
		panic("impossible: Buildbucket is nil while flattening to swarming")
	}
	// NOTE(review): EnsureBasics is defined elsewhere; presumably it makes the
	// nested Build/Infra/Logdog messages non-nil, which the dotted accesses
	// below rely on — confirm against its definition.
	bb.EnsureBasics()

	bb.BbagentArgs.Build.CreateTime, err = ptypes.TimestampProto(clock.Now(ctx))
	if err != nil {
		return errors.Annotate(err, "populating creation time").Err()
	}

	// 32 random bytes -> hex makes the logdog prefix (and hence LedRunID)
	// unique per led invocation.
	buf := make([]byte, 32)
	if _, err := cryptorand.Read(ctx, buf); err != nil {
		return errors.Annotate(err, "generating random token").Err()
	}
	logdogPrefixSN, err := logdog_types.MakeStreamName("", "led", uid, hex.EncodeToString(buf))
	if err != nil {
		return errors.Annotate(err, "generating logdog token").Err()
	}
	logdogPrefix := string(logdogPrefixSN)
	logdogProjectPrefix := path.Join(bb.BbagentArgs.Build.Infra.Logdog.Project, logdogPrefix)

	// TODO(iannucci): change logdog project to something reserved to 'led' tasks.
	// Though if we merge logdog into resultdb, this hopefully becomes moot.
	bb.BbagentArgs.Build.Infra.Logdog.Prefix = logdogPrefix

	// Pass the CIPD package or isolate containing the recipes code into
	// the led recipe module. This gives the build the information it needs
	// to launch child builds using the same version of the recipes code.
	//
	// The logdog prefix is unique to each led job, so it can be used as an
	// ID for the job.
	props := ledProperties{LedRunID: logdogProjectPrefix}
	casUserPayload, err := jd.Info().CurrentIsolated()
	if err != nil {
		return errors.Annotate(err, "failed to get CAS user payload for the build").Err()
	}
	// Prefer the CIPD exe payload; fall back to the RBE-CAS payload only when
	// no CIPD package is configured.
	if exe := bb.GetBbagentArgs().GetBuild().GetExe(); exe.GetCipdPackage() != "" {
		props.CIPDInput = &cipdInput{
			Package: exe.CipdPackage,
			Version: exe.CipdVersion,
		}
	} else if casUserPayload.GetDigest() != nil {
		props.RbeCasInput = proto.Clone(casUserPayload).(*swarmingpb.CASReference)
	}

	// in case both isolate and rbe-cas properties are set in "$recipe_engine/led".
	// Writing nil first clears any pre-existing value before the fresh write.
	bb.WriteProperties(map[string]any{
		"$recipe_engine/led": nil,
	})
	bb.WriteProperties(map[string]any{
		"$recipe_engine/led": props,
	})

	// Legacy kitchen builds emit an "annotations" stream instead of
	// "build.proto".
	streamName := "build.proto"
	if bb.LegacyKitchen {
		streamName = "annotations"
	}

	logdogHost := "logs.chromium.org"
	if strings.Contains(jd.Info().SwarmingHostname(), "-dev") {
		logdogHost = "luci-logdog-dev.appspot.com"
	}

	logdogTag := "log_location:logdog://" + path.Join(
		logdogHost, logdogProjectPrefix, "+", streamName)

	return jd.Edit(func(je Editor) {
		je.Tags([]string{logdogTag, "allow_milo:1"})
	})
}

// expiringDims is one "expiration bucket": the set of dimensions (and
// cache-derived dimensions) that stop applying after `absolute` time from
// task scheduling. It later becomes one swarming TaskSlice.
type expiringDims struct {
	absolute time.Duration // from scheduling task
	relative time.Duration // from previous slice

	// key -> values
	dimensions map[string]stringset.Set
}

// addDimVals adds values under key, lazily allocating the map/set.
func (ed *expiringDims) addDimVals(key string, values ...string) {
	if ed.dimensions == nil {
		ed.dimensions = map[string]stringset.Set{}
	}
	if set, ok := ed.dimensions[key]; !ok {
		ed.dimensions[key] = stringset.NewFromSlice(values...)
	} else {
		set.AddAll(values)
	}
}

// updateFrom unions all of other's dimension values into ed.
func (ed *expiringDims) updateFrom(other *expiringDims) {
	for key, values := range other.dimensions {
		ed.addDimVals(key, values.ToSlice()...)
	}
}

// createWith returns a clone of template whose Dimensions are ed's
// dimensions, flattened to sorted (key, value) StringPairs for deterministic
// output. Panics if template already carries dimensions.
func (ed *expiringDims) createWith(template *swarmingpb.TaskProperties) *swarmingpb.TaskProperties {
	if len(template.Dimensions) != 0 {
		panic("impossible; createWith called with dimensions already set")
	}

	ret := proto.Clone(template).(*swarmingpb.TaskProperties)

	newDims := make([]*swarmingpb.StringPair, 0, len(ed.dimensions))
	// keysOf is defined elsewhere in this file; presumably it returns the
	// map's keys in sorted order (output here must be deterministic).
	for _, key := range keysOf(ed.dimensions) {
		for _, value := range ed.dimensions[key].ToSortedSlice() {
			newDims = append(newDims, &swarmingpb.StringPair{
				Key: key, Value: value})
		}
	}
	ret.Dimensions = newDims

	return ret
}

// makeExpiringSliceData groups the build's cache and task dimensions by
// expiration time and returns one expiringDims per distinct expiration,
// sorted ascending by absolute expiration, with `relative` set to the delta
// from the previous entry. Each earlier entry is a superset of every later
// one (a slice expiring sooner still requires everything the later slices
// require).
func (jd *Definition) makeExpiringSliceData() (ret []*expiringDims, err error) {
	bb := jd.GetBuildbucket()
	expirationSet := map[time.Duration]*expiringDims{}
	nonExpiring := &expiringDims{}
	// getExpiringSlot returns (and lazily creates) the bucket for a positive
	// expiration, nil for "no expiration slot" (dur <= 0 or unset).
	getExpiringSlot := func(dimType, name string, protoDuration *durpb.Duration) (*expiringDims, error) {
		var dur time.Duration
		if protoDuration != nil {
			var err error
			if dur, err = ptypes.Duration(protoDuration); err != nil {
				return nil, errors.Annotate(err, "parsing %s %q expiration", dimType, name).Err()
			}
		}
		if dur > 0 {
			data, ok := expirationSet[dur]
			if !ok {
				data = &expiringDims{absolute: dur}
				expirationSet[dur] = data
			}
			return data, nil
		}
		return nil, nil
	}
	// Cache and dimension expiration have opposite defaults for 0 or negative
	// times.
	//
	// Cache entries with WaitForWarmCache <= 0 mean that the dimension for the
	// cache essentially expires at 0.
	//
	// Dimension entries with Expiration <= 0 mean that the dimension expires at
	// 'infinity'
	for _, cache := range bb.BbagentArgs.GetBuild().GetInfra().GetSwarming().GetCaches() {
		slot, err := getExpiringSlot("cache", cache.Name, cache.WaitForWarmCache)
		if err != nil {
			return nil, err
		}
		if slot != nil {
			// Caches become "caches:<name>" dimensions while they apply.
			slot.addDimVals("caches", cache.Name)
		}
	}
	for _, dim := range bb.BbagentArgs.GetBuild().GetInfra().GetSwarming().GetTaskDimensions() {
		slot, err := getExpiringSlot("dimension", dim.Key, dim.Expiration)
		if err != nil {
			return nil, err
		}
		if slot == nil {
			// No (positive) expiration -> the dimension never expires.
			slot = nonExpiring
		}
		slot.addDimVals(dim.Key, dim.Value)
	}

	// Sort buckets by absolute expiration and derive per-slice relative times.
	ret = make([]*expiringDims, 0, len(expirationSet))
	if len(expirationSet) > 0 {
		for _, data := range expirationSet {
			ret = append(ret, data)
		}
		sort.Slice(ret, func(i, j int) bool {
			return ret[i].absolute < ret[j].absolute
		})
		ret[0].relative = ret[0].absolute
		for i := range ret[1:] {
			ret[i+1].relative = ret[i+1].absolute - ret[i].absolute
		}
	}
	// NOTE: a nil/invalid SchedulingTimeout makes ptypes.Duration error, which
	// silently skips this whole branch (nonExpiring dims would be dropped).
	if total, err := ptypes.Duration(bb.BbagentArgs.Build.SchedulingTimeout); err == nil {
		if len(ret) == 0 || ret[len(ret)-1].absolute < total {
			// if the task's total expiration time is greater than the last slice's
			// expiration, then use nonExpiring as the last slice.
			nonExpiring.absolute = total
			if len(ret) > 0 {
				nonExpiring.relative = total - ret[len(ret)-1].absolute
			} else {
				nonExpiring.relative = total
			}
			ret = append(ret, nonExpiring)
		} else {
			// otherwise, add all of nonExpiring's guts to the last slice.
			ret[len(ret)-1].updateFrom(nonExpiring)
		}
	}

	// Ret now looks like:
	//   rel @ 20s - caches:[a b c]
	//   rel @ 40s - caches:[d e]
	//   rel @ inf - caches:[f]
	//
	// We need to transform this into:
	//   rel @ 20s - caches:[a b c d e f]
	//   rel @ 40s - caches:[d e f]
	//   rel @ inf - caches:[f]
	//
	// Since a slice expiring at 20s includes all the caches (and dimensions) of
	// all slices expiring after it.
	for i := len(ret) - 2; i >= 0; i-- {
		ret[i].updateFrom(ret[i+1])
	}

	return
}

// generateCommand builds the swarming task command line: either the legacy
// kitchen command (delegated to ks) or a bbagent invocation whose final
// argument is the base64-encoded BBAgentArgs. Mutates bb (clears the
// buildbucket hostname; defaults CacheDir and PayloadPath) as a side effect.
func (jd *Definition) generateCommand(ctx context.Context, ks KitchenSupport) ([]string, error) {
	bb := jd.GetBuildbucket()

	if bb.LegacyKitchen {
		return ks.GenerateCommand(ctx, bb)
	}

	ret := []string{"bbagent${EXECUTABLE_SUFFIX}"}
	if bb.FinalBuildProtoPath != "" {
		ret = append(ret, "--output", path.Join("${ISOLATED_OUTDIR}", bb.FinalBuildProtoPath))
	}
	// led tasks don't report back to a real Buildbucket instance.
	bb.BbagentArgs.Build.Infra.Buildbucket.Hostname = ""
	if bb.BbagentArgs.CacheDir == "" {
		bb.BbagentArgs.CacheDir = bb.BbagentArgs.Build.GetInfra().GetBbagent().GetCacheDir()
	}
	if bb.BbagentArgs.PayloadPath == "" {
		bb.BbagentArgs.PayloadPath = "kitchen-checkout"
	}
	return append(ret, bbinput.Encode(bb.BbagentArgs)), nil
}

// generateCIPDPackages returns the CIPD packages for the swarming task:
// the job's full package list when bbagent does NOT download packages
// itself, otherwise just the bbagent binary from the build's agent source
// (installed at ".").
func (jd *Definition) generateCIPDPackages() (cipdPackages []*swarmingpb.CipdPackage) {
	cipdPackages = ([]*swarmingpb.CipdPackage)(nil)
	bb := jd.GetBuildbucket()
	if !bb.BbagentDownloadCIPDPkgs() {
		cipdPackages = append(cipdPackages, bb.CipdPackages...)
		return
	}

	if agentSrc := bb.BbagentArgs.GetBuild().GetInfra().GetBuildbucket().GetAgent().GetSource(); agentSrc != nil {
		if cipdSource := agentSrc.GetCipd(); cipdSource != nil {
			cipdPackages = append(cipdPackages, &swarmingpb.CipdPackage{
				Path:        ".",
				PackageName: cipdSource.Package,
				Version:     cipdSource.Version,
			})
		}
	}
	return
}

// FlattenToSwarming modifies this Definition to populate the Swarming field
// from the Buildbucket field.
//
// After flattening, HighLevelEdit functionality will no longer work on this
// Definition.
//
// `uid` and `parentTaskId`, if specified, override the user and parentTaskId
// fields, respectively.
func (jd *Definition) FlattenToSwarming(ctx context.Context, uid, parentTaskId string, ks KitchenSupport, resultdb RDBEnablement) error {
	// Already a raw swarming task: just apply the overrides and the resultdb
	// choice, then we're done.
	if sw := jd.GetSwarming(); sw != nil {
		if uid != "" {
			sw.Task.User = uid
		}
		if parentTaskId != "" {
			sw.Task.ParentTaskId = parentTaskId
		}
		switch resultdb {
		case RDBOff:
			sw.Task.Resultdb = nil
		case RDBOn:
			// ResultDB on swarming requires a realm.
			if sw.Task.Realm != "" {
				sw.Task.Resultdb = &swarmingpb.ResultDBCfg{
					Enable: true,
				}
			} else {
				return errors.Reason("ResultDB cannot be enabled on raw swarming tasks if the realm field is unset").Err()
			}
		default:
			// "" -> leave the task's existing ResultDB setting alone.
		}
		return nil
	}
	// Order matters below: addLedProperties mutates the build (logdog prefix,
	// properties) before slices and the command line are derived from it.
	err := jd.addLedProperties(ctx, uid)
	if err != nil {
		return errors.Annotate(err, "adding led properties").Err()
	}

	expiringDims, err := jd.makeExpiringSliceData()
	if err != nil {
		return errors.Annotate(err, "calculating expirations").Err()
	}

	bb := jd.GetBuildbucket()
	bbi := bb.GetBbagentArgs().GetBuild().GetInfra()
	project := bb.GetBbagentArgs().GetBuild().GetBuilder().GetProject()
	bucket := bb.GetBbagentArgs().GetBuild().GetBuilder().GetBucket()
	if project == "" || bucket == "" {
		return errors.Reason("incomplete Builder ID, need both `project` and `bucket` set").Err()
	}
	sw := &Swarming{
		Hostname: jd.Info().SwarmingHostname(),
		Task: &swarmingpb.NewTaskRequest{
			Name:           jd.Info().TaskName(),
			Realm:          fmt.Sprintf("%s:%s", project, bucket),
			ParentTaskId:   parentTaskId,
			Priority:       jd.Info().Priority(),
			ServiceAccount: bbi.GetSwarming().GetTaskServiceAccount(),
			Tags:           jd.Info().Tags(),
			User:           uid,
			TaskSlices:     make([]*swarmingpb.TaskSlice, len(expiringDims)),
		},
	}

	// Enable swarming/resultdb integration.
	// Default ("") inherits from the original build: on iff it had an
	// invocation.
	enableRDB := (resultdb == RDBOn || (resultdb == "" && bbi.GetResultdb().GetInvocation() != ""))
	if enableRDB {
		// Clear the original build's ResultDB invocation.
		bbi.Resultdb.Invocation = ""
		sw.Task.Resultdb = &swarmingpb.ResultDBCfg{
			Enable: true,
		}
	}

	var casUserPayload *swarmingpb.CASReference
	// Do not set CAS input to task slices if bbagent handles downloading packages.
	if !bb.BbagentDownloadCIPDPkgs() {
		casUserPayload, err = jd.Info().CurrentIsolated()
		if err != nil {
			return errors.Annotate(err, "failed to get CAS user payload for the build").Err()
		}
	}
	// baseProperties is the per-slice template; each slice below clones it
	// and adds that slice's dimensions (see expiringDims.createWith).
	baseProperties := &swarmingpb.TaskProperties{
		CipdInput: &swarmingpb.CipdInput{
			Packages: jd.generateCIPDPackages(),
		},
		CasInputRoot: casUserPayload,

		EnvPrefixes:          bb.EnvPrefixes,
		ExecutionTimeoutSecs: int32(bb.BbagentArgs.Build.ExecutionTimeout.GetSeconds()),

		// TODO(iannucci): When build creation is done in Go, share this 3 minute
		// constant between here and there. Or, better, implement CreateBuild so we
		// don't have to do this at all.
		GracePeriodSecs: int32(bb.BbagentArgs.Build.GracePeriod.GetSeconds()) + 180,
	}

	if bb.Containment.GetContainmentType() != swarmingpb.ContainmentType_NOT_SPECIFIED {
		baseProperties.Containment = bb.Containment
	}

	// Copy env vars and append the BUILDBUCKET_EXPERIMENTAL flag as the last
	// entry (the slice was sized len(EnvVars)+1 for it).
	baseProperties.Env = make([]*swarmingpb.StringPair, len(bb.EnvVars)+1)
	copy(baseProperties.Env, bb.EnvVars)
	expEnvValue := "FALSE"
	if bb.BbagentArgs.Build.Input.Experimental {
		expEnvValue = "TRUE"
	}
	baseProperties.Env[len(baseProperties.Env)-1] = &swarmingpb.StringPair{
		Key:   "BUILDBUCKET_EXPERIMENTAL",
		Value: expEnvValue,
	}

	if caches := bb.BbagentArgs.Build.Infra.Swarming.GetCaches(); len(caches) > 0 {
		baseProperties.Caches = make([]*swarmingpb.CacheEntry, len(caches))
		for i, cache := range caches {
			baseProperties.Caches[i] = &swarmingpb.CacheEntry{
				Name: cache.Name,
				Path: path.Join(bb.CacheDir(), cache.Path),
			}
		}
	}

	baseProperties.Command, err = jd.generateCommand(ctx, ks)
	if err != nil {
		return errors.Annotate(err, "generating Command").Err()
	}

	// When bbagent does not download packages itself, the exe payload is
	// installed directly into the task via CIPD.
	if exe := bb.BbagentArgs.Build.Exe; exe.GetCipdPackage() != "" && !bb.BbagentDownloadCIPDPkgs() {
		baseProperties.CipdInput.Packages = append(baseProperties.CipdInput.Packages, &swarmingpb.CipdPackage{
			PackageName: exe.CipdPackage,
			Version:     exe.CipdVersion,
			Path:        bb.PayloadPath(),
		})
	}

	// One TaskSlice per expiration bucket, earliest-expiring first.
	for i, dat := range expiringDims {
		sw.Task.TaskSlices[i] = &swarmingpb.TaskSlice{
			ExpirationSecs: int32(dat.relative.Seconds()),
			Properties:     dat.createWith(baseProperties),
		}
	}

	if err := experiments.Apply(ctx, bb.BbagentArgs.Build, sw.Task); err != nil {
		return errors.Annotate(err, "applying experiments").Err()
	}

	jd.JobType = &Definition_Swarming{Swarming: sw}
	return nil
}