go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/bisection/rerun/rerun.go (about) 1 // Copyright 2022 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package rerun handles rerun for a build. 16 package rerun 17 18 import ( 19 "context" 20 "encoding/json" 21 "fmt" 22 "strconv" 23 24 buildbucketpb "go.chromium.org/luci/buildbucket/proto" 25 "go.chromium.org/luci/common/errors" 26 "go.chromium.org/luci/common/logging" 27 "go.chromium.org/luci/gae/service/datastore" 28 "google.golang.org/protobuf/types/known/fieldmaskpb" 29 "google.golang.org/protobuf/types/known/structpb" 30 31 "go.chromium.org/luci/bisection/internal/buildbucket" 32 "go.chromium.org/luci/bisection/model" 33 pb "go.chromium.org/luci/bisection/proto/v1" 34 "go.chromium.org/luci/bisection/testfailureanalysis" 35 "go.chromium.org/luci/bisection/util" 36 "go.chromium.org/luci/bisection/util/datastoreutil" 37 ) 38 39 // TriggerOptions contains information how the rerun should be triggered. 40 type TriggerOptions struct { 41 // The builder we should trigger the rerun on. Required. 42 Builder *buildbucketpb.BuilderID 43 // The gitiles commit for the revision that we want to run the rerun build. Required. 44 GitilesCommit *buildbucketpb.GitilesCommit 45 // The buildbucket ID that the rerun build should copy the properties 46 // and dimension from. Required. 47 SampleBuildID int64 48 // Extra properties we want the rerun build to have. 49 ExtraProperties map[string]any 50 // Extra dimensions we want the rerun build to have. 51 ExtraDimensions map[string]string 52 // Priority of the rerun build. 53 Priority int32 54 } 55 56 // TriggerRerun triggers a rerun build given the options. 57 func TriggerRerun(c context.Context, options *TriggerOptions) (*buildbucketpb.Build, error) { 58 err := validateOptions(options) 59 if err != nil { 60 return nil, errors.Annotate(err, "validate rerun options").Err() 61 } 62 logging.Infof(c, "triggerRerun with commit %s", options.GitilesCommit.Id) 63 properties, dimensions, err := getRerunPropertiesAndDimensions(c, options.SampleBuildID, options.ExtraProperties, options.ExtraDimensions) 64 if err != nil { 65 logging.Errorf(c, "Failed getRerunPropertiesAndDimension for build %d", options.SampleBuildID) 66 return nil, err 67 } 68 req := &buildbucketpb.ScheduleBuildRequest{ 69 Builder: options.Builder, 70 Properties: properties, 71 Dimensions: dimensions, 72 Tags: getRerunTags(c, options.SampleBuildID), 73 GitilesCommit: options.GitilesCommit, 74 Priority: options.Priority, 75 } 76 build, err := buildbucket.ScheduleBuild(c, req) 77 if err != nil { 78 logging.Errorf(c, "Failed trigger rerun for build %d: %w", options.SampleBuildID, err) 79 return nil, err 80 } 81 logging.Infof(c, "Rerun build %d triggered for build: %d", build.GetId(), options.SampleBuildID) 82 return build, nil 83 } 84 85 func validateOptions(options *TriggerOptions) error { 86 if options == nil { 87 return errors.New("option must not be nil") 88 } 89 if options.Builder == nil { 90 return errors.New("builder must not be nil") 91 } 92 if options.GitilesCommit == nil { 93 return errors.New("gitiles commit must not be nil") 94 } 95 if options.SampleBuildID == 0 { 96 return errors.New("sample build id must be specified") 97 } 98 return nil 99 } 100 101 // getRerunTags returns the build bucket tags for the rerun build 102 func getRerunTags(c context.Context, bbid int64) []*buildbucketpb.StringPair { 103 return []*buildbucketpb.StringPair{ 104 { 105 // analyzed_build_id is the buildbucket ID of the build which we want to rerun. 106 Key: "analyzed_build_id", 107 Value: strconv.FormatInt(bbid, 10), 108 }, 109 } 110 } 111 112 // getRerunPropertiesAndDimensions returns the properties and dimensions for a rerun of a buildID. 113 // If the builder is a tester, the dimension will be derived from its parent build. 114 func getRerunPropertiesAndDimensions(c context.Context, bbid int64, props map[string]any, dims map[string]string) (*structpb.Struct, []*buildbucketpb.RequestedDimension, error) { 115 mask := &buildbucketpb.BuildMask{ 116 Fields: &fieldmaskpb.FieldMask{ 117 Paths: []string{"input.properties", "builder", "infra.swarming.task_dimensions", "infra.backend.task_dimensions"}, 118 }, 119 } 120 build, err := buildbucket.GetBuild(c, bbid, mask) 121 if err != nil { 122 return nil, nil, errors.Annotate(err, "failed to get properties for build %d", bbid).Err() 123 } 124 properties, err := getRerunProperties(c, build, props) 125 if err != nil { 126 return nil, nil, err 127 } 128 parentBuildIDStr, found := build.GetInput().GetProperties().GetFields()["parent_build_id"] 129 130 // If builder is not a tester, return the dimension derived by this build. 131 if !found { 132 dimens := getRerunDimensions(c, build, dims) 133 return properties, dimens, nil 134 } 135 parentBuildID, err := strconv.Atoi(parentBuildIDStr.GetStringValue()) 136 if err != nil { 137 return nil, nil, errors.Annotate(err, "parse parent_build_id %s", parentBuildIDStr).Err() 138 } 139 // If builder is a tester, return the dimension derived by the parent build. 140 parentBuild, err := buildbucket.GetBuild(c, int64(parentBuildID), mask) 141 if err != nil { 142 return nil, nil, errors.Annotate(err, "failed to get properties for parent build %d", int64(parentBuildID)).Err() 143 } 144 dimens := getRerunDimensions(c, parentBuild, dims) 145 return properties, dimens, nil 146 } 147 148 func getRerunProperties(c context.Context, build *buildbucketpb.Build, props map[string]any) (*structpb.Struct, error) { 149 fields := map[string]any{} 150 properties := build.GetInput().GetProperties() 151 if properties != nil { 152 m := properties.GetFields() 153 if builderGroup, ok := m["builder_group"]; ok { 154 fields["builder_group"] = builderGroup 155 fields["target_builder"] = map[string]string{ 156 "builder": build.Builder.Builder, 157 "group": builderGroup.GetStringValue(), 158 } 159 } 160 if bootstrapProperties, ok := m["$bootstrap/properties"]; ok { 161 fields["$bootstrap/properties"] = bootstrapProperties 162 } 163 } 164 165 for k, v := range props { 166 fields[k] = v 167 } 168 169 spb, err := toStructPB(fields) 170 if err != nil { 171 return nil, fmt.Errorf("cannot convert %v to structpb: %w", fields, err) 172 } 173 return spb, nil 174 } 175 176 func getRerunDimensions(c context.Context, build *buildbucketpb.Build, dims map[string]string) []*buildbucketpb.RequestedDimension { 177 result := []*buildbucketpb.RequestedDimension{} 178 179 // Only copy these dimensions from the analyzed builder to the rerun job request. 180 allowedDimensions := map[string]bool{"os": true, "gpu": true} 181 182 if dimens := util.GetTaskDimensions(build); dimens != nil { 183 dimens := util.GetTaskDimensions(build) 184 for _, d := range dimens { 185 if _, ok := allowedDimensions[d.Key]; ok { 186 result = append(result, &buildbucketpb.RequestedDimension{ 187 Key: d.Key, 188 Value: d.Value, 189 }) 190 } 191 } 192 } 193 194 // Add extra dimension from dims 195 for k, v := range dims { 196 result = append(result, &buildbucketpb.RequestedDimension{ 197 Key: k, 198 Value: v, 199 }) 200 } 201 202 return result 203 } 204 205 // CreateRerunBuildModel creates a CompileRerunBuild (and SingleRerun) in datastore 206 func CreateRerunBuildModel(c context.Context, build *buildbucketpb.Build, rerunType model.RerunBuildType, suspect *model.Suspect, nsa *model.CompileNthSectionAnalysis, priority int32) (*model.CompileRerunBuild, error) { 207 if rerunType == model.RerunBuildType_CulpritVerification && suspect == nil { 208 return nil, fmt.Errorf("CreateRerunBuildModel requires suspect when type is CulpritVerification") 209 } 210 if rerunType == model.RerunBuildType_NthSection && nsa == nil { 211 return nil, fmt.Errorf("CreateRerunBuildModel requires nth section analysis when type is NthSection") 212 } 213 214 gitilesCommit := *build.GetInput().GetGitilesCommit() 215 startTime := build.StartTime.AsTime() 216 createTime := build.CreateTime.AsTime() 217 rerunBuild := &model.CompileRerunBuild{ 218 Id: build.GetId(), 219 LuciBuild: model.LuciBuild{ 220 BuildId: build.GetId(), 221 Project: build.Builder.Project, 222 Bucket: build.Builder.Bucket, 223 Builder: build.Builder.Builder, 224 CreateTime: createTime, 225 StartTime: startTime, 226 Status: build.GetStatus(), 227 GitilesCommit: buildbucketpb.GitilesCommit{ 228 Host: gitilesCommit.Host, 229 Project: gitilesCommit.Project, 230 Ref: gitilesCommit.Ref, 231 Id: gitilesCommit.Id, 232 }, 233 }, 234 } 235 err := datastore.Put(c, rerunBuild) 236 if err != nil { 237 logging.Errorf(c, "Error in creating CompileRerunBuild model for build %d", build.GetId()) 238 return nil, err 239 } 240 dimensions, err := buildbucket.GetBuildTaskDimension(c, build.GetId()) 241 if err != nil { 242 return nil, errors.Annotate(err, "get build task dimension bbid %v", build.GetId()).Err() 243 } 244 // Create the first SingleRerun for CompileRerunBuild 245 // It will be updated when we receive updates from recipe 246 singleRerun := &model.SingleRerun{ 247 RerunBuild: datastore.KeyForObj(c, rerunBuild), 248 Status: pb.RerunStatus_RERUN_STATUS_IN_PROGRESS, 249 GitilesCommit: buildbucketpb.GitilesCommit{ 250 Host: gitilesCommit.Host, 251 Project: gitilesCommit.Project, 252 Ref: gitilesCommit.Ref, 253 Id: gitilesCommit.Id, 254 }, 255 CreateTime: createTime, 256 StartTime: startTime, 257 Type: rerunType, 258 Priority: priority, 259 Dimensions: dimensions, 260 } 261 262 if rerunType == model.RerunBuildType_CulpritVerification { 263 singleRerun.Analysis = suspect.ParentAnalysis.Parent() 264 singleRerun.Suspect = datastore.KeyForObj(c, suspect) 265 } 266 if rerunType == model.RerunBuildType_NthSection { 267 singleRerun.Analysis = nsa.ParentAnalysis 268 singleRerun.NthSectionAnalysis = datastore.KeyForObj(c, nsa) 269 } 270 271 err = datastore.Put(c, singleRerun) 272 if err != nil { 273 logging.Errorf(c, "Error in creating SingleRerun model for build %d", build.GetId()) 274 return nil, err 275 } 276 277 return rerunBuild, nil 278 } 279 280 // UpdateCompileRerunStatus updates the start/end time and status of rerun builds and single rerun (when we received buildbucket pubsub messages) 281 func UpdateCompileRerunStatus(c context.Context, bbid int64) error { 282 logging.Infof(c, "UpdateCompileRerunStatus for build %d", bbid) 283 rerunModel := &model.CompileRerunBuild{ 284 Id: bbid, 285 } 286 287 err := datastore.Get(c, rerunModel) 288 if err == datastore.ErrNoSuchEntity { 289 // There are cases where we cannot find datastore entries, like 290 // luci-bisection-dev receives pubsub message for a prod run 291 // In this case, just log and return nil 292 logging.Warningf(c, "Couldn't find compile rerun to update status: %d", bbid) 293 return nil 294 } 295 if err != nil { 296 return errors.Annotate(err, "couldn't get rerun model %d", bbid).Err() 297 } 298 299 lastRerun, err := datastoreutil.GetLastRerunForRerunBuild(c, rerunModel) 300 if err != nil { 301 return errors.Annotate(err, "failed getting last rerun for build %d", rerunModel.Id).Err() 302 } 303 304 build, err := buildbucket.GetBuild(c, bbid, &buildbucketpb.BuildMask{ 305 Fields: &fieldmaskpb.FieldMask{ 306 Paths: []string{"id", "builder", "end_time", "start_time", "status"}, 307 }, 308 }) 309 if err != nil { 310 return errors.Annotate(err, "couldn't get build %d", bbid).Err() 311 } 312 313 startTime := build.StartTime.AsTime() 314 endTime := build.EndTime.AsTime() 315 316 err = datastore.RunInTransaction(c, func(ctx context.Context) error { 317 e := datastore.Get(c, rerunModel) 318 if e != nil { 319 return e 320 } 321 rerunModel.StartTime = startTime 322 rerunModel.EndTime = endTime 323 rerunModel.Status = build.Status 324 return datastore.Put(c, rerunModel) 325 }, nil) 326 327 if err != nil { 328 return errors.Annotate(err, "couldn't save rerun model %d", bbid).Err() 329 } 330 331 err = datastore.RunInTransaction(c, func(ctx context.Context) error { 332 e := datastore.Get(c, lastRerun) 333 if e != nil { 334 return e 335 } 336 buildEnded := build.Status&buildbucketpb.Status_ENDED_MASK == buildbucketpb.Status_ENDED_MASK 337 if buildEnded && !lastRerun.HasEnded() { 338 // Edge case: when the build ends but the rerun isn't ended, 339 // this suggests that there is a infra failure in the rerun build 340 // which prevent it from sending back the update via the UpdateAnalysisProgress RPC. 341 // TODO (nqmtuan): Perhaps we need to update Analysis and NthSection analysis status too? 342 lastRerun.Status = pb.RerunStatus_RERUN_STATUS_INFRA_FAILED 343 lastRerun.EndTime = endTime 344 } 345 lastRerun.StartTime = startTime 346 return datastore.Put(c, lastRerun) 347 }, nil) 348 349 if err != nil { 350 return errors.Annotate(err, "failed saving last rerun for build %d", rerunModel.Id).Err() 351 } 352 return nil 353 } 354 355 // UpdateTestRerunStatus is called when we receive updates from buildbucket 356 // for test rerun build. 357 func UpdateTestRerunStatus(ctx context.Context, build *buildbucketpb.Build) error { 358 bbid := build.Id 359 logging.Infof(ctx, "UpdateTestRerunStatus for build %d", bbid) 360 rerunFailed := false 361 singleRerun := &model.TestSingleRerun{ 362 ID: bbid, 363 } 364 365 err := datastore.RunInTransaction(ctx, func(ctx context.Context) error { 366 err := datastore.Get(ctx, singleRerun) 367 if err == datastore.ErrNoSuchEntity { 368 // There are cases where we cannot find datastore entries, like 369 // luci-bisection-dev receives pubsub message for a prod run. 370 // In this case, just log and return nil. 371 logging.Warningf(ctx, "Couldn't find test rerun to update status : %d", bbid) 372 return nil 373 } 374 if err != nil { 375 return errors.Annotate(err, "couldn't get TestSingleRerun %d", bbid).Err() 376 } 377 378 singleRerun.LUCIBuild.StartTime = build.StartTime.AsTime() 379 singleRerun.LUCIBuild.EndTime = build.EndTime.AsTime() 380 singleRerun.LUCIBuild.Status = build.Status 381 buildEnded := build.Status&buildbucketpb.Status_ENDED_MASK == buildbucketpb.Status_ENDED_MASK 382 383 if buildEnded && !singleRerun.HasEnded() { 384 // Edge case: when the build ends but the rerun isn't ended, 385 // this suggests that there is a infra failure in the rerun build 386 // which prevent it from sending back the update via the UpdateTestAnalysisProgress RPC. 387 singleRerun.Status = pb.RerunStatus_RERUN_STATUS_INFRA_FAILED 388 rerunFailed = true 389 } 390 391 err = datastore.Put(ctx, singleRerun) 392 if err != nil { 393 return errors.Annotate(err, "couldn't save single rerun %d", bbid).Err() 394 } 395 return nil 396 }, nil) 397 398 if err != nil { 399 return errors.Annotate(err, "saving test single rerun").Err() 400 } 401 402 if rerunFailed { 403 tfa, err := datastoreutil.GetTestFailureAnalysis(ctx, singleRerun.AnalysisKey.IntID()) 404 if err != nil { 405 return errors.Annotate(err, "get test failure analysis").Err() 406 } 407 // Update analysis and nthsection analysis if applicable. 408 // The reason why we put it here instead of the above transaction 409 // was because read-after-write within a transaction does not work. 410 // (https://cloud.google.com/datastore/docs/concepts/transactions#isolation_and_consistency) 411 err = testfailureanalysis.UpdateAnalysisStatusWhenError(ctx, tfa) 412 if err != nil { 413 return errors.Annotate(err, "update analysis status when error").Err() 414 } 415 } 416 return nil 417 } 418 419 // TODO (nqmtuan): Move this into a helper class if it turns out we need to use 420 // it for more than one place 421 // toStructPB convert an any s to structpb.Struct, as long as s is marshallable. 422 // s can be a general Go type, structpb.Struct type, or mixed. 423 // For example, s can be a map of mixed type, like 424 // {"key1": "val1", "key2": structpb.NewStringValue("val2")} 425 func toStructPB(s any) (*structpb.Struct, error) { 426 // We used json as an intermediate format to convert 427 j, err := json.Marshal(s) 428 if err != nil { 429 return nil, err 430 } 431 var m map[string]any 432 if err := json.Unmarshal(j, &m); err != nil { 433 return nil, err 434 } 435 return structpb.NewStruct(m) 436 }