go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/bisection/culpritverification/verify_culprit.go (about) 1 // Copyright 2022 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package culpritverification verifies if a suspect is a culprit. 16 package culpritverification 17 18 import ( 19 "context" 20 "fmt" 21 22 "google.golang.org/protobuf/proto" 23 24 "go.chromium.org/luci/bisection/compilefailureanalysis/heuristic" 25 "go.chromium.org/luci/bisection/compilefailureanalysis/statusupdater" 26 cpvt "go.chromium.org/luci/bisection/culpritverification/task" 27 "go.chromium.org/luci/bisection/internal/config" 28 "go.chromium.org/luci/bisection/internal/gitiles" 29 "go.chromium.org/luci/bisection/model" 30 pb "go.chromium.org/luci/bisection/proto/v1" 31 "go.chromium.org/luci/bisection/rerun" 32 "go.chromium.org/luci/bisection/util" 33 "go.chromium.org/luci/bisection/util/datastoreutil" 34 "go.chromium.org/luci/bisection/util/loggingutil" 35 36 taskpb "go.chromium.org/luci/bisection/task/proto" 37 buildbucketpb "go.chromium.org/luci/buildbucket/proto" 38 "go.chromium.org/luci/common/errors" 39 "go.chromium.org/luci/common/logging" 40 "go.chromium.org/luci/common/retry/transient" 41 "go.chromium.org/luci/gae/service/datastore" 42 "go.chromium.org/luci/gae/service/info" 43 ) 44 45 // RegisterTaskClass registers the task class for tq dispatcher 46 func RegisterTaskClass() { 47 compileHandler := func(ctx context.Context, payload proto.Message) error { 48 task := payload.(*taskpb.CulpritVerificationTask) 49 analysisID := task.GetAnalysisId() 50 suspectID := task.GetSuspectId() 51 parentKey := task.GetParentKey() 52 return handleTQError(ctx, processCulpritVerificationTask(ctx, analysisID, suspectID, parentKey)) 53 } 54 testHandler := func(ctx context.Context, payload proto.Message) error { 55 task := payload.(*taskpb.TestFailureCulpritVerificationTask) 56 return handleTQError(ctx, processTestFailureTask(ctx, task)) 57 } 58 cpvt.RegisterTaskClass(compileHandler, testHandler) 59 } 60 61 func handleTQError(ctx context.Context, err error) error { 62 if err != nil { 63 err := errors.Annotate(err, "run culprit verification").Err() 64 logging.Errorf(ctx, err.Error()) 65 // If the error is transient, return err to retry 66 if transient.Tag.In(err) { 67 return err 68 } 69 return nil 70 } 71 return nil 72 } 73 74 func processCulpritVerificationTask(c context.Context, analysisID int64, suspectID int64, parentKeyStr string) error { 75 c, err := loggingutil.UpdateLoggingWithAnalysisID(c, analysisID) 76 if err != nil { 77 // not critical, just log 78 err := errors.Annotate(err, "failed UpdateLoggingWithAnalysisID %d", analysisID) 79 logging.Errorf(c, "%v", err) 80 } 81 82 cfa, err := datastoreutil.GetCompileFailureAnalysis(c, analysisID) 83 if err != nil { 84 return errors.Annotate(err, "failed getting CompileFailureAnalysis").Err() 85 } 86 87 parentKey, err := datastore.NewKeyEncoded(parentKeyStr) 88 if err != nil { 89 return errors.Annotate(err, "couldn't decode parent key for suspect").Err() 90 } 91 92 suspect, err := datastoreutil.GetSuspect(c, suspectID, parentKey) 93 if err != nil { 94 return errors.Annotate(err, "couldn't get suspect").Err() 95 } 96 return VerifySuspect(c, suspect, cfa.FirstFailedBuildId, analysisID) 97 } 98 99 // VerifySuspect verifies if a suspect is indeed the culprit. 100 // analysisID is CompileFailureAnalysis ID. It is meant to be propagated all the way to the 101 // recipe, so we can identify the analysis in buildbucket. 102 func VerifySuspect(c context.Context, suspect *model.Suspect, failedBuildID int64, analysisID int64) error { 103 logging.Infof(c, "Verifying suspect %d for build %d", datastore.KeyForObj(c, suspect).IntID(), failedBuildID) 104 105 // Check if the analysis has found any culprits, if yes, exit early 106 cfa, err := datastoreutil.GetCompileFailureAnalysis(c, analysisID) 107 if err != nil { 108 return err 109 } 110 111 defer updateSuspectStatus(c, suspect, cfa) 112 113 if len(cfa.VerifiedCulprits) > 0 { 114 logging.Infof(c, "culprit found for analysis %d, no need to trigger any verification runs", analysisID) 115 return nil 116 } 117 118 // Check if there is any suspect with the same commit being verified 119 // If yes, we don't run verification for this suspect anymore 120 suspectExist, err := checkSuspectWithSameCommitExist(c, cfa, suspect) 121 if err != nil { 122 return errors.Annotate(err, "checkSuspectWithSameCommitExist").Err() 123 } 124 if suspectExist { 125 return nil 126 } 127 128 // Get failed compile targets 129 compileFailure, err := datastoreutil.GetCompileFailureForAnalysisID(c, analysisID) 130 if err != nil { 131 return err 132 } 133 failedTargets := compileFailure.OutputTargets 134 135 // Get the changelog for the suspect 136 repoURL := gitiles.GetRepoUrl(c, &suspect.GitilesCommit) 137 changeLogs, err := gitiles.GetChangeLogsForSingleRevision(c, repoURL, suspect.GitilesCommit.Id) 138 if err != nil { 139 // This is non-critical, we just log and continue 140 logging.Errorf(c, "Cannot get changelog for revision %s: %s", suspect.GitilesCommit.Id, err) 141 } else { 142 // Check if any failed files is newly added in the change log. 143 // If it is the case, the parent revision cannot compile failed targets. 144 // In such cases, we do not pass the failed targets to recipe, instead 145 // we will compile all targets. 146 if hasNewTarget(c, compileFailure.FailedFiles, changeLogs) { 147 failedTargets = []string{} 148 } 149 } 150 151 // Get rerun build property 152 props := map[string]any{ 153 "analysis_id": analysisID, 154 "bisection_host": fmt.Sprintf("%s.appspot.com", info.AppID(c)), 155 // For culprit verification, we should remove builder cache 156 "should_clobber": true, 157 } 158 if len(failedTargets) > 0 { 159 props["compile_targets"] = failedTargets 160 } 161 162 // Verify the suspect 163 priority, err := getSuspectPriority(c, suspect) 164 if err != nil { 165 return errors.Annotate(err, "failed getting priority").Err() 166 } 167 168 // TODO(nqmtuan): Pass in the project. 169 // For now, hardcode to "chromium", since we only support chromium for compile failure. 170 suspectBuild, parentBuild, err := VerifySuspectCommit(c, "chromium", suspect, failedBuildID, props, priority) 171 if err != nil { 172 logging.Errorf(c, "Error triggering rerun for build %d: %s", failedBuildID, err) 173 return err 174 } 175 suspectRerunBuildModel, err := rerun.CreateRerunBuildModel(c, suspectBuild, model.RerunBuildType_CulpritVerification, suspect, nil, priority) 176 if err != nil { 177 return err 178 } 179 180 parentRerunBuildModel, err := rerun.CreateRerunBuildModel(c, parentBuild, model.RerunBuildType_CulpritVerification, suspect, nil, priority) 181 if err != nil { 182 return err 183 } 184 185 err = datastore.RunInTransaction(c, func(ctx context.Context) error { 186 e := datastore.Get(c, suspect) 187 if e != nil { 188 return e 189 } 190 suspect.VerificationStatus = model.SuspectVerificationStatus_UnderVerification 191 suspect.SuspectRerunBuild = datastore.KeyForObj(c, suspectRerunBuildModel) 192 suspect.ParentRerunBuild = datastore.KeyForObj(c, parentRerunBuildModel) 193 return datastore.Put(c, suspect) 194 }, nil) 195 196 if err != nil { 197 return err 198 } 199 return nil 200 } 201 202 func checkSuspectWithSameCommitExist(c context.Context, cfa *model.CompileFailureAnalysis, suspect *model.Suspect) (bool, error) { 203 suspects, err := datastoreutil.FetchSuspectsForAnalysis(c, cfa) 204 if err != nil { 205 return false, errors.Annotate(err, "fetchSuspectsForAnalysis").Err() 206 } 207 for _, s := range suspects { 208 // Need to be of different suspect 209 if s.Id != suspect.Id { 210 if s.GitilesCommit.Id == suspect.GitilesCommit.Id { 211 if s.VerificationStatus != model.SuspectVerificationStatus_Unverified { 212 return true, nil 213 } 214 } 215 } 216 } 217 return false, nil 218 } 219 220 func hasNewTarget(c context.Context, failedFiles []string, changelog *model.ChangeLog) bool { 221 for _, file := range failedFiles { 222 for _, diff := range changelog.ChangeLogDiffs { 223 if diff.Type == model.ChangeType_ADD || diff.Type == model.ChangeType_COPY || diff.Type == model.ChangeType_RENAME { 224 if heuristic.IsSameFile(diff.NewPath, file) { 225 return true 226 } 227 } 228 } 229 } 230 return false 231 } 232 233 // VerifyCommit checks if a commit is the culprit of a build failure. 234 // Returns 2 builds: 235 // - The 1st build is the rerun build for the commit 236 // - The 2nd build is the rerun build for the parent commit 237 func VerifySuspectCommit(c context.Context, project string, suspect *model.Suspect, failedBuildID int64, props map[string]any, priority int32) (*buildbucketpb.Build, *buildbucketpb.Build, error) { 238 commit := &suspect.GitilesCommit 239 240 // Query Gitiles to get parent commit 241 parentCommit, err := getParentCommit(c, commit) 242 if err != nil { 243 return nil, nil, errors.Annotate(err, "get parent commit for commit %s", commit.Id).Err() 244 } 245 builder, err := config.GetCompileBuilder(c, project) 246 if err != nil { 247 return nil, nil, errors.Annotate(err, "get compile builder").Err() 248 } 249 options := &rerun.TriggerOptions{ 250 Builder: util.BuilderFromConfigBuilder(builder), 251 GitilesCommit: commit, 252 SampleBuildID: failedBuildID, 253 ExtraProperties: props, 254 ExtraDimensions: nil, 255 Priority: priority, 256 } 257 // Trigger a rerun with commit and parent commit 258 build1, err := rerun.TriggerRerun(c, options) 259 if err != nil { 260 return nil, nil, err 261 } 262 263 options.GitilesCommit = parentCommit 264 build2, err := rerun.TriggerRerun(c, options) 265 if err != nil { 266 return nil, nil, err 267 } 268 269 return build1, build2, nil 270 } 271 272 func getSuspectPriority(c context.Context, suspect *model.Suspect) (int32, error) { 273 // TODO (nqmtuan): Support priority for nth-section case 274 // For now let's return the baseline for culprit verification 275 // We can add offset later 276 confidence := heuristic.GetConfidenceLevel(suspect.Score) 277 var pri int32 = 0 278 switch confidence { 279 case pb.SuspectConfidenceLevel_HIGH: 280 pri = rerun.PriorityCulpritVerificationHighConfidence 281 case pb.SuspectConfidenceLevel_MEDIUM: 282 pri = rerun.PriorityCulpritVerificationMediumConfidence 283 case pb.SuspectConfidenceLevel_LOW: 284 pri = rerun.PriorityCulpritVerificationLowConfidence 285 } 286 287 // Check if the same suspect has any running build 288 otherSuspects, err := datastoreutil.GetOtherSuspectsWithSameCL(c, suspect) 289 if err != nil { 290 return 0, errors.Annotate(err, "failed GetOtherSuspectsWithSameCL %d", suspect.Id).Err() 291 } 292 293 // If there is a running/finished suspect run -> lower priority of this run 294 for _, s := range otherSuspects { 295 if s.VerificationStatus == model.SuspectVerificationStatus_UnderVerification || s.VerificationStatus == model.SuspectVerificationStatus_ConfirmedCulprit || s.VerificationStatus == model.SuspectVerificationStatus_Vindicated { 296 pri += rerun.PriorityAnotherVerificationBuildExistOffset 297 break 298 } 299 } 300 301 // Offset the priority based on run duration 302 cfa, err := datastoreutil.GetCompileFailureAnalysis(c, suspect.ParentAnalysis.Parent().IntID()) 303 if err != nil { 304 return 0, errors.Annotate(err, "couldn't get analysis for suspect %d", suspect.Id).Err() 305 } 306 pri, err = rerun.OffsetPriorityBasedOnRunDuration(c, pri, cfa) 307 if err != nil { 308 return 0, errors.Annotate(err, "couldn't OffsetPriorityBasedOnRunDuration for suspect %d", suspect.Id).Err() 309 } 310 311 // Offset the priority if it is a tree closer 312 if cfa.IsTreeCloser { 313 pri += rerun.PriorityTreeClosureOffset 314 } 315 316 return rerun.CapPriority(pri), nil 317 } 318 319 func updateSuspectStatus(c context.Context, suspect *model.Suspect, cfa *model.CompileFailureAnalysis) { 320 // If after VerifySuspect, the suspect verification status is not 321 // SuspectVerificationStatus_UnderVerification, it means no reruns have been scheduled 322 // so we should set the status back to SuspectVerificationStatus_Unverified 323 if suspect.VerificationStatus != model.SuspectVerificationStatus_UnderVerification { 324 err := datastore.RunInTransaction(c, func(c context.Context) error { 325 // Update suspect status 326 e := datastore.Get(c, suspect) 327 if e != nil { 328 return e 329 } 330 suspect.VerificationStatus = model.SuspectVerificationStatus_Unverified 331 return datastore.Put(c, suspect) 332 }, nil) 333 334 if err != nil { 335 logging.Errorf(c, errors.Annotate(err, "set suspect verification status").Err().Error()) 336 } 337 // Also update the analysis status this case, because 338 // the analysis may ended, given the suspect is no longer under verification 339 err = statusupdater.UpdateAnalysisStatus(c, cfa) 340 if err != nil { 341 logging.Errorf(c, errors.Annotate(err, "set analysis status").Err().Error()) 342 } 343 } 344 } 345 346 func ShouldRunCulpritVerification(c context.Context, cfa *model.CompileFailureAnalysis) (bool, error) { 347 project, err := datastoreutil.GetProjectForCompileFailureAnalysis(c, cfa) 348 if err != nil { 349 return false, errors.Annotate(err, "get project for compile failure analysis").Err() 350 } 351 cfg, err := config.Project(c, project) 352 if err != nil { 353 return false, errors.Annotate(err, "config project").Err() 354 } 355 return cfg.CompileAnalysisConfig.CulpritVerificationEnabled, nil 356 } 357 358 func getParentCommit(ctx context.Context, commit *buildbucketpb.GitilesCommit) (*buildbucketpb.GitilesCommit, error) { 359 repoURL := gitiles.GetRepoUrl(ctx, commit) 360 p, err := gitiles.GetParentCommit(ctx, repoURL, commit.Id) 361 if err != nil { 362 return nil, err 363 } 364 return &buildbucketpb.GitilesCommit{ 365 Host: commit.Host, 366 Project: commit.Project, 367 Ref: commit.Ref, 368 Id: p, 369 }, nil 370 }