go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/bisection/compilefailureanalysis/compile_failure_analysis.go (about) 1 // Copyright 2022 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package compilefailureanalysis is the component for analyzing 16 // compile failures. 17 // It has 2 main components: heuristic analysis and nth_section analysis 18 package compilefailureanalysis 19 20 import ( 21 "context" 22 "fmt" 23 24 "go.chromium.org/luci/bisection/compilefailureanalysis/compilelog" 25 "go.chromium.org/luci/bisection/compilefailureanalysis/heuristic" 26 "go.chromium.org/luci/bisection/compilefailureanalysis/nthsection" 27 "go.chromium.org/luci/bisection/compilefailureanalysis/statusupdater" 28 "go.chromium.org/luci/bisection/culpritverification" 29 "go.chromium.org/luci/bisection/internal/buildbucket" 30 "go.chromium.org/luci/bisection/internal/lucinotify" 31 "go.chromium.org/luci/bisection/model" 32 pb "go.chromium.org/luci/bisection/proto/v1" 33 "go.chromium.org/luci/bisection/util/datastoreutil" 34 "go.chromium.org/luci/bisection/util/loggingutil" 35 36 "go.chromium.org/luci/common/clock" 37 "go.chromium.org/luci/common/errors" 38 "go.chromium.org/luci/common/logging" 39 "go.chromium.org/luci/gae/service/datastore" 40 ) 41 42 // AnalyzeFailure receives failure information and perform analysis. 43 // Note that this assumes that the failure is new (i.e. the client of this 44 // function should make sure this is not a duplicate analysis) 45 func AnalyzeFailure( 46 c context.Context, 47 cf *model.CompileFailure, 48 firstFailedBuildID int64, 49 lastPassedBuildID int64, 50 ) (*model.CompileFailureAnalysis, error) { 51 logging.Infof(c, "AnalyzeFailure firstFailed = %d", firstFailedBuildID) 52 regressionRange, e := findRegressionRange(c, firstFailedBuildID, lastPassedBuildID) 53 if e != nil { 54 return nil, e 55 } 56 57 logging.Infof(c, "Regression range: %v", regressionRange) 58 59 // Get failed targets 60 compileLogs, e := compilelog.GetCompileLogs(c, firstFailedBuildID) 61 if e != nil { 62 return nil, e 63 } 64 failedTargets := compilelog.GetFailedTargets(compileLogs) 65 66 e = datastore.RunInTransaction(c, func(c context.Context) error { 67 e := datastore.Get(c, cf) 68 if e != nil { 69 return e 70 } 71 cf.OutputTargets = failedTargets 72 return datastore.Put(c, cf) 73 }, nil) 74 75 if e != nil { 76 return nil, e 77 } 78 79 // Creates a new CompileFailureAnalysis entity in datastore 80 analysis := &model.CompileFailureAnalysis{ 81 CompileFailure: datastore.KeyForObj(c, cf), 82 CreateTime: clock.Now(c), 83 Status: pb.AnalysisStatus_RUNNING, 84 RunStatus: pb.AnalysisRunStatus_STARTED, 85 FirstFailedBuildId: firstFailedBuildID, 86 LastPassedBuildId: lastPassedBuildID, 87 InitialRegressionRange: regressionRange, 88 } 89 90 e = datastore.Put(c, analysis) 91 if e != nil { 92 return nil, e 93 } 94 c = loggingutil.SetAnalysisID(c, analysis.Id) 95 96 // Check if the analysis is for tree closer, if yes, set the flag. 97 err := setTreeCloser(c, analysis) 98 if err != nil { 99 // Non-critical, just continue 100 err := errors.Annotate(err, "failed to check tree closer").Err() 101 logging.Errorf(c, err.Error()) 102 } 103 104 // Heuristic analysis 105 heuristicResult, e := heuristic.Analyze(c, analysis, regressionRange, compileLogs) 106 if e != nil { 107 // As this is only heuristic analysis, we log the error and continue with nthsection analysis 108 logging.Errorf(c, "Error during heuristic analysis for build %d: %v", firstFailedBuildID, e) 109 } 110 111 // If heuristic analysis does not return error, we proceed to verify its results (if any) 112 if e == nil { 113 shouldRunCulpritVerification, err := culpritverification.ShouldRunCulpritVerification(c, analysis) 114 if err != nil { 115 return nil, errors.Annotate(err, "couldn't fetch config for culprit verification. Build %d", firstFailedBuildID).Err() 116 } 117 if shouldRunCulpritVerification { 118 if !analysis.ShouldCancel { 119 if err := verifyHeuristicResults(c, heuristicResult, firstFailedBuildID, analysis.Id); err != nil { 120 // Do not return error here, just log 121 logging.Errorf(c, "Error verifying heuristic result for build %d: %s", firstFailedBuildID, err) 122 } 123 } 124 } 125 } 126 127 // Nth-section analysis 128 shouldRunNthSection, err := nthsection.ShouldRunNthSectionAnalysis(c, analysis) 129 if err != nil { 130 return nil, errors.Annotate(err, "couldn't fetch config for nthsection. Build %d", firstFailedBuildID).Err() 131 } 132 if shouldRunNthSection { 133 _, e = nthsection.Analyze(c, analysis) 134 if e != nil { 135 e = errors.Annotate(e, "error during nthsection analysis for build %d", firstFailedBuildID).Err() 136 logging.Errorf(c, e.Error()) 137 } 138 } 139 140 // Update status of analysis 141 err = statusupdater.UpdateAnalysisStatus(c, analysis) 142 if err != nil { 143 return nil, errors.Annotate(err, "couldn't update analysis status. Build %d", firstFailedBuildID).Err() 144 } 145 146 return analysis, nil 147 } 148 149 // verifyHeuristicResults verifies if the suspects of heuristic analysis are the real culprit. 150 // analysisID is CompileFailureAnalysis ID. It is meant to be propagated all the way to the 151 // recipe, so we can identify the analysis in buildbucket. 152 func verifyHeuristicResults(c context.Context, heuristicAnalysis *model.CompileHeuristicAnalysis, failedBuildID int64, analysisID int64) error { 153 // TODO (nqmtuan): Move the verification into a task queue 154 suspects, err := getHeuristicSuspectsToVerify(c, heuristicAnalysis) 155 if err != nil { 156 return err 157 } 158 for _, suspect := range suspects { 159 err := culpritverification.VerifySuspect(c, suspect, failedBuildID, analysisID) 160 if err != nil { 161 // Just log the error and continue for other suspects 162 logging.Errorf(c, "Error in verifying suspect %d for analysis %d", suspect.Id, analysisID) 163 } 164 } 165 return nil 166 } 167 168 // In case heuristic analysis returns too many results, we don't want to verify all of them. 169 // Instead, we want to be selective in what we want to verify. 170 // For now, we will just take top 3 results of heuristic analysis. 171 func getHeuristicSuspectsToVerify(c context.Context, heuristicAnalysis *model.CompileHeuristicAnalysis) ([]*model.Suspect, error) { 172 // Getting the suspects for heuristic analysis 173 suspects := []*model.Suspect{} 174 q := datastore.NewQuery("Suspect").Ancestor(datastore.KeyForObj(c, heuristicAnalysis)).Order("-score") 175 err := datastore.GetAll(c, q, &suspects) 176 if err != nil { 177 return nil, err 178 } 179 180 // Get top 3 suspects to verify 181 nSuspects := 3 182 if nSuspects > len(suspects) { 183 nSuspects = len(suspects) 184 } 185 return suspects[:nSuspects], nil 186 } 187 188 // findRegressionRange takes in the first failed and last passed buildID 189 // and returns the regression range based on GitilesCommit. 190 func findRegressionRange( 191 c context.Context, 192 firstFailedBuildID int64, 193 lastPassedBuildID int64, 194 ) (*pb.RegressionRange, error) { 195 firstFailedBuild, err := buildbucket.GetBuild(c, firstFailedBuildID, nil) 196 if err != nil { 197 return nil, fmt.Errorf("error getting build %d: %w", firstFailedBuildID, err) 198 } 199 200 lastPassedBuild, err := buildbucket.GetBuild(c, lastPassedBuildID, nil) 201 if err != nil { 202 return nil, fmt.Errorf("error getting build %d: %w", lastPassedBuildID, err) 203 } 204 205 if firstFailedBuild.GetInput().GetGitilesCommit() == nil || lastPassedBuild.GetInput().GetGitilesCommit() == nil { 206 return nil, fmt.Errorf("couldn't get gitiles commit for builds (%d, %d)", lastPassedBuildID, firstFailedBuildID) 207 } 208 209 return &pb.RegressionRange{ 210 FirstFailed: firstFailedBuild.GetInput().GetGitilesCommit(), 211 LastPassed: lastPassedBuild.GetInput().GetGitilesCommit(), 212 }, nil 213 } 214 215 // setTreeCloser checks and updates the analysis if it is for a treecloser failure. 216 func setTreeCloser(c context.Context, cfa *model.CompileFailureAnalysis) error { 217 fb, err := datastoreutil.GetBuild(c, cfa.CompileFailure.Parent().IntID()) 218 if err != nil { 219 return errors.Annotate(err, "getBuild").Err() 220 } 221 if fb == nil { 222 return fmt.Errorf("couldn't find build for analysis %d", cfa.Id) 223 } 224 225 // TODO (nqmtuan): Pass in step name when we support arbitrary 226 // step name which may not be "compile" 227 isTreeCloser, err := lucinotify.CheckTreeCloser(c, fb.Project, fb.Bucket, fb.Builder, "compile") 228 if err != nil { 229 return err 230 } 231 232 return datastore.RunInTransaction(c, func(c context.Context) error { 233 e := datastore.Get(c, cfa) 234 if e != nil { 235 return e 236 } 237 cfa.IsTreeCloser = isTreeCloser 238 return datastore.Put(c, cfa) 239 }, nil) 240 }