go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/bisection/compilefailureanalysis/heuristic/changelog_analyzer.go (about) 1 // Copyright 2022 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package heuristic 16 17 import ( 18 "context" 19 "fmt" 20 "path/filepath" 21 "strings" 22 23 "go.chromium.org/luci/bisection/model" 24 "go.chromium.org/luci/bisection/util" 25 26 "go.chromium.org/luci/common/logging" 27 ) 28 29 // ScoringCriteria represents how we score in the heuristic analysis. 30 type ScoringCriteria struct { 31 // The score if the suspect touched the same file in the failure log. 32 TouchedSameFile int 33 // The score if the suspect touched a related file to a file in the failure log. 34 TouchedRelatedFile int 35 // The score if the suspect touched the same file and the same line as in the failure log. 36 TouchedSameLine int 37 } 38 39 // AnalyzeChangeLogs analyzes the changelogs based on the failure signals. 40 // Returns a dictionary that maps the commits and the result found. 41 func AnalyzeChangeLogs(c context.Context, signal *model.CompileFailureSignal, changelogs []*model.ChangeLog) (*model.HeuristicAnalysisResult, error) { 42 result := &model.HeuristicAnalysisResult{} 43 for _, changelog := range changelogs { 44 justification, err := AnalyzeOneChangeLog(c, signal, changelog) 45 commit := changelog.Commit 46 if err != nil { 47 logging.Errorf(c, "Error analyzing change log for commit %s. Error: %w", commit, err) 48 continue 49 } 50 51 // We only care about the relevant CLs 52 if justification.GetScore() <= 0 { 53 continue 54 } 55 56 reviewUrl, err := changelog.GetReviewUrl() 57 if err != nil { 58 logging.Errorf(c, "Error getting review URL for commit: %s. Error: %w", commit, err) 59 continue 60 } 61 reviewTitle, err := changelog.GetReviewTitle() 62 if err != nil { 63 // Just log the error from getting the review title - suspect should still be added 64 logging.Errorf(c, "Error getting review title for commit: %s. Error: %w", commit, err) 65 } 66 result.AddItem(commit, reviewUrl, reviewTitle, justification) 67 } 68 result.Sort() 69 return result, nil 70 } 71 72 // AnalyzeOneChangeLog analyzes one changelog(revision) and returns the 73 // justification of how likely that changelog is the culprit. 74 func AnalyzeOneChangeLog(c context.Context, signal *model.CompileFailureSignal, changelog *model.ChangeLog) (*model.SuspectJustification, error) { 75 // TODO (crbug.com/1295566): check DEPs file as well, if the CL touches DEPs. 76 // This is a nice-to-have feature, and is an edge case. 77 justification := &model.SuspectJustification{} 78 author := changelog.Author.Email 79 for _, email := range getNonBlamableEmail() { 80 if email == author { 81 return &model.SuspectJustification{IsNonBlamable: true}, nil 82 } 83 } 84 85 // Check files and line number extracted from output 86 criteria := &ScoringCriteria{ 87 TouchedSameFile: 10, 88 TouchedRelatedFile: 2, 89 TouchedSameLine: 20, 90 } 91 for file, lines := range signal.Files { 92 for _, diff := range changelog.ChangeLogDiffs { 93 e := updateJustification(c, justification, file, lines, diff, criteria, model.JustificationType_FAILURELOG) 94 if e != nil { 95 return nil, e 96 } 97 } 98 } 99 100 // Check for dependency. 101 criteria = &ScoringCriteria{ 102 TouchedSameFile: 2, 103 TouchedRelatedFile: 1, 104 } 105 106 // Calculate the score for dependencies using the DependencyMap 107 for _, diff := range changelog.ChangeLogDiffs { 108 oldPathName := util.GetCanonicalFileName(diff.OldPath) 109 newPathName := util.GetCanonicalFileName(diff.NewPath) 110 // Only check the dependency if either the old file or new file exists in the map 111 oldPathDeps, oldPathOk := signal.DependencyMap[oldPathName] 112 newPathDeps, newPathOk := signal.DependencyMap[newPathName] 113 if oldPathOk || newPathOk { 114 // Only process modified files once 115 deps := oldPathDeps 116 if oldPathName != newPathName { 117 deps = append(oldPathDeps, newPathDeps...) 118 } 119 for _, dep := range deps { 120 e := updateJustification(c, justification, dep, []int{}, diff, criteria, model.JustificationType_DEPENDENCY) 121 if e != nil { 122 return nil, e 123 } 124 } 125 } 126 } 127 128 justification.Sort() 129 return justification, nil 130 } 131 132 func updateJustification(c context.Context, justification *model.SuspectJustification, fileInLog string, lines []int, diff model.ChangeLogDiff, criteria *ScoringCriteria, justificationType model.JustificationType) error { 133 // TODO (crbug.com/1295566): In case of MODIFY, also query Gitiles for the 134 // changed region and compared with lines. If they intersect, increase the score. 135 // This may lead to a better score indicator. 136 137 // Get the relevant file paths from CLs 138 relevantFilePaths := []string{} 139 switch diff.Type { 140 case model.ChangeType_ADD, model.ChangeType_COPY, model.ChangeType_MODIFY: 141 relevantFilePaths = append(relevantFilePaths, diff.NewPath) 142 case model.ChangeType_RENAME: 143 relevantFilePaths = append(relevantFilePaths, diff.NewPath, diff.OldPath) 144 case model.ChangeType_DELETE: 145 relevantFilePaths = append(relevantFilePaths, diff.OldPath) 146 default: 147 return fmt.Errorf("Unsupported diff type %s", diff.Type) 148 } 149 for _, filePath := range relevantFilePaths { 150 score := 0 151 reason := "" 152 if IsSameFile(filePath, fileInLog) { 153 score = criteria.TouchedSameFile 154 reason = getReasonSameFile(filePath, diff.Type, justificationType) 155 } else if IsRelated(filePath, fileInLog) { 156 score = criteria.TouchedRelatedFile 157 reason = getReasonRelatedFile(filePath, diff.Type, fileInLog, justificationType) 158 } 159 if score > 0 { 160 justification.AddItem(score, filePath, reason, justificationType) 161 } 162 } 163 return nil 164 } 165 166 func getReasonSameFile(filePath string, changeType model.ChangeType, justificationType model.JustificationType) string { 167 m := getChangeTypeActionMap() 168 action := m[string(changeType)] 169 switch justificationType { 170 case model.JustificationType_FAILURELOG: 171 return fmt.Sprintf("The file \"%s\" was %s and it was in the failure log.", filePath, action) 172 case model.JustificationType_DEPENDENCY: 173 return fmt.Sprintf("The file \"%s\" was %s and it was in the dependency.", filePath, action) 174 default: 175 return "" 176 } 177 } 178 179 func getReasonRelatedFile(filePath string, changeType model.ChangeType, relatedFile string, justificationType model.JustificationType) string { 180 m := getChangeTypeActionMap() 181 action := m[string(changeType)] 182 switch justificationType { 183 case model.JustificationType_FAILURELOG: 184 return fmt.Sprintf("The file \"%s\" was %s. It was related to the file %s which was in the failure log.", filePath, action, relatedFile) 185 case model.JustificationType_DEPENDENCY: 186 return fmt.Sprintf("The file \"%s\" was %s. It was related to the dependency %s.", filePath, action, relatedFile) 187 default: 188 return "" 189 } 190 } 191 192 func getChangeTypeActionMap() map[string]string { 193 return map[string]string{ 194 model.ChangeType_ADD: "added", 195 model.ChangeType_COPY: "copied", 196 model.ChangeType_RENAME: "renamed", 197 model.ChangeType_MODIFY: "modified", 198 model.ChangeType_DELETE: "deleted", 199 } 200 } 201 202 // IsSameFile makes the best effort in guessing if the file in the failure log 203 // is the same as the file in the changelog or not. 204 // Args: 205 // fullFilePath: Full path of a file committed to git repo. 206 // fileInLog: File path appearing in a failure log. It may or may not be a full path. 207 // Example: 208 // ("chrome/test/base/chrome_process_util.h", "base/chrome_process_util.h") -> True 209 // ("a/b/x.cc", "a/b/x.cc") -> True 210 // ("c/x.cc", "a/b/c/x.cc") -> False 211 func IsSameFile(fullFilePath string, fileInLog string) bool { 212 // In some cases, fileInLog is prepended with "src/", we want a relative path to src/ 213 fileInLog = strings.TrimPrefix(fileInLog, "src/") 214 if fileInLog == fullFilePath { 215 return true 216 } 217 return strings.HasSuffix(fullFilePath, fmt.Sprintf("/%s", fileInLog)) 218 } 219 220 // IsRelated checks if 2 files are related. 221 // Example: 222 // file.h <-> file_impl.cc 223 // x.h <-> x.cc 224 func IsRelated(fullFilePath string, fileInLog string) bool { 225 filePathExt := strings.TrimPrefix(filepath.Ext(fullFilePath), ".") 226 fileInLogExt := strings.TrimPrefix(filepath.Ext(fileInLog), ".") 227 if !AreRelelatedExtensions(filePathExt, fileInLogExt) { 228 return false 229 } 230 231 if strings.HasSuffix(fileInLog, ".o") || strings.HasSuffix(fileInLog, ".obj") { 232 fileInLog = NormalizeObjectFilePath(fileInLog) 233 } 234 235 if IsSameFile(util.StripExtensionAndCommonSuffixFromFilePath(fullFilePath), util.StripExtensionAndCommonSuffixFromFilePath(fileInLog)) { 236 return true 237 } 238 239 return false 240 } 241 242 // NormalizeObjectFilePath normalizes the file path to an c/c++ object file. 243 // During compile, a/b/c/file.cc in TARGET will be compiled into object file 244 // obj/a/b/c/TARGET.file.o, thus 'obj/' and TARGET need to be removed from path. 245 func NormalizeObjectFilePath(filePath string) string { 246 if !(strings.HasSuffix(filePath, ".o") || strings.HasSuffix(filePath, ".obj")) { 247 return filePath 248 } 249 filePath = strings.TrimPrefix(filePath, "obj/") 250 dir := filepath.Dir(filePath) 251 fileName := filepath.Base(filePath) 252 parts := strings.Split(fileName, ".") 253 if len(parts) == 3 { 254 // Special cases for file.cc.obj and similar cases 255 if parts[1] != "c" && parts[1] != "cc" && parts[1] != "cpp" && parts[1] != "m" && parts[1] != "mm" { 256 fileName = fmt.Sprintf("%s.%s", parts[1], parts[2]) 257 } 258 } else if len(parts) > 3 { 259 fileName = strings.Join(parts[1:], ".") 260 } 261 if dir == "." { 262 return fileName 263 } 264 return fmt.Sprintf("%s/%s", dir, fileName) 265 } 266 267 // AreRelelatedExtensions checks if 2 extensions are related 268 func AreRelelatedExtensions(ext1 string, ext2 string) bool { 269 relations := [][]string{ 270 {"h", "hh", "c", "cc", "cpp", "m", "mm", "o", "obj"}, 271 {"py", "pyc"}, 272 {"gyp", "gypi"}, 273 } 274 for _, group := range relations { 275 found1 := false 276 found2 := false 277 for _, ext := range group { 278 if ext == ext1 { 279 found1 = true 280 } 281 if ext == ext2 { 282 found2 = true 283 } 284 } 285 if found1 && found2 { 286 return true 287 } 288 } 289 return false 290 } 291 292 // getNonBlamableEmail returns emails whose changes should never be flagged as culprits. 293 func getNonBlamableEmail() []string { 294 return []string{"chrome-release-bot@chromium.org"} 295 }