go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/bisection/compilefailureanalysis/heuristic/changelog_analyzer.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package heuristic
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"path/filepath"
    21  	"strings"
    22  
    23  	"go.chromium.org/luci/bisection/model"
    24  	"go.chromium.org/luci/bisection/util"
    25  
    26  	"go.chromium.org/luci/common/logging"
    27  )
    28  
// ScoringCriteria holds the scores the heuristic analysis awards for each kind
// of match between a file touched by a suspect and a file named by the failure
// signal. AnalyzeOneChangeLog builds one set of values for files found in the
// failure log and a second, lower-valued set for dependency matches.
type ScoringCriteria struct {
	// TouchedSameFile is the score if the suspect touched the same file as the
	// one in the failure log (or dependency list).
	TouchedSameFile int
	// TouchedRelatedFile is the score if the suspect touched a file related to
	// a file in the failure log (or dependency list).
	TouchedRelatedFile int
	// TouchedSameLine is the score if the suspect touched the same file and the
	// same line as in the failure log.
	// NOTE(review): no code visible in this file reads this field yet; it
	// appears to be reserved for the line-matching TODO (crbug.com/1295566).
	TouchedSameLine int
}
    38  
    39  // AnalyzeChangeLogs analyzes the changelogs based on the failure signals.
    40  // Returns a dictionary that maps the commits and the result found.
    41  func AnalyzeChangeLogs(c context.Context, signal *model.CompileFailureSignal, changelogs []*model.ChangeLog) (*model.HeuristicAnalysisResult, error) {
    42  	result := &model.HeuristicAnalysisResult{}
    43  	for _, changelog := range changelogs {
    44  		justification, err := AnalyzeOneChangeLog(c, signal, changelog)
    45  		commit := changelog.Commit
    46  		if err != nil {
    47  			logging.Errorf(c, "Error analyzing change log for commit %s. Error: %w", commit, err)
    48  			continue
    49  		}
    50  
    51  		// We only care about the relevant CLs
    52  		if justification.GetScore() <= 0 {
    53  			continue
    54  		}
    55  
    56  		reviewUrl, err := changelog.GetReviewUrl()
    57  		if err != nil {
    58  			logging.Errorf(c, "Error getting review URL for commit: %s. Error: %w", commit, err)
    59  			continue
    60  		}
    61  		reviewTitle, err := changelog.GetReviewTitle()
    62  		if err != nil {
    63  			// Just log the error from getting the review title - suspect should still be added
    64  			logging.Errorf(c, "Error getting review title for commit: %s. Error: %w", commit, err)
    65  		}
    66  		result.AddItem(commit, reviewUrl, reviewTitle, justification)
    67  	}
    68  	result.Sort()
    69  	return result, nil
    70  }
    71  
    72  // AnalyzeOneChangeLog analyzes one changelog(revision) and returns the
    73  // justification of how likely that changelog is the culprit.
    74  func AnalyzeOneChangeLog(c context.Context, signal *model.CompileFailureSignal, changelog *model.ChangeLog) (*model.SuspectJustification, error) {
    75  	// TODO (crbug.com/1295566): check DEPs file as well, if the CL touches DEPs.
    76  	// This is a nice-to-have feature, and is an edge case.
    77  	justification := &model.SuspectJustification{}
    78  	author := changelog.Author.Email
    79  	for _, email := range getNonBlamableEmail() {
    80  		if email == author {
    81  			return &model.SuspectJustification{IsNonBlamable: true}, nil
    82  		}
    83  	}
    84  
    85  	// Check files and line number extracted from output
    86  	criteria := &ScoringCriteria{
    87  		TouchedSameFile:    10,
    88  		TouchedRelatedFile: 2,
    89  		TouchedSameLine:    20,
    90  	}
    91  	for file, lines := range signal.Files {
    92  		for _, diff := range changelog.ChangeLogDiffs {
    93  			e := updateJustification(c, justification, file, lines, diff, criteria, model.JustificationType_FAILURELOG)
    94  			if e != nil {
    95  				return nil, e
    96  			}
    97  		}
    98  	}
    99  
   100  	// Check for dependency.
   101  	criteria = &ScoringCriteria{
   102  		TouchedSameFile:    2,
   103  		TouchedRelatedFile: 1,
   104  	}
   105  
   106  	// Calculate the score for dependencies using the DependencyMap
   107  	for _, diff := range changelog.ChangeLogDiffs {
   108  		oldPathName := util.GetCanonicalFileName(diff.OldPath)
   109  		newPathName := util.GetCanonicalFileName(diff.NewPath)
   110  		// Only check the dependency if either the old file or new file exists in the map
   111  		oldPathDeps, oldPathOk := signal.DependencyMap[oldPathName]
   112  		newPathDeps, newPathOk := signal.DependencyMap[newPathName]
   113  		if oldPathOk || newPathOk {
   114  			// Only process modified files once
   115  			deps := oldPathDeps
   116  			if oldPathName != newPathName {
   117  				deps = append(oldPathDeps, newPathDeps...)
   118  			}
   119  			for _, dep := range deps {
   120  				e := updateJustification(c, justification, dep, []int{}, diff, criteria, model.JustificationType_DEPENDENCY)
   121  				if e != nil {
   122  					return nil, e
   123  				}
   124  			}
   125  		}
   126  	}
   127  
   128  	justification.Sort()
   129  	return justification, nil
   130  }
   131  
   132  func updateJustification(c context.Context, justification *model.SuspectJustification, fileInLog string, lines []int, diff model.ChangeLogDiff, criteria *ScoringCriteria, justificationType model.JustificationType) error {
   133  	// TODO (crbug.com/1295566): In case of MODIFY, also query Gitiles for the
   134  	// changed region and compared with lines. If they intersect, increase the score.
   135  	// This may lead to a better score indicator.
   136  
   137  	// Get the relevant file paths from CLs
   138  	relevantFilePaths := []string{}
   139  	switch diff.Type {
   140  	case model.ChangeType_ADD, model.ChangeType_COPY, model.ChangeType_MODIFY:
   141  		relevantFilePaths = append(relevantFilePaths, diff.NewPath)
   142  	case model.ChangeType_RENAME:
   143  		relevantFilePaths = append(relevantFilePaths, diff.NewPath, diff.OldPath)
   144  	case model.ChangeType_DELETE:
   145  		relevantFilePaths = append(relevantFilePaths, diff.OldPath)
   146  	default:
   147  		return fmt.Errorf("Unsupported diff type %s", diff.Type)
   148  	}
   149  	for _, filePath := range relevantFilePaths {
   150  		score := 0
   151  		reason := ""
   152  		if IsSameFile(filePath, fileInLog) {
   153  			score = criteria.TouchedSameFile
   154  			reason = getReasonSameFile(filePath, diff.Type, justificationType)
   155  		} else if IsRelated(filePath, fileInLog) {
   156  			score = criteria.TouchedRelatedFile
   157  			reason = getReasonRelatedFile(filePath, diff.Type, fileInLog, justificationType)
   158  		}
   159  		if score > 0 {
   160  			justification.AddItem(score, filePath, reason, justificationType)
   161  		}
   162  	}
   163  	return nil
   164  }
   165  
   166  func getReasonSameFile(filePath string, changeType model.ChangeType, justificationType model.JustificationType) string {
   167  	m := getChangeTypeActionMap()
   168  	action := m[string(changeType)]
   169  	switch justificationType {
   170  	case model.JustificationType_FAILURELOG:
   171  		return fmt.Sprintf("The file \"%s\" was %s and it was in the failure log.", filePath, action)
   172  	case model.JustificationType_DEPENDENCY:
   173  		return fmt.Sprintf("The file \"%s\" was %s and it was in the dependency.", filePath, action)
   174  	default:
   175  		return ""
   176  	}
   177  }
   178  
   179  func getReasonRelatedFile(filePath string, changeType model.ChangeType, relatedFile string, justificationType model.JustificationType) string {
   180  	m := getChangeTypeActionMap()
   181  	action := m[string(changeType)]
   182  	switch justificationType {
   183  	case model.JustificationType_FAILURELOG:
   184  		return fmt.Sprintf("The file \"%s\" was %s. It was related to the file %s which was in the failure log.", filePath, action, relatedFile)
   185  	case model.JustificationType_DEPENDENCY:
   186  		return fmt.Sprintf("The file \"%s\" was %s. It was related to the dependency %s.", filePath, action, relatedFile)
   187  	default:
   188  		return ""
   189  	}
   190  }
   191  
   192  func getChangeTypeActionMap() map[string]string {
   193  	return map[string]string{
   194  		model.ChangeType_ADD:    "added",
   195  		model.ChangeType_COPY:   "copied",
   196  		model.ChangeType_RENAME: "renamed",
   197  		model.ChangeType_MODIFY: "modified",
   198  		model.ChangeType_DELETE: "deleted",
   199  	}
   200  }
   201  
   202  // IsSameFile makes the best effort in guessing if the file in the failure log
   203  // is the same as the file in the changelog or not.
   204  // Args:
   205  // fullFilePath: Full path of a file committed to git repo.
   206  // fileInLog: File path appearing in a failure log. It may or may not be a full path.
   207  // Example:
   208  // ("chrome/test/base/chrome_process_util.h", "base/chrome_process_util.h") -> True
   209  // ("a/b/x.cc", "a/b/x.cc") -> True
   210  // ("c/x.cc", "a/b/c/x.cc") -> False
   211  func IsSameFile(fullFilePath string, fileInLog string) bool {
   212  	// In some cases, fileInLog is prepended with "src/", we want a relative path to src/
   213  	fileInLog = strings.TrimPrefix(fileInLog, "src/")
   214  	if fileInLog == fullFilePath {
   215  		return true
   216  	}
   217  	return strings.HasSuffix(fullFilePath, fmt.Sprintf("/%s", fileInLog))
   218  }
   219  
   220  // IsRelated checks if 2 files are related.
   221  // Example:
   222  // file.h <-> file_impl.cc
   223  // x.h <-> x.cc
   224  func IsRelated(fullFilePath string, fileInLog string) bool {
   225  	filePathExt := strings.TrimPrefix(filepath.Ext(fullFilePath), ".")
   226  	fileInLogExt := strings.TrimPrefix(filepath.Ext(fileInLog), ".")
   227  	if !AreRelelatedExtensions(filePathExt, fileInLogExt) {
   228  		return false
   229  	}
   230  
   231  	if strings.HasSuffix(fileInLog, ".o") || strings.HasSuffix(fileInLog, ".obj") {
   232  		fileInLog = NormalizeObjectFilePath(fileInLog)
   233  	}
   234  
   235  	if IsSameFile(util.StripExtensionAndCommonSuffixFromFilePath(fullFilePath), util.StripExtensionAndCommonSuffixFromFilePath(fileInLog)) {
   236  		return true
   237  	}
   238  
   239  	return false
   240  }
   241  
   242  // NormalizeObjectFilePath normalizes the file path to an c/c++ object file.
   243  // During compile, a/b/c/file.cc in TARGET will be compiled into object file
   244  // obj/a/b/c/TARGET.file.o, thus 'obj/' and TARGET need to be removed from path.
   245  func NormalizeObjectFilePath(filePath string) string {
   246  	if !(strings.HasSuffix(filePath, ".o") || strings.HasSuffix(filePath, ".obj")) {
   247  		return filePath
   248  	}
   249  	filePath = strings.TrimPrefix(filePath, "obj/")
   250  	dir := filepath.Dir(filePath)
   251  	fileName := filepath.Base(filePath)
   252  	parts := strings.Split(fileName, ".")
   253  	if len(parts) == 3 {
   254  		// Special cases for file.cc.obj and similar cases
   255  		if parts[1] != "c" && parts[1] != "cc" && parts[1] != "cpp" && parts[1] != "m" && parts[1] != "mm" {
   256  			fileName = fmt.Sprintf("%s.%s", parts[1], parts[2])
   257  		}
   258  	} else if len(parts) > 3 {
   259  		fileName = strings.Join(parts[1:], ".")
   260  	}
   261  	if dir == "." {
   262  		return fileName
   263  	}
   264  	return fmt.Sprintf("%s/%s", dir, fileName)
   265  }
   266  
   267  // AreRelelatedExtensions checks if 2 extensions are related
   268  func AreRelelatedExtensions(ext1 string, ext2 string) bool {
   269  	relations := [][]string{
   270  		{"h", "hh", "c", "cc", "cpp", "m", "mm", "o", "obj"},
   271  		{"py", "pyc"},
   272  		{"gyp", "gypi"},
   273  	}
   274  	for _, group := range relations {
   275  		found1 := false
   276  		found2 := false
   277  		for _, ext := range group {
   278  			if ext == ext1 {
   279  				found1 = true
   280  			}
   281  			if ext == ext2 {
   282  				found2 = true
   283  			}
   284  		}
   285  		if found1 && found2 {
   286  			return true
   287  		}
   288  	}
   289  	return false
   290  }
   291  
   292  // getNonBlamableEmail returns emails whose changes should never be flagged as culprits.
   293  func getNonBlamableEmail() []string {
   294  	return []string{"chrome-release-bot@chromium.org"}
   295  }