go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/bisection/compilefailureanalysis/compile_failure_analysis.go

go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/bisection/compilefailureanalysis/compile_failure_analysis.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package compilefailureanalysis is the component for analyzing
    16  // compile failures.
    17  // It has 2 main components: heuristic analysis and nth_section analysis
    18  package compilefailureanalysis
    19  
    20  import (
    21  	"context"
    22  	"fmt"
    23  
    24  	"go.chromium.org/luci/bisection/compilefailureanalysis/compilelog"
    25  	"go.chromium.org/luci/bisection/compilefailureanalysis/heuristic"
    26  	"go.chromium.org/luci/bisection/compilefailureanalysis/nthsection"
    27  	"go.chromium.org/luci/bisection/compilefailureanalysis/statusupdater"
    28  	"go.chromium.org/luci/bisection/culpritverification"
    29  	"go.chromium.org/luci/bisection/internal/buildbucket"
    30  	"go.chromium.org/luci/bisection/internal/lucinotify"
    31  	"go.chromium.org/luci/bisection/model"
    32  	pb "go.chromium.org/luci/bisection/proto/v1"
    33  	"go.chromium.org/luci/bisection/util/datastoreutil"
    34  	"go.chromium.org/luci/bisection/util/loggingutil"
    35  
    36  	"go.chromium.org/luci/common/clock"
    37  	"go.chromium.org/luci/common/errors"
    38  	"go.chromium.org/luci/common/logging"
    39  	"go.chromium.org/luci/gae/service/datastore"
    40  )
    41  
    42  // AnalyzeFailure receives failure information and perform analysis.
    43  // Note that this assumes that the failure is new (i.e. the client of this
    44  // function should make sure this is not a duplicate analysis)
    45  func AnalyzeFailure(
    46  	c context.Context,
    47  	cf *model.CompileFailure,
    48  	firstFailedBuildID int64,
    49  	lastPassedBuildID int64,
    50  ) (*model.CompileFailureAnalysis, error) {
    51  	logging.Infof(c, "AnalyzeFailure firstFailed = %d", firstFailedBuildID)
    52  	regressionRange, e := findRegressionRange(c, firstFailedBuildID, lastPassedBuildID)
    53  	if e != nil {
    54  		return nil, e
    55  	}
    56  
    57  	logging.Infof(c, "Regression range: %v", regressionRange)
    58  
    59  	// Get failed targets
    60  	compileLogs, e := compilelog.GetCompileLogs(c, firstFailedBuildID)
    61  	if e != nil {
    62  		return nil, e
    63  	}
    64  	failedTargets := compilelog.GetFailedTargets(compileLogs)
    65  
    66  	e = datastore.RunInTransaction(c, func(c context.Context) error {
    67  		e := datastore.Get(c, cf)
    68  		if e != nil {
    69  			return e
    70  		}
    71  		cf.OutputTargets = failedTargets
    72  		return datastore.Put(c, cf)
    73  	}, nil)
    74  
    75  	if e != nil {
    76  		return nil, e
    77  	}
    78  
    79  	// Creates a new CompileFailureAnalysis entity in datastore
    80  	analysis := &model.CompileFailureAnalysis{
    81  		CompileFailure:         datastore.KeyForObj(c, cf),
    82  		CreateTime:             clock.Now(c),
    83  		Status:                 pb.AnalysisStatus_RUNNING,
    84  		RunStatus:              pb.AnalysisRunStatus_STARTED,
    85  		FirstFailedBuildId:     firstFailedBuildID,
    86  		LastPassedBuildId:      lastPassedBuildID,
    87  		InitialRegressionRange: regressionRange,
    88  	}
    89  
    90  	e = datastore.Put(c, analysis)
    91  	if e != nil {
    92  		return nil, e
    93  	}
    94  	c = loggingutil.SetAnalysisID(c, analysis.Id)
    95  
    96  	// Check if the analysis is for tree closer, if yes, set the flag.
    97  	err := setTreeCloser(c, analysis)
    98  	if err != nil {
    99  		// Non-critical, just continue
   100  		err := errors.Annotate(err, "failed to check tree closer").Err()
   101  		logging.Errorf(c, err.Error())
   102  	}
   103  
   104  	// Heuristic analysis
   105  	heuristicResult, e := heuristic.Analyze(c, analysis, regressionRange, compileLogs)
   106  	if e != nil {
   107  		// As this is only heuristic analysis, we log the error and continue with nthsection analysis
   108  		logging.Errorf(c, "Error during heuristic analysis for build %d: %v", firstFailedBuildID, e)
   109  	}
   110  
   111  	// If heuristic analysis does not return error, we proceed to verify its results (if any)
   112  	if e == nil {
   113  		shouldRunCulpritVerification, err := culpritverification.ShouldRunCulpritVerification(c, analysis)
   114  		if err != nil {
   115  			return nil, errors.Annotate(err, "couldn't fetch config for culprit verification. Build %d", firstFailedBuildID).Err()
   116  		}
   117  		if shouldRunCulpritVerification {
   118  			if !analysis.ShouldCancel {
   119  				if err := verifyHeuristicResults(c, heuristicResult, firstFailedBuildID, analysis.Id); err != nil {
   120  					// Do not return error here, just log
   121  					logging.Errorf(c, "Error verifying heuristic result for build %d: %s", firstFailedBuildID, err)
   122  				}
   123  			}
   124  		}
   125  	}
   126  
   127  	// Nth-section analysis
   128  	shouldRunNthSection, err := nthsection.ShouldRunNthSectionAnalysis(c, analysis)
   129  	if err != nil {
   130  		return nil, errors.Annotate(err, "couldn't fetch config for nthsection. Build %d", firstFailedBuildID).Err()
   131  	}
   132  	if shouldRunNthSection {
   133  		_, e = nthsection.Analyze(c, analysis)
   134  		if e != nil {
   135  			e = errors.Annotate(e, "error during nthsection analysis for build %d", firstFailedBuildID).Err()
   136  			logging.Errorf(c, e.Error())
   137  		}
   138  	}
   139  
   140  	// Update status of analysis
   141  	err = statusupdater.UpdateAnalysisStatus(c, analysis)
   142  	if err != nil {
   143  		return nil, errors.Annotate(err, "couldn't update analysis status. Build %d", firstFailedBuildID).Err()
   144  	}
   145  
   146  	return analysis, nil
   147  }
   148  
   149  // verifyHeuristicResults verifies if the suspects of heuristic analysis are the real culprit.
   150  // analysisID is CompileFailureAnalysis ID. It is meant to be propagated all the way to the
   151  // recipe, so we can identify the analysis in buildbucket.
   152  func verifyHeuristicResults(c context.Context, heuristicAnalysis *model.CompileHeuristicAnalysis, failedBuildID int64, analysisID int64) error {
   153  	// TODO (nqmtuan): Move the verification into a task queue
   154  	suspects, err := getHeuristicSuspectsToVerify(c, heuristicAnalysis)
   155  	if err != nil {
   156  		return err
   157  	}
   158  	for _, suspect := range suspects {
   159  		err := culpritverification.VerifySuspect(c, suspect, failedBuildID, analysisID)
   160  		if err != nil {
   161  			// Just log the error and continue for other suspects
   162  			logging.Errorf(c, "Error in verifying suspect %d for analysis %d", suspect.Id, analysisID)
   163  		}
   164  	}
   165  	return nil
   166  }
   167  
   168  // In case heuristic analysis returns too many results, we don't want to verify all of them.
   169  // Instead, we want to be selective in what we want to verify.
   170  // For now, we will just take top 3 results of heuristic analysis.
   171  func getHeuristicSuspectsToVerify(c context.Context, heuristicAnalysis *model.CompileHeuristicAnalysis) ([]*model.Suspect, error) {
   172  	// Getting the suspects for heuristic analysis
   173  	suspects := []*model.Suspect{}
   174  	q := datastore.NewQuery("Suspect").Ancestor(datastore.KeyForObj(c, heuristicAnalysis)).Order("-score")
   175  	err := datastore.GetAll(c, q, &suspects)
   176  	if err != nil {
   177  		return nil, err
   178  	}
   179  
   180  	// Get top 3 suspects to verify
   181  	nSuspects := 3
   182  	if nSuspects > len(suspects) {
   183  		nSuspects = len(suspects)
   184  	}
   185  	return suspects[:nSuspects], nil
   186  }
   187  
   188  // findRegressionRange takes in the first failed and last passed buildID
   189  // and returns the regression range based on GitilesCommit.
   190  func findRegressionRange(
   191  	c context.Context,
   192  	firstFailedBuildID int64,
   193  	lastPassedBuildID int64,
   194  ) (*pb.RegressionRange, error) {
   195  	firstFailedBuild, err := buildbucket.GetBuild(c, firstFailedBuildID, nil)
   196  	if err != nil {
   197  		return nil, fmt.Errorf("error getting build %d: %w", firstFailedBuildID, err)
   198  	}
   199  
   200  	lastPassedBuild, err := buildbucket.GetBuild(c, lastPassedBuildID, nil)
   201  	if err != nil {
   202  		return nil, fmt.Errorf("error getting build %d: %w", lastPassedBuildID, err)
   203  	}
   204  
   205  	if firstFailedBuild.GetInput().GetGitilesCommit() == nil || lastPassedBuild.GetInput().GetGitilesCommit() == nil {
   206  		return nil, fmt.Errorf("couldn't get gitiles commit for builds (%d, %d)", lastPassedBuildID, firstFailedBuildID)
   207  	}
   208  
   209  	return &pb.RegressionRange{
   210  		FirstFailed: firstFailedBuild.GetInput().GetGitilesCommit(),
   211  		LastPassed:  lastPassedBuild.GetInput().GetGitilesCommit(),
   212  	}, nil
   213  }
   214  
   215  // setTreeCloser checks and updates the analysis if it is for a treecloser failure.
   216  func setTreeCloser(c context.Context, cfa *model.CompileFailureAnalysis) error {
   217  	fb, err := datastoreutil.GetBuild(c, cfa.CompileFailure.Parent().IntID())
   218  	if err != nil {
   219  		return errors.Annotate(err, "getBuild").Err()
   220  	}
   221  	if fb == nil {
   222  		return fmt.Errorf("couldn't find build for analysis %d", cfa.Id)
   223  	}
   224  
   225  	// TODO (nqmtuan): Pass in step name when we support arbitrary
   226  	// step name which may not be "compile"
   227  	isTreeCloser, err := lucinotify.CheckTreeCloser(c, fb.Project, fb.Bucket, fb.Builder, "compile")
   228  	if err != nil {
   229  		return err
   230  	}
   231  
   232  	return datastore.RunInTransaction(c, func(c context.Context) error {
   233  		e := datastore.Get(c, cfa)
   234  		if e != nil {
   235  			return e
   236  		}
   237  		cfa.IsTreeCloser = isTreeCloser
   238  		return datastore.Put(c, cfa)
   239  	}, nil)
   240  }