go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/bisection/culpritverification/verify_culprit.go

go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/bisection/culpritverification/verify_culprit.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package culpritverification verifies if a suspect is a culprit.
    16  package culpritverification
    17  
    18  import (
    19  	"context"
    20  	"fmt"
    21  
    22  	"google.golang.org/protobuf/proto"
    23  
    24  	"go.chromium.org/luci/bisection/compilefailureanalysis/heuristic"
    25  	"go.chromium.org/luci/bisection/compilefailureanalysis/statusupdater"
    26  	cpvt "go.chromium.org/luci/bisection/culpritverification/task"
    27  	"go.chromium.org/luci/bisection/internal/config"
    28  	"go.chromium.org/luci/bisection/internal/gitiles"
    29  	"go.chromium.org/luci/bisection/model"
    30  	pb "go.chromium.org/luci/bisection/proto/v1"
    31  	"go.chromium.org/luci/bisection/rerun"
    32  	"go.chromium.org/luci/bisection/util"
    33  	"go.chromium.org/luci/bisection/util/datastoreutil"
    34  	"go.chromium.org/luci/bisection/util/loggingutil"
    35  
    36  	taskpb "go.chromium.org/luci/bisection/task/proto"
    37  	buildbucketpb "go.chromium.org/luci/buildbucket/proto"
    38  	"go.chromium.org/luci/common/errors"
    39  	"go.chromium.org/luci/common/logging"
    40  	"go.chromium.org/luci/common/retry/transient"
    41  	"go.chromium.org/luci/gae/service/datastore"
    42  	"go.chromium.org/luci/gae/service/info"
    43  )
    44  
    45  // RegisterTaskClass registers the task class for tq dispatcher
    46  func RegisterTaskClass() {
    47  	compileHandler := func(ctx context.Context, payload proto.Message) error {
    48  		task := payload.(*taskpb.CulpritVerificationTask)
    49  		analysisID := task.GetAnalysisId()
    50  		suspectID := task.GetSuspectId()
    51  		parentKey := task.GetParentKey()
    52  		return handleTQError(ctx, processCulpritVerificationTask(ctx, analysisID, suspectID, parentKey))
    53  	}
    54  	testHandler := func(ctx context.Context, payload proto.Message) error {
    55  		task := payload.(*taskpb.TestFailureCulpritVerificationTask)
    56  		return handleTQError(ctx, processTestFailureTask(ctx, task))
    57  	}
    58  	cpvt.RegisterTaskClass(compileHandler, testHandler)
    59  }
    60  
    61  func handleTQError(ctx context.Context, err error) error {
    62  	if err != nil {
    63  		err := errors.Annotate(err, "run culprit verification").Err()
    64  		logging.Errorf(ctx, err.Error())
    65  		// If the error is transient, return err to retry
    66  		if transient.Tag.In(err) {
    67  			return err
    68  		}
    69  		return nil
    70  	}
    71  	return nil
    72  }
    73  
    74  func processCulpritVerificationTask(c context.Context, analysisID int64, suspectID int64, parentKeyStr string) error {
    75  	c, err := loggingutil.UpdateLoggingWithAnalysisID(c, analysisID)
    76  	if err != nil {
    77  		// not critical, just log
    78  		err := errors.Annotate(err, "failed UpdateLoggingWithAnalysisID %d", analysisID)
    79  		logging.Errorf(c, "%v", err)
    80  	}
    81  
    82  	cfa, err := datastoreutil.GetCompileFailureAnalysis(c, analysisID)
    83  	if err != nil {
    84  		return errors.Annotate(err, "failed getting CompileFailureAnalysis").Err()
    85  	}
    86  
    87  	parentKey, err := datastore.NewKeyEncoded(parentKeyStr)
    88  	if err != nil {
    89  		return errors.Annotate(err, "couldn't decode parent key for suspect").Err()
    90  	}
    91  
    92  	suspect, err := datastoreutil.GetSuspect(c, suspectID, parentKey)
    93  	if err != nil {
    94  		return errors.Annotate(err, "couldn't get suspect").Err()
    95  	}
    96  	return VerifySuspect(c, suspect, cfa.FirstFailedBuildId, analysisID)
    97  }
    98  
    99  // VerifySuspect verifies if a suspect is indeed the culprit.
   100  // analysisID is CompileFailureAnalysis ID. It is meant to be propagated all the way to the
   101  // recipe, so we can identify the analysis in buildbucket.
   102  func VerifySuspect(c context.Context, suspect *model.Suspect, failedBuildID int64, analysisID int64) error {
   103  	logging.Infof(c, "Verifying suspect %d for build %d", datastore.KeyForObj(c, suspect).IntID(), failedBuildID)
   104  
   105  	// Check if the analysis has found any culprits, if yes, exit early
   106  	cfa, err := datastoreutil.GetCompileFailureAnalysis(c, analysisID)
   107  	if err != nil {
   108  		return err
   109  	}
   110  
   111  	defer updateSuspectStatus(c, suspect, cfa)
   112  
   113  	if len(cfa.VerifiedCulprits) > 0 {
   114  		logging.Infof(c, "culprit found for analysis %d, no need to trigger any verification runs", analysisID)
   115  		return nil
   116  	}
   117  
   118  	// Check if there is any suspect with the same commit being verified
   119  	// If yes, we don't run verification for this suspect anymore
   120  	suspectExist, err := checkSuspectWithSameCommitExist(c, cfa, suspect)
   121  	if err != nil {
   122  		return errors.Annotate(err, "checkSuspectWithSameCommitExist").Err()
   123  	}
   124  	if suspectExist {
   125  		return nil
   126  	}
   127  
   128  	// Get failed compile targets
   129  	compileFailure, err := datastoreutil.GetCompileFailureForAnalysisID(c, analysisID)
   130  	if err != nil {
   131  		return err
   132  	}
   133  	failedTargets := compileFailure.OutputTargets
   134  
   135  	// Get the changelog for the suspect
   136  	repoURL := gitiles.GetRepoUrl(c, &suspect.GitilesCommit)
   137  	changeLogs, err := gitiles.GetChangeLogsForSingleRevision(c, repoURL, suspect.GitilesCommit.Id)
   138  	if err != nil {
   139  		// This is non-critical, we just log and continue
   140  		logging.Errorf(c, "Cannot get changelog for revision %s: %s", suspect.GitilesCommit.Id, err)
   141  	} else {
   142  		// Check if any failed files is newly added in the change log.
   143  		// If it is the case, the parent revision cannot compile failed targets.
   144  		// In such cases, we do not pass the failed targets to recipe, instead
   145  		// we will compile all targets.
   146  		if hasNewTarget(c, compileFailure.FailedFiles, changeLogs) {
   147  			failedTargets = []string{}
   148  		}
   149  	}
   150  
   151  	// Get rerun build property
   152  	props := map[string]any{
   153  		"analysis_id":    analysisID,
   154  		"bisection_host": fmt.Sprintf("%s.appspot.com", info.AppID(c)),
   155  		// For culprit verification, we should remove builder cache
   156  		"should_clobber": true,
   157  	}
   158  	if len(failedTargets) > 0 {
   159  		props["compile_targets"] = failedTargets
   160  	}
   161  
   162  	// Verify the suspect
   163  	priority, err := getSuspectPriority(c, suspect)
   164  	if err != nil {
   165  		return errors.Annotate(err, "failed getting priority").Err()
   166  	}
   167  
   168  	// TODO(nqmtuan): Pass in the project.
   169  	// For now, hardcode to "chromium", since we only support chromium for compile failure.
   170  	suspectBuild, parentBuild, err := VerifySuspectCommit(c, "chromium", suspect, failedBuildID, props, priority)
   171  	if err != nil {
   172  		logging.Errorf(c, "Error triggering rerun for build %d: %s", failedBuildID, err)
   173  		return err
   174  	}
   175  	suspectRerunBuildModel, err := rerun.CreateRerunBuildModel(c, suspectBuild, model.RerunBuildType_CulpritVerification, suspect, nil, priority)
   176  	if err != nil {
   177  		return err
   178  	}
   179  
   180  	parentRerunBuildModel, err := rerun.CreateRerunBuildModel(c, parentBuild, model.RerunBuildType_CulpritVerification, suspect, nil, priority)
   181  	if err != nil {
   182  		return err
   183  	}
   184  
   185  	err = datastore.RunInTransaction(c, func(ctx context.Context) error {
   186  		e := datastore.Get(c, suspect)
   187  		if e != nil {
   188  			return e
   189  		}
   190  		suspect.VerificationStatus = model.SuspectVerificationStatus_UnderVerification
   191  		suspect.SuspectRerunBuild = datastore.KeyForObj(c, suspectRerunBuildModel)
   192  		suspect.ParentRerunBuild = datastore.KeyForObj(c, parentRerunBuildModel)
   193  		return datastore.Put(c, suspect)
   194  	}, nil)
   195  
   196  	if err != nil {
   197  		return err
   198  	}
   199  	return nil
   200  }
   201  
   202  func checkSuspectWithSameCommitExist(c context.Context, cfa *model.CompileFailureAnalysis, suspect *model.Suspect) (bool, error) {
   203  	suspects, err := datastoreutil.FetchSuspectsForAnalysis(c, cfa)
   204  	if err != nil {
   205  		return false, errors.Annotate(err, "fetchSuspectsForAnalysis").Err()
   206  	}
   207  	for _, s := range suspects {
   208  		// Need to be of different suspect
   209  		if s.Id != suspect.Id {
   210  			if s.GitilesCommit.Id == suspect.GitilesCommit.Id {
   211  				if s.VerificationStatus != model.SuspectVerificationStatus_Unverified {
   212  					return true, nil
   213  				}
   214  			}
   215  		}
   216  	}
   217  	return false, nil
   218  }
   219  
   220  func hasNewTarget(c context.Context, failedFiles []string, changelog *model.ChangeLog) bool {
   221  	for _, file := range failedFiles {
   222  		for _, diff := range changelog.ChangeLogDiffs {
   223  			if diff.Type == model.ChangeType_ADD || diff.Type == model.ChangeType_COPY || diff.Type == model.ChangeType_RENAME {
   224  				if heuristic.IsSameFile(diff.NewPath, file) {
   225  					return true
   226  				}
   227  			}
   228  		}
   229  	}
   230  	return false
   231  }
   232  
   233  // VerifyCommit checks if a commit is the culprit of a build failure.
   234  // Returns 2 builds:
   235  // - The 1st build is the rerun build for the commit
   236  // - The 2nd build is the rerun build for the parent commit
   237  func VerifySuspectCommit(c context.Context, project string, suspect *model.Suspect, failedBuildID int64, props map[string]any, priority int32) (*buildbucketpb.Build, *buildbucketpb.Build, error) {
   238  	commit := &suspect.GitilesCommit
   239  
   240  	// Query Gitiles to get parent commit
   241  	parentCommit, err := getParentCommit(c, commit)
   242  	if err != nil {
   243  		return nil, nil, errors.Annotate(err, "get parent commit for commit %s", commit.Id).Err()
   244  	}
   245  	builder, err := config.GetCompileBuilder(c, project)
   246  	if err != nil {
   247  		return nil, nil, errors.Annotate(err, "get compile builder").Err()
   248  	}
   249  	options := &rerun.TriggerOptions{
   250  		Builder:         util.BuilderFromConfigBuilder(builder),
   251  		GitilesCommit:   commit,
   252  		SampleBuildID:   failedBuildID,
   253  		ExtraProperties: props,
   254  		ExtraDimensions: nil,
   255  		Priority:        priority,
   256  	}
   257  	// Trigger a rerun with commit and parent commit
   258  	build1, err := rerun.TriggerRerun(c, options)
   259  	if err != nil {
   260  		return nil, nil, err
   261  	}
   262  
   263  	options.GitilesCommit = parentCommit
   264  	build2, err := rerun.TriggerRerun(c, options)
   265  	if err != nil {
   266  		return nil, nil, err
   267  	}
   268  
   269  	return build1, build2, nil
   270  }
   271  
   272  func getSuspectPriority(c context.Context, suspect *model.Suspect) (int32, error) {
   273  	// TODO (nqmtuan): Support priority for nth-section case
   274  	// For now let's return the baseline for culprit verification
   275  	// We can add offset later
   276  	confidence := heuristic.GetConfidenceLevel(suspect.Score)
   277  	var pri int32 = 0
   278  	switch confidence {
   279  	case pb.SuspectConfidenceLevel_HIGH:
   280  		pri = rerun.PriorityCulpritVerificationHighConfidence
   281  	case pb.SuspectConfidenceLevel_MEDIUM:
   282  		pri = rerun.PriorityCulpritVerificationMediumConfidence
   283  	case pb.SuspectConfidenceLevel_LOW:
   284  		pri = rerun.PriorityCulpritVerificationLowConfidence
   285  	}
   286  
   287  	// Check if the same suspect has any running build
   288  	otherSuspects, err := datastoreutil.GetOtherSuspectsWithSameCL(c, suspect)
   289  	if err != nil {
   290  		return 0, errors.Annotate(err, "failed GetOtherSuspectsWithSameCL %d", suspect.Id).Err()
   291  	}
   292  
   293  	// If there is a running/finished suspect run -> lower priority of this run
   294  	for _, s := range otherSuspects {
   295  		if s.VerificationStatus == model.SuspectVerificationStatus_UnderVerification || s.VerificationStatus == model.SuspectVerificationStatus_ConfirmedCulprit || s.VerificationStatus == model.SuspectVerificationStatus_Vindicated {
   296  			pri += rerun.PriorityAnotherVerificationBuildExistOffset
   297  			break
   298  		}
   299  	}
   300  
   301  	// Offset the priority based on run duration
   302  	cfa, err := datastoreutil.GetCompileFailureAnalysis(c, suspect.ParentAnalysis.Parent().IntID())
   303  	if err != nil {
   304  		return 0, errors.Annotate(err, "couldn't get analysis for suspect %d", suspect.Id).Err()
   305  	}
   306  	pri, err = rerun.OffsetPriorityBasedOnRunDuration(c, pri, cfa)
   307  	if err != nil {
   308  		return 0, errors.Annotate(err, "couldn't OffsetPriorityBasedOnRunDuration for suspect %d", suspect.Id).Err()
   309  	}
   310  
   311  	// Offset the priority if it is a tree closer
   312  	if cfa.IsTreeCloser {
   313  		pri += rerun.PriorityTreeClosureOffset
   314  	}
   315  
   316  	return rerun.CapPriority(pri), nil
   317  }
   318  
   319  func updateSuspectStatus(c context.Context, suspect *model.Suspect, cfa *model.CompileFailureAnalysis) {
   320  	// If after VerifySuspect, the suspect verification status is not
   321  	// SuspectVerificationStatus_UnderVerification, it means no reruns have been scheduled
   322  	// so we should set the status back to SuspectVerificationStatus_Unverified
   323  	if suspect.VerificationStatus != model.SuspectVerificationStatus_UnderVerification {
   324  		err := datastore.RunInTransaction(c, func(c context.Context) error {
   325  			// Update suspect status
   326  			e := datastore.Get(c, suspect)
   327  			if e != nil {
   328  				return e
   329  			}
   330  			suspect.VerificationStatus = model.SuspectVerificationStatus_Unverified
   331  			return datastore.Put(c, suspect)
   332  		}, nil)
   333  
   334  		if err != nil {
   335  			logging.Errorf(c, errors.Annotate(err, "set suspect verification status").Err().Error())
   336  		}
   337  		// Also update the analysis status this case, because
   338  		// the analysis may ended, given the suspect is no longer under verification
   339  		err = statusupdater.UpdateAnalysisStatus(c, cfa)
   340  		if err != nil {
   341  			logging.Errorf(c, errors.Annotate(err, "set analysis status").Err().Error())
   342  		}
   343  	}
   344  }
   345  
   346  func ShouldRunCulpritVerification(c context.Context, cfa *model.CompileFailureAnalysis) (bool, error) {
   347  	project, err := datastoreutil.GetProjectForCompileFailureAnalysis(c, cfa)
   348  	if err != nil {
   349  		return false, errors.Annotate(err, "get project for compile failure analysis").Err()
   350  	}
   351  	cfg, err := config.Project(c, project)
   352  	if err != nil {
   353  		return false, errors.Annotate(err, "config project").Err()
   354  	}
   355  	return cfg.CompileAnalysisConfig.CulpritVerificationEnabled, nil
   356  }
   357  
   358  func getParentCommit(ctx context.Context, commit *buildbucketpb.GitilesCommit) (*buildbucketpb.GitilesCommit, error) {
   359  	repoURL := gitiles.GetRepoUrl(ctx, commit)
   360  	p, err := gitiles.GetParentCommit(ctx, repoURL, commit.Id)
   361  	if err != nil {
   362  		return nil, err
   363  	}
   364  	return &buildbucketpb.GitilesCommit{
   365  		Host:    commit.Host,
   366  		Project: commit.Project,
   367  		Ref:     commit.Ref,
   368  		Id:      p,
   369  	}, nil
   370  }