go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/changepoints/analyze_changepoints.go

go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/changepoints/analyze_changepoints.go (about)

     1  // Copyright 2023 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package changepoints handles change point detection and analysis.
    16  // See go/luci-test-variant-analysis-design for details.
    17  package changepoints
    18  
    19  import (
    20  	"context"
    21  	"math"
    22  
    23  	"cloud.google.com/go/spanner"
    24  
    25  	"go.chromium.org/luci/common/errors"
    26  	"go.chromium.org/luci/common/logging"
    27  	"go.chromium.org/luci/common/tsmon/field"
    28  	"go.chromium.org/luci/common/tsmon/metric"
    29  	rdbpb "go.chromium.org/luci/resultdb/proto/v1"
    30  	"go.chromium.org/luci/server/span"
    31  
    32  	"go.chromium.org/luci/analysis/internal/changepoints/bayesian"
    33  	"go.chromium.org/luci/analysis/internal/changepoints/bqexporter"
    34  	"go.chromium.org/luci/analysis/internal/changepoints/inputbuffer"
    35  	"go.chromium.org/luci/analysis/internal/changepoints/sources"
    36  	"go.chromium.org/luci/analysis/internal/changepoints/testvariantbranch"
    37  	"go.chromium.org/luci/analysis/internal/config"
    38  	"go.chromium.org/luci/analysis/internal/ingestion/control"
    39  	"go.chromium.org/luci/analysis/internal/ingestion/resultdb"
    40  	"go.chromium.org/luci/analysis/internal/tasks/taskspb"
    41  	"go.chromium.org/luci/analysis/pbutil"
    42  	pb "go.chromium.org/luci/analysis/proto/v1"
    43  )
    44  
var (
	// verdictCounter counts the verdicts handled by change point analysis,
	// broken down by LUCI project and by processing outcome ("status").
	verdictCounter = metric.NewCounter(
		"analysis/changepoints/analyze/verdicts",
		"The number of verdicts processed by analysis, classified by project and status.",
		nil,
		// The LUCI Project.
		field.String("project"),
		// Possible values:
		// - "ingested": The verdict was ingested.
		// - "skipped_no_source": The verdict was skipped because it has no source
		//   data.
		// - "skipped_no_commit_data": The verdict was skipped because its source
		//   does not have enough commit data (e.g. commit position).
		// - "skipped_out_of_order": The verdict was skipped because it was too
		//   out of order.
		// - "skipped_unsubmitted_code": The verdict was skipped because it was
		//   from unsubmitted code.
		// - "skipped_all_skipped_or_duplicate": The verdict was skipped because
		//   it contains only skipped or duplicate results.
		field.String("status"),
	)
)
    67  
// CheckPoint represents a single row in the TestVariantBranchCheckpoint table.
// One checkpoint is recorded per ingested batch of test variants, and its
// presence is used to detect that a batch has already been processed.
type CheckPoint struct {
	// InvocationID is the invocation name of the build being ingested.
	InvocationID string
	// StartingTestID is the test ID of the first test variant in the batch.
	StartingTestID string
	// StartingVariantHash is the variant hash of the first test variant in
	// the batch.
	StartingVariantHash string
}
    74  
    75  // Analyze performs change point analyses based on incoming test verdicts.
    76  // sourcesMap contains the information about the source code being tested.
    77  func Analyze(ctx context.Context, tvs []*rdbpb.TestVariant, payload *taskspb.IngestTestResults, sourcesMap map[string]*pb.Sources, exporter *bqexporter.Exporter) error {
    78  	logging.Debugf(ctx, "Analyzing %d test variants for build %d", len(tvs), payload.Build.Id)
    79  
    80  	// Check that sourcesMap is not empty and has commit position data.
    81  	// This is for fast termination, as there should be only few items in
    82  	// sourcesMap to check.
    83  	if !sources.SourcesMapHasCommitData(sourcesMap) {
    84  		verdictCounter.Add(ctx, int64(len(tvs)), payload.Build.Project, "skipped_no_commit_data")
    85  		logging.Debugf(ctx, "Sourcemap has no commit data, skipping change point analysis")
    86  		return nil
    87  	}
    88  
    89  	// Instead of processing 10,000 test verdicts at a time, we will process by
    90  	// smaller batches. This will increase the robustness of the process, and
    91  	// in case something go wrong, we will not need to reprocess the whole 10,000
    92  	// verdicts.
    93  	// Also, the number of mutations per transaction is limit to 40,000. The
    94  	// mutations include the primary keys and the fields being updated. So we
    95  	// cannot process 10,000 test verdicts at once.
    96  	// TODO(nqmtuan): Consider putting this in config.
    97  	// Note: Changing this value may cause some test variants in retried tasks to
    98  	// get ingested twice.
    99  	batchSize := 1000
   100  	for startIndex := 0; startIndex < len(tvs); {
   101  		endIndex := startIndex + batchSize
   102  		if endIndex > len(tvs) {
   103  			endIndex = len(tvs)
   104  		}
   105  		batchTVs := tvs[startIndex:endIndex]
   106  		err := analyzeSingleBatch(ctx, batchTVs, payload, sourcesMap, exporter)
   107  		if err != nil {
   108  			return errors.Annotate(err, "analyzeSingleBatch").Err()
   109  		}
   110  		startIndex = int(endIndex)
   111  	}
   112  
   113  	return nil
   114  }
   115  
   116  func analyzeSingleBatch(ctx context.Context, tvs []*rdbpb.TestVariant, payload *taskspb.IngestTestResults, sourcesMap map[string]*pb.Sources, exporter *bqexporter.Exporter) error {
   117  	// Nothing to analyze.
   118  	if len(tvs) == 0 {
   119  		return nil
   120  	}
   121  
   122  	firstTV := tvs[0]
   123  	checkPoint := CheckPoint{
   124  		InvocationID:        control.BuildInvocationName(payload.GetBuild().Id),
   125  		StartingTestID:      firstTV.TestId,
   126  		StartingVariantHash: firstTV.VariantHash,
   127  	}
   128  
   129  	// Contains the test variant branches to be written to BigQuery.
   130  	bqExporterInput := make([]bqexporter.PartialBigQueryRow, 0, len(tvs))
   131  
   132  	commitTimestamp, err := span.ReadWriteTransaction(ctx, func(ctx context.Context) error {
   133  		// Check the TestVariantBranch table for the existence of the batch.
   134  		exist, err := hasCheckPoint(ctx, checkPoint)
   135  		if err != nil {
   136  			return errors.Annotate(err, "hasCheckPoint (%s, %s, %s)", checkPoint.InvocationID, firstTV.TestId, firstTV.VariantHash).Err()
   137  		}
   138  
   139  		// This batch has been processed, we can skip it.
   140  		if exist {
   141  			return nil
   142  		}
   143  
   144  		duplicateMap, newInvIDs, err := readDuplicateInvocations(ctx, tvs, payload.Build)
   145  		if err != nil {
   146  			return errors.Annotate(err, "duplicate map").Err()
   147  		}
   148  
   149  		// Only keep "relevant" test variants, and test variant with commit information.
   150  		filteredTVs, err := filterTestVariants(ctx, tvs, payload, duplicateMap, sourcesMap)
   151  		if err != nil {
   152  			return errors.Annotate(err, "filter test variants").Err()
   153  		}
   154  
   155  		// Query TestVariantBranch from spanner.
   156  		tvbks := testVariantBranchKeys(filteredTVs, payload.Build.Project, sourcesMap)
   157  
   158  		// The list of mutations for this transaction.
   159  		mutations := []*spanner.Mutation{}
   160  
   161  		// Buffers allocated once and re-used for processing
   162  		// all test variant branches.
   163  		var hs inputbuffer.HistorySerializer
   164  		var analysis Analyzer
   165  
   166  		// Handle each read test variant branch.
   167  		f := func(i int, tvb *testvariantbranch.Entry) error {
   168  			tv := filteredTVs[i]
   169  			if isOutOfOrderAndShouldBeDiscarded(tvb, sourcesMap[tv.SourcesId]) {
   170  				verdictCounter.Add(ctx, 1, payload.Build.Project, "skipped_out_of_order")
   171  				logging.Debugf(ctx, "Out of order verdict in build %d", payload.Build.Id)
   172  				return nil
   173  			}
   174  			// "Insert" the new test variant to input buffer.
   175  			tvb, err := insertIntoInputBuffer(tvb, tv, payload, duplicateMap, sourcesMap)
   176  			if err != nil {
   177  				return errors.Annotate(err, "insert into input buffer").Err()
   178  			}
   179  			inputSegments := analysis.Run(tvb)
   180  			tvb.ApplyRetentionPolicyForFinalizedSegments(payload.PartitionTime.AsTime())
   181  			mut, err := tvb.ToMutation(&hs)
   182  			if err != nil {
   183  				return errors.Annotate(err, "test variant branch to mutation").Err()
   184  			}
   185  			mutations = append(mutations, mut)
   186  			bqRow, err := bqexporter.ToPartialBigQueryRow(tvb, inputSegments)
   187  			if err != nil {
   188  				return errors.Annotate(err, "test variant branch to bigquery row").Err()
   189  			}
   190  			bqExporterInput = append(bqExporterInput, bqRow)
   191  			return nil
   192  		}
   193  		if err := testvariantbranch.ReadF(ctx, tvbks, f); err != nil {
   194  			return errors.Annotate(err, "read test variant branches").Err()
   195  		}
   196  
   197  		ingestedVerdictCount := len(mutations)
   198  
   199  		// Store new Invocations to Invocations table.
   200  		ingestedInvID := control.BuildInvocationID(payload.Build.Id)
   201  		invMuts := invocationsToMutations(ctx, payload.Build.Project, newInvIDs, ingestedInvID)
   202  		mutations = append(mutations, invMuts...)
   203  
   204  		// Store checkpoint in TestVariantBranchCheckpoint table.
   205  		mutations = append(mutations, checkPoint.ToMutation())
   206  		span.BufferWrite(ctx, mutations...)
   207  		verdictCounter.Add(ctx, int64(ingestedVerdictCount), payload.Build.Project, "ingested")
   208  		return nil
   209  	})
   210  
   211  	if err != nil {
   212  		return errors.Annotate(err, "analyze change point").Err()
   213  	}
   214  	// Export to BigQuery.
   215  	// Note: exportToBigQuery does not guarantee eventual export, in case it
   216  	// fails. Even though the task may be retried, bqtvbs will be empty, so
   217  	// the data will not be exported.
   218  	// This should not be a concern, since the export will happen again when the
   219  	// next test verdict comes, but it may result in some delay.
   220  	rowInputs := bqexporter.RowInputs{
   221  		Rows:            bqExporterInput,
   222  		CommitTimestamp: commitTimestamp,
   223  	}
   224  	err = exportToBigQuery(ctx, exporter, rowInputs)
   225  	if err != nil {
   226  		return errors.Annotate(err, "export to big query").Err()
   227  	}
   228  	return nil
   229  }
   230  
   231  // exportToBigQuery exports the data in bqRows to BigQuery.
   232  // commitTimestamp is the Spanner commit timestamp of the
   233  // test variant branches.
   234  func exportToBigQuery(ctx context.Context, exporter *bqexporter.Exporter, rowInputs bqexporter.RowInputs) error {
   235  	if len(rowInputs.Rows) == 0 {
   236  		return nil
   237  	}
   238  	cfg, err := config.Get(ctx)
   239  	if err != nil {
   240  		return errors.Annotate(err, "read config").Err()
   241  	}
   242  	if !cfg.GetTestVariantAnalysis().GetBigqueryExportEnabled() {
   243  		return nil
   244  	}
   245  
   246  	err = exporter.ExportTestVariantBranches(ctx, rowInputs)
   247  	if err != nil {
   248  		return errors.Annotate(err, "export test variant branches").Err()
   249  	}
   250  	return nil
   251  }
   252  
   253  // isOutOfOrderAndShouldBeDiscarded returns true if the verdict is out-of-order
   254  // and should be discarded.
   255  // This function returns false if the verdict is out-of-order but can still be
   256  // processed.
   257  // We only keep out-of-order verdict if either condition occurs:
   258  //   - The verdict commit position falls within the input buffer
   259  //     (commit position >= smallest start position), or
   260  //   - There is no finalizing or finalized segment (i.e. the entire known
   261  //     test history is inside the input buffer)
   262  func isOutOfOrderAndShouldBeDiscarded(tvb *testvariantbranch.Entry, src *pb.Sources) bool {
   263  	// No test variant branch. Should be ok to proceed.
   264  	if tvb == nil {
   265  		return false
   266  	}
   267  	if len(tvb.FinalizedSegments.GetSegments()) == 0 && tvb.FinalizingSegment == nil {
   268  		return false
   269  	}
   270  	position := sources.CommitPosition(src)
   271  	hotVerdicts := tvb.InputBuffer.HotBuffer.Verdicts
   272  	coldVerdicts := tvb.InputBuffer.ColdBuffer.Verdicts
   273  	minPos := math.MaxInt
   274  	if len(hotVerdicts) > 0 && minPos > hotVerdicts[0].CommitPosition {
   275  		minPos = hotVerdicts[0].CommitPosition
   276  	}
   277  	if len(coldVerdicts) > 0 && minPos > coldVerdicts[0].CommitPosition {
   278  		minPos = coldVerdicts[0].CommitPosition
   279  	}
   280  	return position < minPos
   281  }
   282  
// Analyzer runs change point analysis on test variant branches.
// It owns scratch storage that is reused from one Run call to the next, so a
// single Analyzer can be used to process many test variant branches.
type Analyzer struct {
	// mergeBuffer is a preallocated buffer used to store the result of
	// merging hot and cold input buffers. Reusing the same buffer avoids
	// allocating a new buffer for each test variant branch processed.
	mergeBuffer []inputbuffer.PositionVerdict
}
   289  
   290  // Run runs change point analysis and returns the
   291  // remaining segment in the input buffer after eviction.
   292  func (a *Analyzer) Run(tvb *testvariantbranch.Entry) []*inputbuffer.Segment {
   293  	predictor := bayesian.ChangepointPredictor{
   294  		ChangepointLikelihood: 0.0001,
   295  		// We are leaning toward consistently passing test results.
   296  		HasUnexpectedPrior: bayesian.BetaDistribution{
   297  			Alpha: 0.3,
   298  			Beta:  0.5,
   299  		},
   300  		UnexpectedAfterRetryPrior: bayesian.BetaDistribution{
   301  			Alpha: 0.5,
   302  			Beta:  0.5,
   303  		},
   304  	}
   305  	tvb.InputBuffer.MergeBuffer(&a.mergeBuffer)
   306  	changePoints := predictor.ChangePoints(a.mergeBuffer, bayesian.ConfidenceIntervalTail)
   307  	sib := tvb.InputBuffer.Segmentize(a.mergeBuffer, changePoints)
   308  	evictedSegment := sib.EvictSegments()
   309  	tvb.UpdateOutputBuffer(evictedSegment)
   310  	return sib.Segments
   311  }
   312  
   313  // insertIntoInputBuffer inserts the new test variant tv into the input buffer
   314  // of TestVariantBranch tvb.
   315  // If tvb is nil, it means it is not in spanner. In this case, return a new
   316  // TestVariantBranch object with a single element in the input buffer.
   317  func insertIntoInputBuffer(tvb *testvariantbranch.Entry, tv *rdbpb.TestVariant, payload *taskspb.IngestTestResults, duplicateMap map[string]bool, sourcesMap map[string]*pb.Sources) (*testvariantbranch.Entry, error) {
   318  	src := sourcesMap[tv.SourcesId]
   319  	if tvb == nil {
   320  		ref := pbutil.SourceRefFromSources(src)
   321  		tvb = &testvariantbranch.Entry{
   322  			IsNew:       true,
   323  			Project:     payload.GetBuild().GetProject(),
   324  			TestID:      tv.TestId,
   325  			VariantHash: tv.VariantHash,
   326  			RefHash:     pbutil.SourceRefHash(ref),
   327  			Variant:     pbutil.VariantFromResultDB(tv.Variant),
   328  			SourceRef:   ref,
   329  			InputBuffer: &inputbuffer.Buffer{
   330  				HotBufferCapacity:  inputbuffer.DefaultHotBufferCapacity,
   331  				ColdBufferCapacity: inputbuffer.DefaultColdBufferCapacity,
   332  			},
   333  		}
   334  	}
   335  
   336  	pv, err := testvariantbranch.ToPositionVerdict(tv, payload, duplicateMap, src)
   337  	if err != nil {
   338  		return nil, err
   339  	}
   340  	tvb.InsertToInputBuffer(pv)
   341  	return tvb, nil
   342  }
   343  
   344  // filterTestVariants only keeps test variants that satisfy all following
   345  // conditions:
   346  //   - Have commit position information.
   347  //   - Have at least 1 non-duplicate and non-skipped test result (the test
   348  //     result needs to be both non-duplicate and non-skipped).
   349  //   - Not from unsubmitted code (i.e. try run that did not result in submitted code)
   350  func filterTestVariants(ctx context.Context, tvs []*rdbpb.TestVariant, payload *taskspb.IngestTestResults, duplicateMap map[string]bool, sourcesMap map[string]*pb.Sources) ([]*rdbpb.TestVariant, error) {
   351  	results := []*rdbpb.TestVariant{}
   352  	presubmit := payload.PresubmitRun
   353  	project := payload.Build.Project
   354  	for _, tv := range tvs {
   355  		// Checks source map.
   356  		src, ok := sourcesMap[tv.SourcesId]
   357  		if !ok {
   358  			verdictCounter.Add(ctx, 1, project, "skipped_no_source")
   359  			continue
   360  		}
   361  		if !sources.HasCommitData(src) {
   362  			verdictCounter.Add(ctx, 1, project, "skipped_no_commit_data")
   363  			continue
   364  		}
   365  		// Checks unsubmitted code.
   366  		if sources.FromUnsubmittedCode(src, presubmit) {
   367  			verdictCounter.Add(ctx, 1, project, "skipped_unsubmitted_code")
   368  			continue
   369  		}
   370  		// Checks skips and duplicates.
   371  		allSkippedAndDuplicate := true
   372  		for _, r := range tv.Results {
   373  			invID, err := resultdb.InvocationFromTestResultName(r.Result.Name)
   374  			if err != nil {
   375  				return nil, errors.Annotate(err, "invocation from test result name").Err()
   376  			}
   377  			_, isDuplicate := duplicateMap[invID]
   378  			if r.Result.Status != rdbpb.TestStatus_SKIP && !isDuplicate {
   379  				results = append(results, tv)
   380  				allSkippedAndDuplicate = false
   381  				break
   382  			}
   383  		}
   384  		if allSkippedAndDuplicate {
   385  			verdictCounter.Add(ctx, 1, project, "skipped_all_skipped_or_duplicate")
   386  		}
   387  	}
   388  	return results, nil
   389  }
   390  
   391  func testVariantBranchKeys(tvs []*rdbpb.TestVariant, project string, sourcesMap map[string]*pb.Sources) []testvariantbranch.Key {
   392  	results := make([]testvariantbranch.Key, len(tvs))
   393  	for i, tv := range tvs {
   394  		src := sourcesMap[tv.SourcesId]
   395  		results[i] = testvariantbranch.Key{
   396  			Project:     project,
   397  			TestID:      tv.TestId,
   398  			VariantHash: tv.VariantHash,
   399  			RefHash:     testvariantbranch.RefHash(pbutil.SourceRefHash(pbutil.SourceRefFromSources(src))),
   400  		}
   401  	}
   402  	return results
   403  }