go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/changepoints/analyze_changepoints.go (about) 1 // Copyright 2023 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package changepoints handles change point detection and analysis. 16 // See go/luci-test-variant-analysis-design for details. 17 package changepoints 18 19 import ( 20 "context" 21 "math" 22 23 "cloud.google.com/go/spanner" 24 25 "go.chromium.org/luci/common/errors" 26 "go.chromium.org/luci/common/logging" 27 "go.chromium.org/luci/common/tsmon/field" 28 "go.chromium.org/luci/common/tsmon/metric" 29 rdbpb "go.chromium.org/luci/resultdb/proto/v1" 30 "go.chromium.org/luci/server/span" 31 32 "go.chromium.org/luci/analysis/internal/changepoints/bayesian" 33 "go.chromium.org/luci/analysis/internal/changepoints/bqexporter" 34 "go.chromium.org/luci/analysis/internal/changepoints/inputbuffer" 35 "go.chromium.org/luci/analysis/internal/changepoints/sources" 36 "go.chromium.org/luci/analysis/internal/changepoints/testvariantbranch" 37 "go.chromium.org/luci/analysis/internal/config" 38 "go.chromium.org/luci/analysis/internal/ingestion/control" 39 "go.chromium.org/luci/analysis/internal/ingestion/resultdb" 40 "go.chromium.org/luci/analysis/internal/tasks/taskspb" 41 "go.chromium.org/luci/analysis/pbutil" 42 pb "go.chromium.org/luci/analysis/proto/v1" 43 ) 44 45 var ( 46 verdictCounter = metric.NewCounter( 47 "analysis/changepoints/analyze/verdicts", 48 "The number of verdicts processed by analysis, classified by project and status.", 49 nil, 50 // The LUCI Project. 51 field.String("project"), 52 // Possible values: 53 // - "ingested": The verdict was ingested. 54 // - "skipped_no_source": The verdict was skipped because it has no source 55 // data. 56 // - "skipped_no_commit_data": The verdict was skipped because its source 57 // does not have enough commit data (e.g. commit position). 58 // - "skipped_out_of_order": The verdict was skipped because it was too 59 // out of order. 60 // - "skipped_unsubmitted_code": The verdict was skipped because is was 61 // from unsubmitted code. 62 // - "skipped_all_skipped_or_duplicate": The verdict was skipped because 63 // it contains only skipped or duplicate results. 64 field.String("status"), 65 ) 66 ) 67 68 // CheckPoint represents a single row in the TestVariantBranchCheckpoint table. 69 type CheckPoint struct { 70 InvocationID string 71 StartingTestID string 72 StartingVariantHash string 73 } 74 75 // Analyze performs change point analyses based on incoming test verdicts. 76 // sourcesMap contains the information about the source code being tested. 77 func Analyze(ctx context.Context, tvs []*rdbpb.TestVariant, payload *taskspb.IngestTestResults, sourcesMap map[string]*pb.Sources, exporter *bqexporter.Exporter) error { 78 logging.Debugf(ctx, "Analyzing %d test variants for build %d", len(tvs), payload.Build.Id) 79 80 // Check that sourcesMap is not empty and has commit position data. 81 // This is for fast termination, as there should be only few items in 82 // sourcesMap to check. 83 if !sources.SourcesMapHasCommitData(sourcesMap) { 84 verdictCounter.Add(ctx, int64(len(tvs)), payload.Build.Project, "skipped_no_commit_data") 85 logging.Debugf(ctx, "Sourcemap has no commit data, skipping change point analysis") 86 return nil 87 } 88 89 // Instead of processing 10,000 test verdicts at a time, we will process by 90 // smaller batches. This will increase the robustness of the process, and 91 // in case something go wrong, we will not need to reprocess the whole 10,000 92 // verdicts. 93 // Also, the number of mutations per transaction is limit to 40,000. The 94 // mutations include the primary keys and the fields being updated. So we 95 // cannot process 10,000 test verdicts at once. 96 // TODO(nqmtuan): Consider putting this in config. 97 // Note: Changing this value may cause some test variants in retried tasks to 98 // get ingested twice. 99 batchSize := 1000 100 for startIndex := 0; startIndex < len(tvs); { 101 endIndex := startIndex + batchSize 102 if endIndex > len(tvs) { 103 endIndex = len(tvs) 104 } 105 batchTVs := tvs[startIndex:endIndex] 106 err := analyzeSingleBatch(ctx, batchTVs, payload, sourcesMap, exporter) 107 if err != nil { 108 return errors.Annotate(err, "analyzeSingleBatch").Err() 109 } 110 startIndex = int(endIndex) 111 } 112 113 return nil 114 } 115 116 func analyzeSingleBatch(ctx context.Context, tvs []*rdbpb.TestVariant, payload *taskspb.IngestTestResults, sourcesMap map[string]*pb.Sources, exporter *bqexporter.Exporter) error { 117 // Nothing to analyze. 118 if len(tvs) == 0 { 119 return nil 120 } 121 122 firstTV := tvs[0] 123 checkPoint := CheckPoint{ 124 InvocationID: control.BuildInvocationName(payload.GetBuild().Id), 125 StartingTestID: firstTV.TestId, 126 StartingVariantHash: firstTV.VariantHash, 127 } 128 129 // Contains the test variant branches to be written to BigQuery. 130 bqExporterInput := make([]bqexporter.PartialBigQueryRow, 0, len(tvs)) 131 132 commitTimestamp, err := span.ReadWriteTransaction(ctx, func(ctx context.Context) error { 133 // Check the TestVariantBranch table for the existence of the batch. 134 exist, err := hasCheckPoint(ctx, checkPoint) 135 if err != nil { 136 return errors.Annotate(err, "hasCheckPoint (%s, %s, %s)", checkPoint.InvocationID, firstTV.TestId, firstTV.VariantHash).Err() 137 } 138 139 // This batch has been processed, we can skip it. 140 if exist { 141 return nil 142 } 143 144 duplicateMap, newInvIDs, err := readDuplicateInvocations(ctx, tvs, payload.Build) 145 if err != nil { 146 return errors.Annotate(err, "duplicate map").Err() 147 } 148 149 // Only keep "relevant" test variants, and test variant with commit information. 150 filteredTVs, err := filterTestVariants(ctx, tvs, payload, duplicateMap, sourcesMap) 151 if err != nil { 152 return errors.Annotate(err, "filter test variants").Err() 153 } 154 155 // Query TestVariantBranch from spanner. 156 tvbks := testVariantBranchKeys(filteredTVs, payload.Build.Project, sourcesMap) 157 158 // The list of mutations for this transaction. 159 mutations := []*spanner.Mutation{} 160 161 // Buffers allocated once and re-used for processing 162 // all test variant branches. 163 var hs inputbuffer.HistorySerializer 164 var analysis Analyzer 165 166 // Handle each read test variant branch. 167 f := func(i int, tvb *testvariantbranch.Entry) error { 168 tv := filteredTVs[i] 169 if isOutOfOrderAndShouldBeDiscarded(tvb, sourcesMap[tv.SourcesId]) { 170 verdictCounter.Add(ctx, 1, payload.Build.Project, "skipped_out_of_order") 171 logging.Debugf(ctx, "Out of order verdict in build %d", payload.Build.Id) 172 return nil 173 } 174 // "Insert" the new test variant to input buffer. 175 tvb, err := insertIntoInputBuffer(tvb, tv, payload, duplicateMap, sourcesMap) 176 if err != nil { 177 return errors.Annotate(err, "insert into input buffer").Err() 178 } 179 inputSegments := analysis.Run(tvb) 180 tvb.ApplyRetentionPolicyForFinalizedSegments(payload.PartitionTime.AsTime()) 181 mut, err := tvb.ToMutation(&hs) 182 if err != nil { 183 return errors.Annotate(err, "test variant branch to mutation").Err() 184 } 185 mutations = append(mutations, mut) 186 bqRow, err := bqexporter.ToPartialBigQueryRow(tvb, inputSegments) 187 if err != nil { 188 return errors.Annotate(err, "test variant branch to bigquery row").Err() 189 } 190 bqExporterInput = append(bqExporterInput, bqRow) 191 return nil 192 } 193 if err := testvariantbranch.ReadF(ctx, tvbks, f); err != nil { 194 return errors.Annotate(err, "read test variant branches").Err() 195 } 196 197 ingestedVerdictCount := len(mutations) 198 199 // Store new Invocations to Invocations table. 200 ingestedInvID := control.BuildInvocationID(payload.Build.Id) 201 invMuts := invocationsToMutations(ctx, payload.Build.Project, newInvIDs, ingestedInvID) 202 mutations = append(mutations, invMuts...) 203 204 // Store checkpoint in TestVariantBranchCheckpoint table. 205 mutations = append(mutations, checkPoint.ToMutation()) 206 span.BufferWrite(ctx, mutations...) 207 verdictCounter.Add(ctx, int64(ingestedVerdictCount), payload.Build.Project, "ingested") 208 return nil 209 }) 210 211 if err != nil { 212 return errors.Annotate(err, "analyze change point").Err() 213 } 214 // Export to BigQuery. 215 // Note: exportToBigQuery does not guarantee eventual export, in case it 216 // fails. Even though the task may be retried, bqtvbs will be empty, so 217 // the data will not be exported. 218 // This should not be a concern, since the export will happen again when the 219 // next test verdict comes, but it may result in some delay. 220 rowInputs := bqexporter.RowInputs{ 221 Rows: bqExporterInput, 222 CommitTimestamp: commitTimestamp, 223 } 224 err = exportToBigQuery(ctx, exporter, rowInputs) 225 if err != nil { 226 return errors.Annotate(err, "export to big query").Err() 227 } 228 return nil 229 } 230 231 // exportToBigQuery exports the data in bqRows to BigQuery. 232 // commitTimestamp is the Spanner commit timestamp of the 233 // test variant branches. 234 func exportToBigQuery(ctx context.Context, exporter *bqexporter.Exporter, rowInputs bqexporter.RowInputs) error { 235 if len(rowInputs.Rows) == 0 { 236 return nil 237 } 238 cfg, err := config.Get(ctx) 239 if err != nil { 240 return errors.Annotate(err, "read config").Err() 241 } 242 if !cfg.GetTestVariantAnalysis().GetBigqueryExportEnabled() { 243 return nil 244 } 245 246 err = exporter.ExportTestVariantBranches(ctx, rowInputs) 247 if err != nil { 248 return errors.Annotate(err, "export test variant branches").Err() 249 } 250 return nil 251 } 252 253 // isOutOfOrderAndShouldBeDiscarded returns true if the verdict is out-of-order 254 // and should be discarded. 255 // This function returns false if the verdict is out-of-order but can still be 256 // processed. 257 // We only keep out-of-order verdict if either condition occurs: 258 // - The verdict commit position falls within the input buffer 259 // (commit position >= smallest start position), or 260 // - There is no finalizing or finalized segment (i.e. the entire known 261 // test history is inside the input buffer) 262 func isOutOfOrderAndShouldBeDiscarded(tvb *testvariantbranch.Entry, src *pb.Sources) bool { 263 // No test variant branch. Should be ok to proceed. 264 if tvb == nil { 265 return false 266 } 267 if len(tvb.FinalizedSegments.GetSegments()) == 0 && tvb.FinalizingSegment == nil { 268 return false 269 } 270 position := sources.CommitPosition(src) 271 hotVerdicts := tvb.InputBuffer.HotBuffer.Verdicts 272 coldVerdicts := tvb.InputBuffer.ColdBuffer.Verdicts 273 minPos := math.MaxInt 274 if len(hotVerdicts) > 0 && minPos > hotVerdicts[0].CommitPosition { 275 minPos = hotVerdicts[0].CommitPosition 276 } 277 if len(coldVerdicts) > 0 && minPos > coldVerdicts[0].CommitPosition { 278 minPos = coldVerdicts[0].CommitPosition 279 } 280 return position < minPos 281 } 282 283 type Analyzer struct { 284 // MergeBuffer is a preallocated buffer used to store the result of 285 // merging hot and cold input buffers. Reusing the same buffer avoids 286 // allocating a new buffer for each test variant branch processed. 287 mergeBuffer []inputbuffer.PositionVerdict 288 } 289 290 // Run runs change point analysis and returns the 291 // remaining segment in the input buffer after eviction. 292 func (a *Analyzer) Run(tvb *testvariantbranch.Entry) []*inputbuffer.Segment { 293 predictor := bayesian.ChangepointPredictor{ 294 ChangepointLikelihood: 0.0001, 295 // We are leaning toward consistently passing test results. 296 HasUnexpectedPrior: bayesian.BetaDistribution{ 297 Alpha: 0.3, 298 Beta: 0.5, 299 }, 300 UnexpectedAfterRetryPrior: bayesian.BetaDistribution{ 301 Alpha: 0.5, 302 Beta: 0.5, 303 }, 304 } 305 tvb.InputBuffer.MergeBuffer(&a.mergeBuffer) 306 changePoints := predictor.ChangePoints(a.mergeBuffer, bayesian.ConfidenceIntervalTail) 307 sib := tvb.InputBuffer.Segmentize(a.mergeBuffer, changePoints) 308 evictedSegment := sib.EvictSegments() 309 tvb.UpdateOutputBuffer(evictedSegment) 310 return sib.Segments 311 } 312 313 // insertIntoInputBuffer inserts the new test variant tv into the input buffer 314 // of TestVariantBranch tvb. 315 // If tvb is nil, it means it is not in spanner. In this case, return a new 316 // TestVariantBranch object with a single element in the input buffer. 317 func insertIntoInputBuffer(tvb *testvariantbranch.Entry, tv *rdbpb.TestVariant, payload *taskspb.IngestTestResults, duplicateMap map[string]bool, sourcesMap map[string]*pb.Sources) (*testvariantbranch.Entry, error) { 318 src := sourcesMap[tv.SourcesId] 319 if tvb == nil { 320 ref := pbutil.SourceRefFromSources(src) 321 tvb = &testvariantbranch.Entry{ 322 IsNew: true, 323 Project: payload.GetBuild().GetProject(), 324 TestID: tv.TestId, 325 VariantHash: tv.VariantHash, 326 RefHash: pbutil.SourceRefHash(ref), 327 Variant: pbutil.VariantFromResultDB(tv.Variant), 328 SourceRef: ref, 329 InputBuffer: &inputbuffer.Buffer{ 330 HotBufferCapacity: inputbuffer.DefaultHotBufferCapacity, 331 ColdBufferCapacity: inputbuffer.DefaultColdBufferCapacity, 332 }, 333 } 334 } 335 336 pv, err := testvariantbranch.ToPositionVerdict(tv, payload, duplicateMap, src) 337 if err != nil { 338 return nil, err 339 } 340 tvb.InsertToInputBuffer(pv) 341 return tvb, nil 342 } 343 344 // filterTestVariants only keeps test variants that satisfy all following 345 // conditions: 346 // - Have commit position information. 347 // - Have at least 1 non-duplicate and non-skipped test result (the test 348 // result needs to be both non-duplicate and non-skipped). 349 // - Not from unsubmitted code (i.e. try run that did not result in submitted code) 350 func filterTestVariants(ctx context.Context, tvs []*rdbpb.TestVariant, payload *taskspb.IngestTestResults, duplicateMap map[string]bool, sourcesMap map[string]*pb.Sources) ([]*rdbpb.TestVariant, error) { 351 results := []*rdbpb.TestVariant{} 352 presubmit := payload.PresubmitRun 353 project := payload.Build.Project 354 for _, tv := range tvs { 355 // Checks source map. 356 src, ok := sourcesMap[tv.SourcesId] 357 if !ok { 358 verdictCounter.Add(ctx, 1, project, "skipped_no_source") 359 continue 360 } 361 if !sources.HasCommitData(src) { 362 verdictCounter.Add(ctx, 1, project, "skipped_no_commit_data") 363 continue 364 } 365 // Checks unsubmitted code. 366 if sources.FromUnsubmittedCode(src, presubmit) { 367 verdictCounter.Add(ctx, 1, project, "skipped_unsubmitted_code") 368 continue 369 } 370 // Checks skips and duplicates. 371 allSkippedAndDuplicate := true 372 for _, r := range tv.Results { 373 invID, err := resultdb.InvocationFromTestResultName(r.Result.Name) 374 if err != nil { 375 return nil, errors.Annotate(err, "invocation from test result name").Err() 376 } 377 _, isDuplicate := duplicateMap[invID] 378 if r.Result.Status != rdbpb.TestStatus_SKIP && !isDuplicate { 379 results = append(results, tv) 380 allSkippedAndDuplicate = false 381 break 382 } 383 } 384 if allSkippedAndDuplicate { 385 verdictCounter.Add(ctx, 1, project, "skipped_all_skipped_or_duplicate") 386 } 387 } 388 return results, nil 389 } 390 391 func testVariantBranchKeys(tvs []*rdbpb.TestVariant, project string, sourcesMap map[string]*pb.Sources) []testvariantbranch.Key { 392 results := make([]testvariantbranch.Key, len(tvs)) 393 for i, tv := range tvs { 394 src := sourcesMap[tv.SourcesId] 395 results[i] = testvariantbranch.Key{ 396 Project: project, 397 TestID: tv.TestId, 398 VariantHash: tv.VariantHash, 399 RefHash: testvariantbranch.RefHash(pbutil.SourceRefHash(pbutil.SourceRefFromSources(src))), 400 } 401 } 402 return results 403 }