go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/testresults/stability/query_stability.go (about) 1 // Copyright 2024 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package stability implements the test stability analysis used by the 16 // QueryStability RPC. 17 package stability 18 19 import ( 20 "context" 21 "sort" 22 "time" 23 24 "cloud.google.com/go/spanner" 25 26 "go.chromium.org/luci/common/sync/parallel" 27 "go.chromium.org/luci/server/span" 28 29 spanutil "go.chromium.org/luci/analysis/internal/span" 30 "go.chromium.org/luci/analysis/internal/testresults" 31 "go.chromium.org/luci/analysis/pbutil" 32 pb "go.chromium.org/luci/analysis/proto/v1" 33 ) 34 35 const ( 36 // The maximum number of workers to run in parallel. 37 // Given 100 is the maximum number of test variants queried at once, 38 // it is desirable that maxWorkers * batchSize >= 100. 39 maxWorkers = 10 40 41 // The size of each batch (in test variants). 42 batchSize = 10 43 ) 44 45 // QueryStabilityOptions specifies options for QueryStability(). 46 type QueryStabilityOptions struct { 47 // Project is the LUCI Project to query. 48 Project string 49 // SubRealms are the project-scoped realms (of the form "ci", 50 // NOT "chromium:ci") within the project to query. 51 SubRealms []string 52 // TestVariantPositions are the test variant positions to query. 
53 TestVariantPositions []*pb.QueryTestVariantStabilityRequest_TestVariantPosition 54 // The test stability criteria to apply. 55 Criteria *pb.TestStabilityCriteria 56 // AsAtTime is latest parititon time to include in the results; 57 // outside of testing contexts, this should be the current time. 58 // QueryTestVariants returns data for the 14 day period leading 59 // up to this time. 60 AsAtTime time.Time 61 } 62 63 // run represents all executions of a test variant in a particular 64 // (lowest-level) ResultDB invocation. 65 type run struct { 66 // Whether at least one non-skipped test result in the run was 67 // expected. 68 expected bool 69 } 70 71 // QueryStability queries the stability of nominated test variants. 72 // Used to inform exoneration decisions. 73 // 74 // Must be called in a Spanner transactional context. Context must 75 // support multiple reads (i.e. NOT spanner.Single()) as request may 76 // batched over multiple reads. 77 func QueryStability(ctx context.Context, opts QueryStabilityOptions) ([]*pb.TestVariantStabilityAnalysis, error) { 78 batches := partitionQueryIntoBatches(opts.TestVariantPositions, batchSize) 79 80 err := parallel.WorkPool(maxWorkers, func(c chan<- func() error) { 81 for _, b := range batches { 82 // Assign batch to a local variable to ensure its current 83 // value is captured by function closures. 84 batch := b 85 c <- func() error { 86 var err error 87 batchOpts := opts 88 batchOpts.TestVariantPositions = batch.input 89 // queryStabilityShard ensures test variants appear 90 // in the output in the same order as they appear in the 91 // input. 92 batch.output, err = queryStabilityShard(ctx, batchOpts) 93 return err 94 } 95 } 96 }) 97 if err != nil { 98 return nil, err 99 } 100 101 // The order of test variants in the output should be the 102 // same as the input. Perform the inverse to what we did 103 // in batching. 
104 analysis := make([]*pb.TestVariantStabilityAnalysis, 0, len(opts.TestVariantPositions)) 105 for _, b := range batches { 106 analysis = append(analysis, b.output...) 107 } 108 109 return analysis, nil 110 } 111 112 type queryStabilityBatch struct { 113 input []*pb.QueryTestVariantStabilityRequest_TestVariantPosition 114 output []*pb.TestVariantStabilityAnalysis 115 } 116 117 // partitionQueryIntoBatches partitions a list of test variant positions 118 // into batches. 119 func partitionQueryIntoBatches(tvps []*pb.QueryTestVariantStabilityRequest_TestVariantPosition, batchSize int) []*queryStabilityBatch { 120 var batches []*queryStabilityBatch 121 batchInput := make([]*pb.QueryTestVariantStabilityRequest_TestVariantPosition, 0, batchSize) 122 for _, tvp := range tvps { 123 if len(batchInput) >= batchSize { 124 batches = append(batches, &queryStabilityBatch{ 125 input: batchInput, 126 }) 127 batchInput = make([]*pb.QueryTestVariantStabilityRequest_TestVariantPosition, 0, batchSize) 128 } 129 batchInput = append(batchInput, tvp) 130 } 131 if len(batchInput) > 0 { 132 batches = append(batches, &queryStabilityBatch{ 133 input: batchInput, 134 }) 135 } 136 return batches 137 } 138 139 // queryStabilityShard reads test stability statistics for test variants. 140 // Must be called in a spanner transactional context. 
func queryStabilityShard(ctx context.Context, opts QueryStabilityOptions) ([]*pb.TestVariantStabilityAnalysis, error) {
	// changelist and testVariantPosition describe the shape of the
	// @testVariantPositions query parameter. The query refers to these
	// fields by name (e.g. tv.TestId, tv.ExcludedChangelists).
	type changelist struct {
		Host   string
		Change int64
	}
	type testVariantPosition struct {
		TestID              string
		VariantHash         string
		SourceRefHash       []byte
		QuerySourcePosition int64
		ExcludedChangelists []changelist
	}

	// Convert each requested position into its query parameter form.
	tvps := make([]testVariantPosition, 0, len(opts.TestVariantPositions))
	for _, ptv := range opts.TestVariantPositions {
		// Use the supplied variant hash if there is one, otherwise
		// compute it from the variant.
		variantHash := ptv.VariantHash
		if variantHash == "" {
			variantHash = pbutil.VariantHash(ptv.Variant)
		}

		// The changelists of the queried sources are passed as exclusions,
		// with hosts in compressed form.
		excludedCLs := make([]changelist, 0, len(ptv.Sources.Changelists))
		for _, cl := range ptv.Sources.Changelists {
			excludedCLs = append(excludedCLs, changelist{
				Host:   testresults.CompressHost(cl.Host),
				Change: cl.Change,
			})
		}

		tvps = append(tvps, testVariantPosition{
			TestID:              ptv.TestId,
			VariantHash:         variantHash,
			SourceRefHash:       pbutil.SourceRefHash(pbutil.SourceRefFromSources(ptv.Sources)),
			QuerySourcePosition: pbutil.SourcePosition(ptv.Sources),
			ExcludedChangelists: excludedCLs,
		})
	}

	stmt := spanner.NewStatement(testStabilityQuery)
	stmt.Params = map[string]any{
		"project":              opts.Project,
		"testVariantPositions": tvps,
		"subRealms":            opts.SubRealms,
		"asAtTime":             opts.AsAtTime,
		"skip":                 int64(pb.TestResultStatus_SKIP),
	}

	results := make([]*pb.TestVariantStabilityAnalysis, 0, len(tvps))

	// index is the position in tvps (and opts.TestVariantPositions) that
	// the next row is expected to correspond to.
	index := 0
	var b spanutil.Buffer
	err := span.Query(ctx, stmt).Do(func(row *spanner.Row) error {
		var (
			testID, variantHash                               string
			verdictsBefore, verdictsOnOrAfter                 []*sourceVerdict
			sourcePositionBuckets                             []*sourcePositionBucket
			runFlakyVerdictsBefore, runFlakyVerdictsOnOrAfter []*sourceVerdict
		)

		// Columns are decoded positionally, in the order listed here.
		err := b.FromSpanner(
			row,
			&testID,
			&variantHash,
			&verdictsBefore, &verdictsOnOrAfter,
			&sourcePositionBuckets,
			&runFlakyVerdictsBefore, &runFlakyVerdictsOnOrAfter,
		)
		if err != nil {
			return err
		}

		analysis := &pb.TestVariantStabilityAnalysis{}
		if testID != tvps[index].TestID || variantHash != tvps[index].VariantHash {
			// This should never happen, as the SQL statement is designed
			// to return results in the same order as test variants requested.
			panic("results in incorrect order")
		}
		sourcePosition := tvps[index].QuerySourcePosition

		analysis.TestId = testID
		// Variant and VariantHash are echoed from the request as supplied;
		// VariantHash is therefore empty if the request gave only a variant.
		analysis.Variant = opts.TestVariantPositions[index].Variant
		analysis.VariantHash = opts.TestVariantPositions[index].VariantHash
		analysis.FailureRate = applyFailureRateCriteria(verdictsBefore, verdictsOnOrAfter, sourcePosition, opts.Criteria.FailureRate)
		analysis.FlakeRate = applyFlakeRateCriteria(sourcePositionBuckets, sourcePosition, runFlakyVerdictsBefore, runFlakyVerdictsOnOrAfter, opts.Criteria.FlakeRate)
		results = append(results, analysis)
		index++
		return nil
	})
	if err != nil {
		return nil, err
	}
	return results, nil
}

// truncateSourceVerdicts truncates verdicts such that the
// total runs in the truncated slice is no greater than maxRuns.
//
// If the total number of runs in verdicts is already less
// than or equal to maxRuns, no truncation occurs.
//
// As individual source verdicts can represent more than one
// run, truncation may occur inside a source verdict (dropping
// some of its runs) to achieve a total of maxRuns in
// the returned slice.
244 func truncateSourceVerdicts(verdicts []*sourceVerdict, maxRuns int) []*sourceVerdict { 245 var runs int 246 var result []*sourceVerdict 247 for _, verdict := range verdicts { 248 if runs >= maxRuns { 249 break 250 } 251 252 remainingRuns := maxRuns - runs 253 truncatedVerdict := truncateSourceVerdict(verdict, remainingRuns) 254 runs += int(truncatedVerdict.ExpectedRuns + truncatedVerdict.UnexpectedRuns) 255 result = append(result, truncatedVerdict) 256 } 257 return result 258 } 259 260 // truncateSourceVerdict truncates a verdict such that its 261 // total runs is no greater than maxRuns. 262 // 263 // If multiple runs are to be removed from a verdict, the dropped 264 // runs are balanced between the unexpected and expected runs 265 // proportionately. 266 func truncateSourceVerdict(verdict *sourceVerdict, maxRuns int) *sourceVerdict { 267 // Copy the verdict as we may drop some of its runs. 268 vc := *verdict 269 270 excessRuns := (vc.ExpectedRuns + vc.UnexpectedRuns) - int64(maxRuns) 271 if excessRuns > 0 { 272 // Fairly share the runs to be removed between expected 273 // and unexpected runs. Round towards removing more 274 // expected runs than unexpected runs. 275 unexpectedRunsToRemove := excessRuns * vc.UnexpectedRuns / (vc.ExpectedRuns + vc.UnexpectedRuns) 276 expectedRunsToRemove := excessRuns - unexpectedRunsToRemove 277 278 vc.UnexpectedRuns -= unexpectedRunsToRemove 279 vc.ExpectedRuns -= expectedRunsToRemove 280 } 281 return &vc 282 } 283 284 // flattenSourceVerdictsToRuns transforms a list of source verdicts 285 // to a sequence of runs. 286 func flattenSourceVerdictsToRuns(verdicts []*sourceVerdict) []run { 287 var result []run 288 for _, verdict := range verdicts { 289 result = append(result, flattenSourceVerdictToRuns(verdict)...) 290 } 291 return result 292 } 293 294 // flattenSourceVerdictToRuns transforms a source verdict 295 // into a sequence of runs. 
As the order of runs within a source verdict 296 // is not known, they are put in an arbitrary (fair) order. 297 func flattenSourceVerdictToRuns(verdict *sourceVerdict) []run { 298 var result []run 299 unexpectedRuns := verdict.UnexpectedRuns 300 totalRuns := verdict.UnexpectedRuns + verdict.ExpectedRuns 301 302 var unexpectedOutput int64 303 var totalOutput int64 304 for totalOutput < totalRuns { 305 var expected bool 306 307 remainingUnexpectedRuns := (unexpectedRuns - unexpectedOutput) 308 remainingRuns := (totalRuns - totalOutput) 309 310 // What percentage of remaining runs are unexpected? 311 // 312 // In case of only unexpected runs, this will be 100. 313 // In case of only expected runs, this will be 0. 314 // Otherwise, it should be somewhere in the middle. 315 // 316 // Invariant: 0 <= remainingUnexpectedPercent <= 100. 317 remainingUnexpectedPercent := (remainingUnexpectedRuns * 100) / remainingRuns 318 319 // If we output an expected run now, what percentage 320 // of the runs output so far will be unexpected? 321 // 322 // Invariant: 0 <= unexpectedPercentIfOutputExpected < 100. 323 unexpectedPercentIfOutputExpected := (unexpectedOutput * 100) / (totalOutput + 1) 324 325 // Maintain fairness by alternating between expected 326 // and unexpected runs to keep the proportion of 327 // unexpected runs output so far and the proportion 328 // of unexpected runs remaining about equal. 329 // 330 // In case of only expected runs remaining, remainingUnexpectedPercent 331 // will equal zero so we will only output expected runs. 332 // 333 // In case of only unexpected runs remaining, remainingUnexpectedPercent 334 // will equal 100. As unexpectedPercentIfOutputExpected is 335 // always less than 100, we will only output unexpected runs. 336 if unexpectedPercentIfOutputExpected >= remainingUnexpectedPercent { 337 expected = true // output expected run. 338 } else { 339 expected = false // output unexpected run. 
340 } 341 342 result = append(result, run{expected: expected}) 343 if !expected { 344 unexpectedOutput++ 345 } 346 totalOutput++ 347 } 348 return result 349 } 350 351 func reverseVerdicts(verdicts []*sourceVerdict) []*sourceVerdict { 352 reversed := make([]*sourceVerdict, 0, len(verdicts)) 353 for i := len(verdicts) - 1; i >= 0; i-- { 354 reversed = append(reversed, verdicts[i]) 355 } 356 return reversed 357 } 358 359 func reverseRuns(runs []run) []run { 360 reversed := make([]run, 0, len(runs)) 361 for i := len(runs) - 1; i >= 0; i-- { 362 reversed = append(reversed, runs[i]) 363 } 364 return reversed 365 } 366 367 func splitOn(verdicts []*sourceVerdict, sourcePosition int64) (on, other []*sourceVerdict) { 368 on = make([]*sourceVerdict, 0, len(verdicts)) 369 other = make([]*sourceVerdict, 0, len(verdicts)) 370 371 for _, v := range verdicts { 372 if v.SourcePosition == sourcePosition { 373 on = append(on, v) 374 } else { 375 other = append(other, v) 376 } 377 } 378 return on, other 379 } 380 381 // filterSourceVerdicts filters source verdicts so that at most one 382 // test run is present for each verdict obtained in presubmit. 383 func filterSourceVerdictsForFailureRateCriteria(svs []*sourceVerdict) []*sourceVerdict { 384 result := make([]*sourceVerdict, 0, len(svs)) 385 for _, sv := range svs { 386 item := &sourceVerdict{} 387 *item = *sv 388 389 if item.ChangelistChange.Valid && (item.UnexpectedRuns+item.ExpectedRuns) > 1 { 390 // For presubmit data, keep only one run, preferentially the unexpected run. 391 if item.UnexpectedRuns >= 1 { 392 item.UnexpectedRuns = 1 393 item.ExpectedRuns = 0 394 } else { 395 item.UnexpectedRuns = 0 396 item.ExpectedRuns = 1 397 } 398 } 399 400 result = append(result, item) 401 } 402 return result 403 } 404 405 // applyFailureRateCriteria applies the failure rate criteria to a test variant 406 // at a given source position. 
407 // 408 // beforeExamples is a list of (up to) 10 source verdicts with source position 409 // just prior to the queried source position. The list shall be ordered with 410 // the source verdict nearest the queried source position appearing first. 411 // 412 // onOrAfterExamples is a list of (up to) 10 source verdicts with source position 413 // equal to, or just after, the queried source position. The list shall be 414 // ordered with the source verdict nearest the queried source position 415 // appearing first. 416 // 417 // Both sets of examples should have had the following filtering applied: 418 // - At most one source verdict per distinct CL (for source verdicts 419 // testing CLs; no such restrictions apply to postsubmit data). 420 // - Source verdicts must not be for the same CL as is being considered for 421 // exoneration (if any). 422 // - Source verdicts must not be for CLs authored by automation. 423 // 424 // criteria defines the failure rate thresholds to apply. 425 func applyFailureRateCriteria(beforeExamples, onOrAfterExamples []*sourceVerdict, sourcePosition int64, criteria *pb.TestStabilityCriteria_FailureRateCriteria) *pb.TestVariantStabilityAnalysis_FailureRate { 426 // Limit source verdicts from presubmit to contributing at most 1 run each. 427 // This is to avoid a single repeatedly retried bad CL from having an oversized 428 // influence on the exoneration decision. 429 beforeExamples = filterSourceVerdictsForFailureRateCriteria(beforeExamples) 430 onOrAfterExamples = filterSourceVerdictsForFailureRateCriteria(onOrAfterExamples) 431 432 onExamples, afterExamples := splitOn(onOrAfterExamples, sourcePosition) 433 434 onExamples = truncateSourceVerdicts(onExamples, 10) 435 onRuns := flattenSourceVerdictsToRuns(onExamples) 436 437 // The window size is 10, and the window will always contain any runs on the queried 438 // source position. Additional runs may come from source positions before or after. 
439 // Note: For the passed examples, the first example is the one nearest to the query 440 // position, so truncating keeps only the examples closest to the queried source position. 441 beforeExamples = truncateSourceVerdicts(beforeExamples, 10-len(onRuns)) 442 afterExamples = truncateSourceVerdicts(afterExamples, 10-len(onRuns)) 443 444 beforeRuns := flattenSourceVerdictsToRuns(beforeExamples) 445 afterRuns := flattenSourceVerdictsToRuns(afterExamples) 446 447 consecutive := consecutiveUnexpectedCount(reverseRuns(afterRuns), onRuns, beforeRuns) 448 449 // Put runs in chronological order: 450 // 0 .... len(runs)-1 451 // <--- more recent <query position> less recent ---> 452 // 453 // We need to reverse afterRuns as it is sorted with the first element 454 // closest to the query position. 455 runs := append(append(reverseRuns(afterRuns), onRuns...), beforeRuns...) 456 maxFailuresInWindow := unexpectedRunsInWindow(runs, 10) 457 458 // Also put source verdicts in chronological order. 459 // 0 .... len(examples)-1 460 // <--- more recent <query position> less recent ---> 461 examples := append(append(reverseVerdicts(afterExamples), onExamples...), beforeExamples...) 462 463 return &pb.TestVariantStabilityAnalysis_FailureRate{ 464 IsMet: (consecutive >= int(criteria.ConsecutiveFailureThreshold) || 465 maxFailuresInWindow >= int(criteria.FailureThreshold)), 466 UnexpectedTestRuns: int32(maxFailuresInWindow), 467 ConsecutiveUnexpectedTestRuns: int32(consecutive), 468 RecentVerdicts: toPBFailureRateRecentVerdict(examples), 469 } 470 } 471 472 // unexpectedRunsInWindow considers all sliding windows of size 473 // windowSize over the slice runs, and returns the maximum number 474 // of unexpected test runs in any such window. 475 func unexpectedRunsInWindow(runs []run, windowSize int) int { 476 // If the number of runs is less than the window size, consider 477 // the runs that remain as a single window. 
478 if len(runs) < windowSize { 479 windowSize = len(runs) 480 } 481 482 unexpectedCount := 0 483 for i := 0; i < windowSize; i++ { 484 if !runs[i].expected { 485 unexpectedCount++ 486 } 487 } 488 // Now failureCount = COUNT_UNEXPECTED(runs[0:windowSize]) 489 490 maxFailuresInWindow := unexpectedCount 491 for i := 1; i+windowSize-1 < len(runs); i++ { 492 // Slide the window one position. 493 if !runs[i-1].expected { 494 unexpectedCount-- 495 } 496 if !runs[i+windowSize-1].expected { 497 unexpectedCount++ 498 } 499 // Now failureCount = COUNT_UNEXPECTED(runs[i:windowSize+i]) 500 501 if unexpectedCount > maxFailuresInWindow { 502 maxFailuresInWindow = unexpectedCount 503 } 504 } 505 return maxFailuresInWindow 506 } 507 508 // consecutiveUnexpectedCount returns the number of consecutive unexpected runs 509 // present from the start or end of a series of runs, where those consecutive 510 // unexpected runs also include the query position. 511 // 512 // If the consecutive failures do not pass the query position, this method 513 // returns 0. 514 // If there are consecutive failures but none touch the start or end 515 // of the runs slice, this method also returns 0. 516 // 517 // Example: 518 // 519 // [U U U] [U U] [U E U E E U U E] = afterRuns, onRuns, beforeRuns 520 // 521 // The method returns 6, because of there is a chain of 6 consecutive 522 // failures starting at the front of the runs slice, and that chain 523 // passes by the query position. It is also continues to one run 524 // in the 'beforeRuns' slice. 525 // 526 // The following conventions apply to arguments: 527 // - the most recent runs (later source position) appear first 528 // in all runs slices. 
529 // - onRuns represents runs exactly on the queried source position, 530 // - afterRuns represents runs with a source position greater than 531 // the queried source position 532 // - beforeRuns represents runs with a source position less than 533 // the queried source position 534 func consecutiveUnexpectedCount(afterRuns, onRuns, beforeRuns []run) int { 535 for _, r := range onRuns { 536 if r.expected { 537 // There is an expected run on the queried source position. 538 // The failures cannot be consecutive up to and including 539 // the source position from either side. 540 return 0 541 } 542 } 543 544 // The number of consecutive runs in afterRuns, starting from 545 // the side of the queried source position. 546 afterRunsConsecutive := len(afterRuns) 547 for i := len(afterRuns) - 1; i >= 0; i-- { 548 if afterRuns[i].expected { 549 // We encountered an expected run. 550 afterRunsConsecutive = (len(afterRuns) - 1) - i 551 break 552 } 553 } 554 555 // The number of consecutive runs in beforeRuns, starting from 556 // the side of the queried source position. 557 beforeRunsConsecutive := len(beforeRuns) 558 for i := 0; i < len(beforeRuns); i++ { 559 if beforeRuns[i].expected { 560 // We encountered an expected run. 561 beforeRunsConsecutive = i 562 break 563 } 564 } 565 566 if len(afterRuns) == afterRunsConsecutive { 567 // All runs after the source position are unexpected. 568 // Additionally, we know all runs on the source position are unexpected. 569 return len(afterRuns) + len(onRuns) + beforeRunsConsecutive 570 } 571 if len(beforeRuns) == beforeRunsConsecutive { 572 // All runs before the source position are unexpected. 573 // Additionally, we know all runs on the source position are unexpected. 574 return len(beforeRuns) + len(onRuns) + afterRunsConsecutive 575 } 576 return 0 577 } 578 579 // applyFlakeRateCriteria applies the flake rate criteria to a test variant 580 // at a given source position. 
581 // 582 // buckets should be in ascending order by source position. 583 func applyFlakeRateCriteria(buckets []*sourcePositionBucket, querySourcePosition int64, beforeExamples, onOrAfterExamples []*sourceVerdict, criteria *pb.TestStabilityCriteria_FlakeRateCriteria) *pb.TestVariantStabilityAnalysis_FlakeRate { 584 // Query the soure position +/- 1 week. 585 window := queryBuckets(buckets, querySourcePosition, 7*24*time.Hour) 586 587 runFlaky, total := countVerdicts(window) 588 if total < int64(criteria.MinWindow) { 589 // If the sample size is not large enough, revert to querying the full 590 // 14 days of data. This exists to improve performance on infrequently 591 // run tests at the cost of some recency. 592 window = buckets 593 runFlaky, total = countVerdicts(window) 594 } 595 596 // Examples arrive sorted such that those closest to the queried source 597 // position are first. 598 // Flip and combine them so that the most recent (latest source position) 599 // are first. 600 allExamples := append(reverseVerdicts(onOrAfterExamples), beforeExamples...) 601 602 // Find examples from the window considered. 
603 var examples []*sourceVerdict 604 var startPosition int64 605 var endPosition int64 606 if len(window) > 0 { 607 startPosition = window[0].StartSourcePosition 608 endPosition = window[len(window)-1].EndSourcePosition 609 610 for _, e := range allExamples { 611 if startPosition <= e.SourcePosition && e.SourcePosition <= endPosition { 612 examples = append(examples, e) 613 } 614 } 615 616 if len(examples) > 10 { 617 examples = examples[:10] 618 } 619 } 620 621 flakeRate := 0.0 622 if total > 0 { 623 flakeRate = float64(runFlaky) / float64(total) 624 } 625 626 return &pb.TestVariantStabilityAnalysis_FlakeRate{ 627 IsMet: runFlaky >= int64(criteria.FlakeThreshold) && flakeRate >= criteria.FlakeRateThreshold, 628 RunFlakyVerdicts: int32(runFlaky), 629 TotalVerdicts: int32(total), 630 FlakeExamples: toPBFlakeRateVerdictExample(examples), 631 StartPosition: startPosition, 632 EndPosition: endPosition, 633 } 634 } 635 636 func countVerdicts(buckets []*sourcePositionBucket) (runFlaky, total int64) { 637 for _, b := range buckets { 638 runFlaky += b.RunFlakyVerdicts 639 total += b.TotalVerdicts 640 } 641 return runFlaky, total 642 } 643 644 // queryBuckets returns the slice of buckets that corresponds to 645 // querying a time interval `interval` before and after a specified 646 // source position, `querySourcePosition`. 647 // 648 // For example, query position 123456 +/- 1 week. 649 // 650 // To convert a time interval to a range of source positions, 651 // this method computes an approximate time corresponding to each 652 // source position. The time assigned to a source position 653 // is the earliest partition time that source position (or a 654 // later position) has been observed. 655 // 656 // buckets should be in ascending order by source position. 
657 func queryBuckets(buckets []*sourcePositionBucket, querySourcePosition int64, interval time.Duration) []*sourcePositionBucket { 658 if len(buckets) == 0 { 659 return buckets 660 } 661 662 // earliestSourcePositionAvailability[i] represents the earliest partition time 663 // observed for a test result with a source position at or after buckets[i].StartSourcePosition. 664 // 665 // Intuitively, it represents a best guess estimate about the time a source position 666 // was first available in the repository. It is also consistent in the sense that 667 // an earlier source position will never have a later time associated with it than 668 // a later source position. 669 earliestSourcePositionAvailability := make([]time.Time, len(buckets)) 670 earliestTime := time.Date(9999, 12, 31, 23, 59, 59, 0, time.UTC) 671 672 // Start at the more recent (larger) source position and work backwards 673 // to the past. 674 for i := len(buckets) - 1; i >= 0; i-- { 675 b := buckets[i] 676 if b.EndSourcePosition < b.StartSourcePosition { 677 panic("end source position should be equal to or after start source position") 678 } 679 if i < len(buckets)-2 && !(buckets[i].EndSourcePosition < buckets[i+1].StartSourcePosition) { 680 panic("end source position of bucket should be before start source position of next bucket") 681 } 682 // Regardless of the earliest time a source position in this bucket was observed, 683 // if a bucket with a later source position had an earlier time, we should use that. 684 // This is because the later source positions build upon earlier sources positions, 685 // so the earlier source positions must have been available at that time too. 686 if b.EarliestPartitionTime.Before(earliestTime) { 687 earliestTime = b.EarliestPartitionTime 688 } 689 earliestSourcePositionAvailability[i] = earliestTime 690 } 691 692 // Find the nearest bucket that includes, or is prior to, the queried source position. 
693 queryIndex := 0 694 for i, b := range buckets { 695 if b.StartSourcePosition > querySourcePosition { 696 break 697 } 698 queryIndex = i 699 } 700 701 // The time approximately corresponding to the queried source position. 702 queryTime := earliestSourcePositionAvailability[queryIndex] 703 queryStartTime := queryTime.Add(-interval) 704 queryEndTime := queryTime.Add(interval) 705 706 startIndex := len(buckets) 707 for i, time := range earliestSourcePositionAvailability { 708 if !time.Before(queryStartTime) { // time >= queryStartTime 709 startIndex = i 710 break 711 } 712 } 713 714 endIndex := 0 715 for i := len(earliestSourcePositionAvailability) - 1; i >= 0; i-- { 716 time := earliestSourcePositionAvailability[i] 717 if !time.After(queryEndTime) { // time <= queryEndTime 718 endIndex = i 719 break 720 } 721 } 722 723 return buckets[startIndex : endIndex+1] 724 } 725 726 // sourceVerdict is used to store an example source verdict returned by 727 // a Spanner query. 728 type sourceVerdict struct { 729 SourcePosition int64 730 // Verdicts considered by the analysis have at most one CL tested, 731 // which is set below (if present). 
732 ChangelistHost spanner.NullString 733 ChangelistChange spanner.NullInt64 734 ChangelistPatchset spanner.NullInt64 735 ChangelistOwnerKind spanner.NullString 736 IngestedInvocationIds []string 737 UnexpectedRuns int64 738 ExpectedRuns int64 739 } 740 741 func toPBFailureRateRecentVerdict(verdicts []*sourceVerdict) []*pb.TestVariantStabilityAnalysis_FailureRate_RecentVerdict { 742 results := make([]*pb.TestVariantStabilityAnalysis_FailureRate_RecentVerdict, 0, len(verdicts)) 743 for _, v := range verdicts { 744 var changelists []*pb.Changelist 745 if v.ChangelistHost.Valid { 746 changelists = append(changelists, &pb.Changelist{ 747 Host: testresults.DecompressHost(v.ChangelistHost.StringVal), 748 Change: v.ChangelistChange.Int64, 749 Patchset: int32(v.ChangelistPatchset.Int64), 750 OwnerKind: testresults.OwnerKindFromDB(v.ChangelistOwnerKind.StringVal), 751 }) 752 } 753 754 results = append(results, &pb.TestVariantStabilityAnalysis_FailureRate_RecentVerdict{ 755 Position: v.SourcePosition, 756 Changelists: changelists, 757 Invocations: sortStrings(v.IngestedInvocationIds), 758 UnexpectedRuns: int32(v.UnexpectedRuns), 759 TotalRuns: int32(v.ExpectedRuns + v.UnexpectedRuns), 760 }) 761 } 762 return results 763 } 764 765 func sortStrings(ids []string) []string { 766 idsCopy := make([]string, len(ids)) 767 copy(idsCopy, ids) 768 sort.Strings(idsCopy) 769 return idsCopy 770 } 771 772 func toPBFlakeRateVerdictExample(verdicts []*sourceVerdict) []*pb.TestVariantStabilityAnalysis_FlakeRate_VerdictExample { 773 results := make([]*pb.TestVariantStabilityAnalysis_FlakeRate_VerdictExample, 0, len(verdicts)) 774 for _, v := range verdicts { 775 var changelists []*pb.Changelist 776 if v.ChangelistHost.Valid { 777 changelists = append(changelists, &pb.Changelist{ 778 Host: testresults.DecompressHost(v.ChangelistHost.StringVal), 779 Change: v.ChangelistChange.Int64, 780 Patchset: int32(v.ChangelistPatchset.Int64), 781 OwnerKind: 
testresults.OwnerKindFromDB(v.ChangelistOwnerKind.StringVal),
			})
		}

		results = append(results, &pb.TestVariantStabilityAnalysis_FlakeRate_VerdictExample{
			Position:    v.SourcePosition,
			Changelists: changelists,
			Invocations: sortStrings(v.IngestedInvocationIds),
		})
	}
	return results
}

// sourcePositionBucket represents a range of source positions for
// a given test variant.
//
// The field names match the columns of the FlakeRateBuckets structs
// selected by testStabilityQuery.
// NOTE(review): presumably query rows are decoded into this type by
// field name, so fields should not be renamed independently of the
// query — confirm against the code that reads the query results.
type sourcePositionBucket struct {
	// BucketKey identifies the bucket. testStabilityQuery computes it as
	// FLOOR(Index * 100 / N), where N is the number of distinct source
	// positions with verdicts for the test variant and Index is the
	// zero-based rank of a source position in ascending order. Keys
	// therefore fall in the range [0, 99] and increase with source
	// position.
	BucketKey int64
	// Starting source position. Inclusive.
	StartSourcePosition int64
	// Ending source position. Inclusive.
	EndSourcePosition int64
	// The earliest partition time of a test result in the bucket.
	EarliestPartitionTime time.Time
	// The total number of source verdicts in the bucket.
	TotalVerdicts int64
	// The total number of run-flaky source verdicts in the bucket.
	// The query counts a source verdict as run-flaky when it has at
	// least one unexpected run and at least one expected run.
	RunFlakyVerdicts int64
}

// testStabilityQuery is the Spanner SQL statement used to retrieve the
// data needed to analyse the stability of a batch of test variants.
//
// Query parameters referenced by the statement:
//   - @project: compared against TestResults.Project.
//   - @subRealms: array; only results with a SubRealm in it are read.
//   - @asAtTime: only results with a partition time in the 14 days
//     before this time (exclusive of the time itself) are read.
//   - @skip: results with this Status value are excluded.
//   - @testVariantPositions: array of structs from which the query reads
//     TestId, VariantHash, SourceRefHash, QuerySourcePosition and
//     ExcludedChangelists (itself an array of (Host, Change) structs).
//
// The statement returns one row per element of @testVariantPositions,
// ordered by the element's offset in that array, with the columns:
//   - TestId, VariantHash.
//   - FailureRateVerdictsBefore / FailureRateVerdictsOnOrAfter: up to 10
//     source verdicts either side of the query source position, nearest
//     first.
//   - FlakeRateBuckets: source verdicts summarised into buckets by
//     source position (see sourcePositionBucket).
//   - FlakeExamplesBefore / FlakeExamplesOnOrAfter: up to 10 run-flaky
//     source verdicts either side of the query source position, nearest
//     first.
//
// The "--" comments inside the statement are part of the string sent to
// Spanner; they explain the individual steps of the query.
var testStabilityQuery = `
WITH test_variant_verdicts AS (
	SELECT
		Index,
		tv.TestId,
		tv.VariantHash,
		tv.QuerySourcePosition,
		ARRAY(
			-- Filter verdicts to at most one per changelist under test.
			-- Don't filter verdicts without an unsubmitted changelist
			-- under test (i.e. postsubmit data).
			SELECT
				ANY_VALUE(
					STRUCT(
						SourcePosition,
						ChangelistHost,
						ChangelistChange,
						ChangelistPatchset,
						ChangelistOwnerKind,
						IngestedInvocationIds,
						MaxPartitionTime,
						MinPartitionTime,
						UnexpectedRuns,
						ExpectedRuns
					)
					-- For any CL, prefer the verdict that is flaky.
					-- Then prefer the verdict that is closest to the queried
					-- source position.
					HAVING MIN ABS(SourcePosition - tv.QuerySourcePosition) + IF(UnexpectedRuns > 0 AND ExpectedRuns > 0, -1000 * 1000 * 1000, 0)
				) AS Verdict,
			FROM (
				-- Flatten test runs to source verdicts.
				SELECT
					SourcePosition,
					ChangelistHost,
					ChangelistChange,
					ChangelistPatchset,
					ANY_VALUE(ChangelistOwnerKind) AS ChangelistOwnerKind,
					ANY_VALUE(HasDirtySources) AS HasDirtySources,
					ANY_VALUE(IF(HasDirtySources, IngestedInvocationId, NULL)) AS DirtySourcesUniqifier,
					ARRAY_AGG(DISTINCT IngestedInvocationId) as IngestedInvocationIds,
					MAX(PartitionTime) as MaxPartitionTime,
					MIN(PartitionTime) as MinPartitionTime,
					COUNTIF(UnexpectedRun) as UnexpectedRuns,
					COUNTIF(NOT UnexpectedRun) as ExpectedRuns,
				FROM (
					-- Flatten test results to test runs.
					SELECT
						PartitionTime,
						IngestedInvocationId,
						RunIndex,
						LOGICAL_AND(COALESCE(IsUnexpected, FALSE)) AS UnexpectedRun,
						ANY_VALUE(SourcePosition) AS SourcePosition,
						ANY_VALUE(ChangelistHosts)[SAFE_OFFSET(0)] AS ChangelistHost,
						ANY_VALUE(ChangelistChanges)[SAFE_OFFSET(0)] AS ChangelistChange,
						ANY_VALUE(ChangelistPatchsets)[SAFE_OFFSET(0)] AS ChangelistPatchset,
						ANY_VALUE(ChangelistOwnerKinds)[SAFE_OFFSET(0)] AS ChangelistOwnerKind,
						ANY_VALUE(HasDirtySources) AS HasDirtySources
					FROM TestResults
					WHERE Project = @project
						AND PartitionTime >= TIMESTAMP_SUB(@asAtTime, INTERVAL 14 DAY)
						AND PartitionTime < @asAtTime
						AND TestId = tv.TestId
						AND VariantHash = tv.VariantHash
						AND SourceRefHash = tv.SourceRefHash
						AND SubRealm IN UNNEST(@subRealms)
						-- Exclude skipped results.
						AND Status <> @skip
						AND (
							(
								-- Either there must be no CL tested by this result.
								ChangelistHosts IS NULL OR ARRAY_LENGTH(ChangelistHosts) = 0
							)
							OR (
								-- Or there must be exactly one CL tested.
								ARRAY_LENGTH(ChangelistHosts) = 1

								-- And that CL may not be authored by automation.
								-- Automatic uprev automation will happily upload out CL after CL
								-- with essentially the same change, that breaks the same test.
								-- This adds more noise than signal.
								AND ChangelistOwnerKinds[SAFE_OFFSET(0)] <> 'A'

								-- And that CL must not be one of changelists which we
								-- are considering exonerating as a result of this RPC.
								AND STRUCT(ChangelistHosts[SAFE_OFFSET(0)] as Host, ChangelistChanges[SAFE_OFFSET(0)] AS Change)
									NOT IN UNNEST(tv.ExcludedChangelists)
							)
						)
					GROUP BY PartitionTime, IngestedInvocationId, RunIndex
				)
				GROUP BY
					-- Base source position tested
					SourcePosition,
					-- Patchset applied ontop of base sources (if any)
					ChangelistHost, ChangelistChange, ChangelistPatchset,
					-- If sources are marked dirty, then sources must be treated as unique
					-- per invocation. (I.E. then source verdict == test verdict).
					IF(HasDirtySources, IngestedInvocationId, NULL)
			)
			-- Deduplicate to at most one per CL. For source verdicts not related
			-- to a CL, no deduplication shall occur.
			GROUP BY
				-- Changelist applied ontop of base sources (if any), excluding patchset number.
				ChangelistHost, ChangelistChange,
				-- If there is no CL under test, then the original source verdicts
				-- may be kept.
				IF(ChangelistHost IS NULL, SourcePosition, NULL),
				IF(ChangelistHost IS NULL, DirtySourcesUniqifier, NULL)
			ORDER BY Verdict.SourcePosition DESC, Verdict.MaxPartitionTime DESC
		) AS Verdicts,
	FROM UNNEST(@testVariantPositions) tv WITH OFFSET Index
)

SELECT
	TestId,
	VariantHash,
	ARRAY(
		SELECT AS STRUCT
			SourcePosition,
			ChangelistHost,
			ChangelistChange,
			ChangelistPatchset,
			ChangelistOwnerKind,
			IngestedInvocationIds,
			UnexpectedRuns,
			ExpectedRuns
		FROM UNNEST(Verdicts) v
		WHERE v.SourcePosition < QuerySourcePosition
		ORDER BY SourcePosition DESC, MaxPartitionTime DESC
		-- The actual criteria is for 10 runs, not 10 source verdicts,
		-- but this is hard to implement in SQL so we'll do post-filtering
		-- in the app.
		LIMIT 10
	) as FailureRateVerdictsBefore,
	ARRAY(
		SELECT AS STRUCT
			SourcePosition,
			ChangelistHost,
			ChangelistChange,
			ChangelistPatchset,
			ChangelistOwnerKind,
			IngestedInvocationIds,
			UnexpectedRuns,
			ExpectedRuns
		FROM UNNEST(Verdicts) v
		WHERE v.SourcePosition >= QuerySourcePosition
		ORDER BY SourcePosition ASC, MaxPartitionTime DESC
		-- The actual criteria is for 10 runs, not 10 source verdicts,
		-- but this is hard to implement in SQL so we'll do post-filtering
		-- in the app.
		LIMIT 10
	) as FailureRateVerdictsOnOrAfter,
	ARRAY(
		-- The design calls for us to:
		-- 1. convert the query position to a partition time,
		-- 2. calculate a window +/- 7 days from that time,
		-- 3. convert that time window back to source position range
		-- 4. query that source position range and count the number of
		--- (and proportion of) flaky verdicts in the range.
		--
		-- As Spanner does not have analytic functions, steps 1 and 3 are
		-- hard to do in SQL.
		--
		-- Returning all verdicts to the backend to run the analysis there
		-- is also not viable: each test variant may have up to ~10,000
		-- source verdicts per two week period. At 100 bytes per verdict,
		-- this would imply a transfer of around 1 MB per test variant
		-- (or 100 MB in total for 100 test variants) to the backend.
		-- This is too much.
		--
		-- Therefore, we use an approximate implementation.
		-- We partition the source verdicts into 100 source position ranges,
		-- maintaining the earliest partition time for each. This allows
		-- steps 1-3 to be computed by AppEngine after the query returns.
		--
		-- Each bucket also maintains counts of flaky verdicts and total
		-- source verdicts. Because of this, there is no need to perform
		-- a follow-up query; once we determine the source position window
		-- to query, we simply count the verdicts in the buckets we
		-- determined to be part of that window.
		SELECT AS STRUCT
			CAST(FLOOR(Index * 100 / ARRAY_LENGTH(SourcePositions)) AS INT64) as BucketKey,
			MIN(SourcePosition) as StartSourcePosition,
			MAX(SourcePosition) as EndSourcePosition,
			MIN(EarliestPartitionTime) as EarliestPartitionTime,
			SUM(TotalVerdicts) as TotalVerdicts,
			SUM(RunFlakyVerdicts) as RunFlakyVerdicts,
		FROM (
			SELECT
				ARRAY(
					-- Group source verdicts by source position first,
					-- so that buckets contain all of a source position
					-- or none of it.
					SELECT AS STRUCT
						SourcePosition,
						MIN(MinPartitionTime) as EarliestPartitionTime,
						COUNT(1) as TotalVerdicts,
						COUNTIF(UnexpectedRuns > 0 AND ExpectedRuns > 0) as RunFlakyVerdicts,
					FROM UNNEST(Verdicts) v
					GROUP BY SourcePosition
					ORDER BY SourcePosition
				) AS SourcePositions
		), UNNEST(SourcePositions) sp WITH OFFSET Index
		GROUP BY 1
		ORDER BY BucketKey
	) AS FlakeRateBuckets,
	-- We do not yet know exactly the range of source positions
	-- that we will end up using for the flake rate criteria.
	-- Get (up to) 10 examples of flake each side of the
	-- query position, so that regardless of where the
	-- window falls, we will be able to get 10 examples.
	ARRAY(
		SELECT AS STRUCT
			SourcePosition,
			ChangelistHost,
			ChangelistChange,
			ChangelistPatchset,
			ChangelistOwnerKind,
			IngestedInvocationIds,
			UnexpectedRuns,
			ExpectedRuns
		FROM UNNEST(Verdicts) v WITH OFFSET Index
		WHERE UnexpectedRuns > 0 AND ExpectedRuns > 0
			AND v.SourcePosition < QuerySourcePosition
		ORDER BY SourcePosition DESC, MaxPartitionTime DESC
		LIMIT 10
	) as FlakeExamplesBefore,
	ARRAY(
		SELECT AS STRUCT
			SourcePosition,
			ChangelistHost,
			ChangelistChange,
			ChangelistPatchset,
			ChangelistOwnerKind,
			IngestedInvocationIds,
			UnexpectedRuns,
			ExpectedRuns
		FROM UNNEST(Verdicts) v WITH OFFSET Index
		WHERE UnexpectedRuns > 0 AND ExpectedRuns > 0
			AND v.SourcePosition >= QuerySourcePosition
		ORDER BY SourcePosition ASC, MaxPartitionTime DESC
		LIMIT 10
	) as FlakeExamplesOnOrAfter,
FROM test_variant_verdicts
ORDER BY Index
`