go.fuchsia.dev/infra@v0.0.0-20240507153436-9b593402251b/cmd/autogardener/blamelist.go (about) 1 // Copyright 2022 The Fuchsia Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 package main 6 7 import ( 8 "context" 9 _ "embed" 10 "time" 11 12 "cloud.google.com/go/bigquery" 13 "go.fuchsia.dev/infra/functools" 14 ) 15 16 //go:embed queries/nearby_test_results.sql 17 var nearbyTestResultsQuery string 18 19 type nearbyTestResult struct { 20 // Absolute builder name, in the form "project/bucket/builder". 21 Builder string 22 Failed bool 23 CommitPosition int 24 } 25 26 func getNearbyTestResults( 27 ctx context.Context, 28 bqClient *bigquery.Client, 29 sig failureSignature, 30 windowEnd time.Time, 31 ) ([]nearbyTestResult, error) { 32 return runQuery[nearbyTestResult](ctx, bqClient, nearbyTestResultsQuery, 33 map[string]any{ 34 "test_id": sig.FailedTest, 35 // Use a smaller window to decrease the likelihood of treating 36 // failures from separate old breakages as the first failure of the 37 // current breakage. 38 "earliest_time": windowEnd.Add(-20 * time.Hour), 39 "latest_time": windowEnd, 40 // TODO(olivernewman): don't hardcode this. 41 "repo": "turquoise-internal.googlesource.com/integration", 42 }, 43 ) 44 } 45 46 // calculateBlamelistDistances computes, for each builder with a certain failure 47 // mode, the number of builds between each suspect commit and the first build 48 // (within the time window used by the query) that had that failure mode. 49 // 50 // This is analogous to the manual process of "lining up" CI builder blamelists 51 // to find a culprit. 52 func calculateBlamelistDistances(results []nearbyTestResult, suspects []suspectCommit) error { 53 byBuilder := make(map[string][]nearbyTestResult) 54 for _, tr := range results { 55 byBuilder[tr.Builder] = append(byBuilder[tr.Builder], tr) 56 } 57 // Sort results in chronological order (earliest first). 58 for _, results := range byBuilder { 59 functools.SortBy(results, func(tr nearbyTestResult) int { 60 return tr.CommitPosition 61 }) 62 } 63 64 // TODO(olivernewman): handle the case where a test has broken on separate 65 // occasions within the time window. It's not easy to distinguish this from 66 // flakiness, but we can make a best effort at least for high-frequency 67 // failure modes. 68 for builder, results := range byBuilder { 69 firstFailureIdx := -1 70 for i, result := range results { 71 if result.Failed { 72 firstFailureIdx = i 73 break 74 } 75 } 76 // Skip builders where the failure mode didn't occur at all. 77 if firstFailureIdx == -1 { 78 continue 79 } 80 for i, suspect := range suspects { 81 for buildIdx, result := range results { 82 if result.CommitPosition >= suspect.CommitPosition { 83 // TODO(olivernewman): Also take blamelist size into 84 // account. If we are X% confident that a culprit falls 85 // within a given blamelist of length N, then we're only 86 // X/N% confident in each member of the blamelist. So that 87 // confidence will increase as the blamelist size decreases. 88 dist := firstFailureIdx - buildIdx 89 suspects[i].BlamelistDistances[builder] = dist 90 break 91 } 92 } 93 } 94 } 95 return nil 96 } 97 98 // scoreBlamelistDistances computes a 0-100 likelihood score for a potential 99 // culprit commit based on a list of CI builder first-failure blamelist 100 // distances. It takes the amount of data points into account by incorporating a 101 // uncertainty level. 102 func scoreBlamelistDistances(distances []int) int { 103 if len(distances) == 0 { 104 return 0 105 } 106 var weightedDistances []int 107 for _, dist := range distances { 108 if dist < 0 { 109 // If the commit landed *after* the first failure that's an 110 // especially good indicator that it's unlikely to be the culprit, 111 // so apply a large multiplier to that data point (and negate it, so 112 // the weighted distances are all positive). We simply downweight 113 // the suspect instead of completely discarding it (by returning 114 // zero) because it's possible the first failure is a different 115 // failure mode (e.g. latent flakiness) than the failure mode we're 116 // trying to diagnose. 117 dist *= -10 118 } 119 weightedDistances = append(weightedDistances, dist) 120 } 121 avg := average(weightedDistances) 122 123 // Calculate an uncertainty level based on the number of builders that are 124 // providing data points. If a commit was in the first failure blamelist of 125 // only builder it may be a coincidence, whereas we get a stronger signal if 126 // it's in the first failure blamelist of multiple builders. 127 uncertainty := 12 - 4*(len(distances)-1) 128 if uncertainty < 0 { 129 uncertainty = 0 130 } 131 132 score := 100/(avg/2+1) - float64(uncertainty) 133 if score < 0 { 134 return 0 135 } 136 return int(score) 137 }