// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import (
	"fmt"
	"io"
	"log"
)

// FlakeTestResult is the value returned by FlakeTest. It collects the
// flaky regions that the analysis identified.
type FlakeTestResult struct {
	// All lists every region that was identified. Regions covering
	// later events are appended before regions covering earlier
	// events.
	All []FlakeRegion
}

// FlakeRegion describes one range of failures that the analysis
// treats as a single region with a consistent failure probability.
type FlakeRegion struct {
	// Times gives the times of all failures in this region, in
	// increasing order.
	//
	// TODO: Remove some of the redundant fields?
	Times []int

	// First and Last are the indexes of the first and last
	// failures in this flaky region. These are equivalent to
	// Times[0] and Times[len(Times)-1], respectively.
	First, Last int

	// Failures is the number of failures in the region. This is
	// equivalent to len(Times).
	Failures int

	// FailureProbability is the fraction of builds in this region
	// that failed. It is 1 for a region that consists of a single
	// isolated failure.
	FailureProbability float64

	// GoodnessOfFit is the goodness of fit test for this region
	// against the maximum likelihood estimate geometric
	// distribution for these failures. This is primarily for
	// debugging. It is nil when no test result is available for
	// the region, as is the case for an isolated failure.
	GoodnessOfFit *AndersonDarlingTestResult
}

// FlakeTest finds ranges of commits over which the failure
// probability of a test is fairly consistent. The failures argument
// gives the indexes of commits with failing tests.
//
// This works by assuming flaky tests are a Bernoulli process. That
// is, they fail with some probability and each failure is independent
// of other failures. Using this assumption, it subdivides the failure
// events to find subranges where the distribution of times between
// failures is very similar to a geometric distribution (determined
// using an Anderson-Darling goodness-of-fit test).
54 func FlakeTest(failures []int) *FlakeTestResult { 55 result := &FlakeTestResult{} 56 result.subdivide(failures) 57 return result 58 } 59 60 // subdivide adds events to the flake test result if it has a strongly 61 // geometric interarrival distribution. Otherwise, it recursively 62 // subdivides events on the longest gap. 63 // 64 // events must be strictly monotonically increasing. 65 func (r *FlakeTestResult) subdivide(events []int) { 66 if len(events) == 1 { 67 // Isolated failure. 68 region := FlakeRegion{events, events[0], events[0], 1, 1, nil} 69 r.All = append(r.All, region) 70 return 71 } 72 73 mle, ad := interarrivalAnalysis(events) 74 if ad == nil || ad.P >= 0.05 { 75 // We failed to reject the null hypothesis that this 76 // isn't geometrically distributed. That's about as 77 // close as we're going to get to calling it 78 // geometrically distributed. 79 region := FlakeRegion{events, events[0], events[len(events)-1], len(events), mle.P, ad} 80 r.All = append(r.All, region) 81 return 82 } 83 84 // We reject the null hypothesis and accept the alternate 85 // hypothesis that this range of events is not a Bernoulli 86 // process. Subdivide on the longest gap, which is the least 87 // likely event in this range. 88 longestIndex, longestVal := 0, events[1]-events[0] 89 for i := 0; i < len(events)-1; i++ { 90 val := events[i+1] - events[i] 91 if val > longestVal { 92 longestIndex, longestVal = i, val 93 } 94 } 95 96 //fmt.Fprintln(os.Stderr, "subdividing", events[:longestIndex+1], events[longestIndex+1:], mle.P, ad.P) 97 98 // Find the more recent ranges first. 99 r.subdivide(events[longestIndex+1:]) 100 r.subdivide(events[:longestIndex+1]) 101 } 102 103 // interarrivalAnalysis returns the maximum likelihood estimated 104 // distribution for the times between events and the Anderson-Darling 105 // test for how closely the data matches this distribution. ad will be 106 // nil if there is no time between any of the events. 
107 // 108 // events must be strictly monotonically increasing. 109 func interarrivalAnalysis(events []int) (mle *GeometricDist, ad *AndersonDarlingTestResult) { 110 interarrivalTimes := make([]int, len(events)-1) 111 sum := 0 112 for i := 0; i < len(events)-1; i++ { 113 delta := events[i+1] - events[i] - 1 114 interarrivalTimes[i] = delta 115 sum += delta 116 } 117 118 // Compute maximum likelihood estimate of geometric 119 // distribution underlying interarrivalTimes. 120 mle = &GeometricDist{P: float64(len(interarrivalTimes)) / float64(len(interarrivalTimes)+sum)} 121 if mle.P == 1 { 122 // This happens if there are no gaps between events. 123 // In this case Anderson-Darling is undefined because 124 // the CDF is 1. 125 return 126 } 127 128 // Compute Anderson-Darling goodness-of-fit for the observed 129 // distribution against the theoretical distribution. 130 var err error 131 ad, err = AndersonDarlingTest(interarrivalTimes, mle) 132 if err != nil { 133 log.Fatal("Anderson-Darling test failed: ", err) 134 } 135 136 return 137 } 138 139 func (r *FlakeTestResult) Dump(w io.Writer) { 140 for i := range r.All { 141 reg := &r.All[len(r.All)-i-1] 142 gof := 0.0 143 if reg.GoodnessOfFit != nil { 144 gof = reg.GoodnessOfFit.P 145 } 146 147 fmt.Fprintln(w, reg.First, 0, 0) 148 fmt.Fprintln(w, reg.First, reg.FailureProbability, gof) 149 fmt.Fprintln(w, reg.Last, reg.FailureProbability, gof) 150 fmt.Fprintln(w, reg.Last, 0, 0) 151 } 152 } 153 154 // StillHappening returns the probability that the flake is still 155 // happening as of time t. 156 func (r *FlakeRegion) StillHappening(t int) float64 { 157 if t < r.First { 158 return 0 159 } 160 dist := GeometricDist{P: r.FailureProbability, Start: r.Last + 1} 161 return 1 - dist.CDF(t) 162 } 163 164 // Bounds returns the time at which the probability that the failure 165 // started rises above p and the time at which the probability that 166 // the failure stopped falls below p. 
Note that this has no idea of 167 // the "current" time, so stop may be "in the future." 168 func (r *FlakeRegion) Bounds(p float64) (start, stop int) { 169 dist := GeometricDist{P: r.FailureProbability} 170 delta := dist.InvCDF(1 - p) 171 return r.First - delta, r.Last + delta 172 } 173 174 // StartedAtOrBefore returns the probability that the failure start at 175 // or before time t. 176 func (r *FlakeRegion) StartedAtOrBefore(t int) float64 { 177 if t > r.First { 178 return 1 179 } 180 dist := GeometricDist{P: r.FailureProbability} 181 return 1 - dist.CDF(r.First-t-1) 182 } 183 184 func (r *FlakeRegion) StartedAt(t int) float64 { 185 dist := GeometricDist{P: r.FailureProbability} 186 return dist.PMF(r.First - t) 187 } 188 189 // Culprit gives the probability P that the event at time T was 190 // responsible for a failure. 191 type Culprit struct { 192 P float64 193 T int 194 } 195 196 // Culprits returns the possible culprits for this failure up to a 197 // cumulative probability of cumProb or at most limit events. Culprits 198 // are returned in reverse time order (from most likely culprit to 199 // least likely). 200 func (r *FlakeRegion) Culprits(cumProb float64, limit int) []Culprit { 201 culprits := []Culprit{} 202 203 total := 0.0 204 for t := r.First; t >= 0 && t > r.First-limit; t-- { 205 p := r.StartedAt(t) 206 culprits = append(culprits, Culprit{P: p, T: t}) 207 total += p 208 if total > cumProb { 209 break 210 } 211 } 212 213 return culprits 214 }