github.com/aclements/go-misc@v0.0.0-20240129233631-2f6ede80790c/findflakes/flaketest.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	"fmt"
     9  	"io"
    10  	"log"
    11  )
    12  
// FlakeTestResult holds the flaky regions found by FlakeTest. Regions
// are appended by subdivide, which descends into the more recent half
// first, so All is ordered from most recent region to oldest.
type FlakeTestResult struct {
	// All lists every detected flaky region, most recent first.
	All []FlakeRegion
}
    16  
// FlakeRegion describes one contiguous range of failures whose
// interarrival times are consistent with a single geometric
// distribution (i.e., a Bernoulli failure process with a roughly
// constant failure probability).
type FlakeRegion struct {
	// Times gives the times of all failures in this region, in
	// increasing order.
	//
	// TODO: Remove some of the redundant fields?
	Times []int

	// First and Last are the indexes of the first and last
	// failures in this flaky region. These are equivalent to
	// Times[0] and Times[len(Times)-1], respectively.
	First, Last int

	// Failures is the number of failures in the region. This is
	// equivalent to len(Times).
	Failures int

	// FailureProbability is the fraction of builds in this region
	// that failed.
	FailureProbability float64

	// GoodnessOfFit is the goodness of fit test for this region
	// against the maximum likelihood estimate geometric
	// distribution for these failures. This is primarily for
	// debugging. It is nil for isolated (single-failure) regions.
	GoodnessOfFit *AndersonDarlingTestResult
}
    43  
    44  // FlakeTest finds ranges of commits over which the failure
    45  // probability of a test is fairly consistent. The failures argument
    46  // gives the indexes of commits with failing tests.
    47  //
    48  // This works by assuming flaky tests are a Bernoulli process. That
    49  // is, they fail with some probability and each failure is independent
    50  // of other failures. Using this assumption, it subdivides the failure
    51  // events to find subranges where the distribution of times between
    52  // failures is very similar to a geometric distribution (determined
    53  // using an Anderson-Darling goodness-of-fit test).
    54  func FlakeTest(failures []int) *FlakeTestResult {
    55  	result := &FlakeTestResult{}
    56  	result.subdivide(failures)
    57  	return result
    58  }
    59  
    60  // subdivide adds events to the flake test result if it has a strongly
    61  // geometric interarrival distribution. Otherwise, it recursively
    62  // subdivides events on the longest gap.
    63  //
    64  // events must be strictly monotonically increasing.
    65  func (r *FlakeTestResult) subdivide(events []int) {
    66  	if len(events) == 1 {
    67  		// Isolated failure.
    68  		region := FlakeRegion{events, events[0], events[0], 1, 1, nil}
    69  		r.All = append(r.All, region)
    70  		return
    71  	}
    72  
    73  	mle, ad := interarrivalAnalysis(events)
    74  	if ad == nil || ad.P >= 0.05 {
    75  		// We failed to reject the null hypothesis that this
    76  		// isn't geometrically distributed. That's about as
    77  		// close as we're going to get to calling it
    78  		// geometrically distributed.
    79  		region := FlakeRegion{events, events[0], events[len(events)-1], len(events), mle.P, ad}
    80  		r.All = append(r.All, region)
    81  		return
    82  	}
    83  
    84  	// We reject the null hypothesis and accept the alternate
    85  	// hypothesis that this range of events is not a Bernoulli
    86  	// process. Subdivide on the longest gap, which is the least
    87  	// likely event in this range.
    88  	longestIndex, longestVal := 0, events[1]-events[0]
    89  	for i := 0; i < len(events)-1; i++ {
    90  		val := events[i+1] - events[i]
    91  		if val > longestVal {
    92  			longestIndex, longestVal = i, val
    93  		}
    94  	}
    95  
    96  	//fmt.Fprintln(os.Stderr, "subdividing", events[:longestIndex+1], events[longestIndex+1:], mle.P, ad.P)
    97  
    98  	// Find the more recent ranges first.
    99  	r.subdivide(events[longestIndex+1:])
   100  	r.subdivide(events[:longestIndex+1])
   101  }
   102  
   103  // interarrivalAnalysis returns the maximum likelihood estimated
   104  // distribution for the times between events and the Anderson-Darling
   105  // test for how closely the data matches this distribution. ad will be
   106  // nil if there is no time between any of the events.
   107  //
   108  // events must be strictly monotonically increasing.
   109  func interarrivalAnalysis(events []int) (mle *GeometricDist, ad *AndersonDarlingTestResult) {
   110  	interarrivalTimes := make([]int, len(events)-1)
   111  	sum := 0
   112  	for i := 0; i < len(events)-1; i++ {
   113  		delta := events[i+1] - events[i] - 1
   114  		interarrivalTimes[i] = delta
   115  		sum += delta
   116  	}
   117  
   118  	// Compute maximum likelihood estimate of geometric
   119  	// distribution underlying interarrivalTimes.
   120  	mle = &GeometricDist{P: float64(len(interarrivalTimes)) / float64(len(interarrivalTimes)+sum)}
   121  	if mle.P == 1 {
   122  		// This happens if there are no gaps between events.
   123  		// In this case Anderson-Darling is undefined because
   124  		// the CDF is 1.
   125  		return
   126  	}
   127  
   128  	// Compute Anderson-Darling goodness-of-fit for the observed
   129  	// distribution against the theoretical distribution.
   130  	var err error
   131  	ad, err = AndersonDarlingTest(interarrivalTimes, mle)
   132  	if err != nil {
   133  		log.Fatal("Anderson-Darling test failed: ", err)
   134  	}
   135  
   136  	return
   137  }
   138  
   139  func (r *FlakeTestResult) Dump(w io.Writer) {
   140  	for i := range r.All {
   141  		reg := &r.All[len(r.All)-i-1]
   142  		gof := 0.0
   143  		if reg.GoodnessOfFit != nil {
   144  			gof = reg.GoodnessOfFit.P
   145  		}
   146  
   147  		fmt.Fprintln(w, reg.First, 0, 0)
   148  		fmt.Fprintln(w, reg.First, reg.FailureProbability, gof)
   149  		fmt.Fprintln(w, reg.Last, reg.FailureProbability, gof)
   150  		fmt.Fprintln(w, reg.Last, 0, 0)
   151  	}
   152  }
   153  
   154  // StillHappening returns the probability that the flake is still
   155  // happening as of time t.
   156  func (r *FlakeRegion) StillHappening(t int) float64 {
   157  	if t < r.First {
   158  		return 0
   159  	}
   160  	dist := GeometricDist{P: r.FailureProbability, Start: r.Last + 1}
   161  	return 1 - dist.CDF(t)
   162  }
   163  
   164  // Bounds returns the time at which the probability that the failure
   165  // started rises above p and the time at which the probability that
   166  // the failure stopped falls below p. Note that this has no idea of
   167  // the "current" time, so stop may be "in the future."
   168  func (r *FlakeRegion) Bounds(p float64) (start, stop int) {
   169  	dist := GeometricDist{P: r.FailureProbability}
   170  	delta := dist.InvCDF(1 - p)
   171  	return r.First - delta, r.Last + delta
   172  }
   173  
   174  // StartedAtOrBefore returns the probability that the failure start at
   175  // or before time t.
   176  func (r *FlakeRegion) StartedAtOrBefore(t int) float64 {
   177  	if t > r.First {
   178  		return 1
   179  	}
   180  	dist := GeometricDist{P: r.FailureProbability}
   181  	return 1 - dist.CDF(r.First-t-1)
   182  }
   183  
// StartedAt returns the probability that the failure started at
// exactly time t, using the geometric PMF of the distance from t back
// to the first observed failure. (Companion to StartedAtOrBefore,
// which gives the cumulative form.)
func (r *FlakeRegion) StartedAt(t int) float64 {
	dist := GeometricDist{P: r.FailureProbability}
	return dist.PMF(r.First - t)
}
   188  
// Culprit gives the probability P that the event at time T was
// responsible for a failure.
type Culprit struct {
	// P is the probability that the event at time T introduced
	// the failure.
	P float64
	// T is the commit index of the candidate event.
	T int
}
   195  
   196  // Culprits returns the possible culprits for this failure up to a
   197  // cumulative probability of cumProb or at most limit events. Culprits
   198  // are returned in reverse time order (from most likely culprit to
   199  // least likely).
   200  func (r *FlakeRegion) Culprits(cumProb float64, limit int) []Culprit {
   201  	culprits := []Culprit{}
   202  
   203  	total := 0.0
   204  	for t := r.First; t >= 0 && t > r.First-limit; t-- {
   205  		p := r.StartedAt(t)
   206  		culprits = append(culprits, Culprit{P: p, T: t})
   207  		total += p
   208  		if total > cumProb {
   209  			break
   210  		}
   211  	}
   212  
   213  	return culprits
   214  }