go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/changepoints/bayesian/sequence_likelihood.go (about)

     1  // Copyright 2023 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //	http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bayesian
    16  
    17  import (
    18  	"math"
    19  
    20  	"gonum.org/v1/gonum/mathext"
    21  )
    22  
    23  // BetaDistribution specifies the parameters of a Beta distribution.
    24  // See https://en.wikipedia.org/wiki/Beta_distribution.
    25  //
    26  // In this package, we use beta distribution as the prior for a
    27  // test's propensity to produce unexpected results, because it
    28  // simplifies the maths.
    29  //
    30  // Alpha = 1, Beta = 1 indicates an uninformative (uniform) prior.
    31  type BetaDistribution struct {
    32  	// Alpha parameter of Beta distribution.
    33  	// Must be greater than 0.
    34  	Alpha float64
    35  	// Beta parameter of the Beta distribution.
    36  	// Must be greater than 0.
    37  	Beta float64
    38  }
    39  
    40  // SequenceLikelihood is used to compute the likelihood of observing
    41  // a particular sequence of pass/fail results, assuming the failure rate
    42  // p of the test remains constant over the sequence, and p is unknown but
    43  // has the known prior Beta(alpha,beta) for some alpha and beta
    44  // where alpha > 0 and beta > 0.
    45  //
    46  // See go/bayesian-changepoint-estimation for details.
    47  type SequenceLikelihood struct {
    48  	a float64
    49  	b float64
    50  	// Stores ln(Beta(a, b)), where Beta is Euler's beta function,
    51  	// equal to (Gamma(a+b) / (Gamma(a) * Gamma(b)).
    52  	logK float64
    53  }
    54  
    55  func NewSequenceLikelihood(prior BetaDistribution) SequenceLikelihood {
    56  	if prior.Alpha <= 0 {
    57  		panic("alpha must be greater than zero")
    58  	}
    59  	if prior.Beta <= 0 {
    60  		panic("beta must be greater than zero")
    61  	}
    62  	return SequenceLikelihood{
    63  		a:    prior.Alpha,
    64  		b:    prior.Beta,
    65  		logK: Lgamma(prior.Alpha+prior.Beta) - (Lgamma(prior.Alpha) + Lgamma(prior.Beta)),
    66  	}
    67  }
    68  
    69  func Lgamma(x float64) float64 {
    70  	value, _ := math.Lgamma(x)
    71  	return value
    72  }
    73  
    74  // LogLikelihood returns the log-likelihood of observing a particular
    75  // sequence of pass/fail results (Y_0, Y_1 ... Y_{n-1}) of a known length.
    76  // i.e. returns log(P(Y)).
    77  //
    78  // See LogLikelihoodPartial for more details.
    79  func (s SequenceLikelihood) LogLikelihood(x, n int) float64 {
    80  	return s.LogLikelihoodPartial(x, n, 1.0)
    81  }
    82  
    83  // LogLikelihoodPartial returns the log-likelihood of:
    84  //   - observing a particular binary sequence Y_0, Y_1 ... Y_{n-1},
    85  //     knowing its length AND
    86  //   - concurrently, the underlying proportion p (= P(Y_i = 1)) that led
    87  //     to that sequence being less than or equal to maxProportion.
    88  //
    89  // i.e. the method returns log(P(Y AND p <= maxProportion)).
    90  //
    91  // The following assumptions are made:
    92  //   - Y ~ BernoilliProcess(p, n), i.e. Y is n independent samples of a
    93  //     bernoilli process with probability p of observing a 1.
    94  //   - p ~ Beta(a, b), i.e. p is unknown but taken from a Beta distribution
    95  //     with parameters a and b. If a = b = 1 is provided, the Beta distribution
    96  //     collapses to the uniform (uninformative) prior.
    97  //     In practice, where test failure rate is being modelled, a and b < 1
    98  //     make more sense as tests tend towards being either consistently
    99  //     failing or consistently passing.
   100  //
   101  // maxProportion must be between 0.0 and 1.0 (inclusive), and if it is 1.0,
   102  // the method returns P(Y).
   103  //
   104  // While the method returns the probability of observing a precise sequence Y,
   105  // the only statistics needed to evaluate this probability is x (the number
   106  // of elements that are true) and n (the total), so these are the only values
   107  // accepted as arguments. (As such, inputs are also constrained as 0 <= x <= n.)
   108  //
   109  // Log-likelihoods are returned, as for larger values of n (e.g. n > 100)
   110  // the probabilities for any particular sequence can get very small.
   111  // Working in log-probabilities avoids floating point precision issues.
   112  //
   113  // [1] https://en.wikipedia.org/wiki/Beta_distribution.
   114  func (s SequenceLikelihood) LogLikelihoodPartial(x, n int, maxProportion float64) float64 {
   115  	// The likelihood of observing a particular binary sequence Y
   116  	// (consisting of elements Y_0, Y_1 ... Y_{n-1}), sampled from
   117  	// a Bernoilli process with probability p is given by:
   118  	//   P(Y | p) = p^x * (1-p)^(n-x)
   119  	//
   120  	// Where x is the total number of 'true' values in the sequence
   121  	// and n is the total number of values in the sequence.
   122  	//
   123  	// However, we do not know p, only its prior, p ~ Beta(a, b).
   124  	//
   125  	// Thus the likelihood of observing a particular sequence
   126  	// (not knowing p) is given by integrating over the possible
   127  	// values of p as follows:
   128  	//  P(Y) = {Integral over p, from 0 to 1} beta_pdf(p) * p^x * (1-p)^(n-x) dp
   129  	//
   130  	// Where beta_pdf(p) is the probability density function of the
   131  	// Beta distribution, i.e.
   132  	//   beta_pdf(p) = K * p^(a-1) * (1-p)^(b-1), where K = 1 / Beta(a, b).
   133  	// And Beta is Euler's beta function.
   134  	//
   135  	// More generally, the joint probability of Y and some value of p
   136  	// is as follows:
   137  	// P(Y AND p <= maxProportion)
   138  	//    = {Integral over p, from 0 to maxProportion} beta_pdf(p) * p^x * (1-p)^(n-x) dp
   139  	//
   140  	// Simplifying we have:
   141  	//    = {Integral over p from 0 to maxProportion} (K * p^(a-1) * (1-p)^(b-1)) * p^x * (1-p)^(n-x) dp
   142  	//          // Expanding beta_pdf()
   143  	//    = K * {Integral over p from 0 to maxProportion} (p^(a-1) * (1-p)^(b-1)) * p^x * (1-p)^(n-x) dp
   144  	//          // Moving the constant K out of the integral.
   145  	//    = K * {Integral over p from 0 to maxProportion} p^(x+a-1) * (1-p)^(n-x+b-1) dp
   146  	//          // using the rule that b^i * b^j = b^(i+j)
   147  	//    = K * B(maxProportion; (x+a-1)+1, (n-x+b-1)+1)
   148  	//          // Rewriting integral in terms of Euler's Incomplete Beta function, B.
   149  	//    = K * B(maxProportion; x+a, n-x+b)
   150  	//          // Simplifying arguments.
   151  	// For a > 0, b > 0, x <= n, n >= 0, x >= 0.
   152  
   153  	// Fortunately for us, Euler's Incomplete Beta function B is well-studied and
   154  	// there are efficient algorithms for calculating it.
   155  	//
   156  	// The incomplete beta function B(k; c, d) can be rewritten in terms of the
   157  	// Beta function, Beta, and the regularized incomplete beta function I_k:
   158  	//   B(k; c, d) = Beta(c, d) * I_k(c, d).
   159  	//
   160  	// The function for I_k is available in gonum's mathext library as RegIncBeta.
   161  	//
   162  	// The Beta function itself can be rewritten in terms of the Gamma function:
   163  	//   Beta(c, d) = Gamma(c) * Gamma(d) / Gamma(c + d)
   164  	//
   165  	// Where the Gamma function is built into go's standard math library.
   166  	//
   167  	// As we are going to worth with logarithms, we use the properties that
   168  	// log(A*B) = log(A) + log(B), and that log(A/B) = log(A) - log(B).
   169  
   170  	c := float64(x) + s.a
   171  	d := float64(n-x) + s.b
   172  
   173  	// Implement Euler's Beta using the Gamma function, using:
   174  	// Beta(c, d) = Gamma(c) * Gamma(d) / Gamma(c + d).
   175  	//
   176  	// Note we are dealing with logarithms, so we use that:
   177  	// log(A*B) = log(A) + log(B), and that log(A/B) = log(A) - log(B).
   178  	logGammaC, _ := math.Lgamma(c)
   179  	logGammaD, _ := math.Lgamma(d)
   180  	logGammaCD, _ := math.Lgamma(c + d)
   181  	logBeta := logGammaC + logGammaD - logGammaCD
   182  
   183  	logRegIncBeta := 0.0
   184  	if maxProportion < 1.0 {
   185  		logRegIncBeta = math.Log(mathext.RegIncBeta(c, d, maxProportion))
   186  	}
   187  
   188  	// K * Beta(c, d) * I_{maxProportion}(c, d)
   189  	return s.logK + logBeta + logRegIncBeta
   190  }
   191  
   192  func AddLogLikelihoods(x []float64) float64 {
   193  	if len(x) == 1 {
   194  		return x[0]
   195  	}
   196  	maxValue := -math.MaxFloat64
   197  	for i := 0; i < len(x); i++ {
   198  		if x[i] > maxValue {
   199  			maxValue = x[i]
   200  		}
   201  	}
   202  
   203  	// Rearrange:
   204  	//
   205  	//   log(e^x + e^y + e^z + ...)
   206  	// = log(e^a * (e^(x-a) + e^(y-a) + e^(z-a))),
   207  	//   letting a = max(x, y, z, ...)
   208  	// = log(e^a) + log(e^(x-a) + e^(y-a) + e^(z-a))
   209  	// = a + log(e^(x-a) + e^(y-a) + e^(z-a))
   210  	//
   211  	// This keeps us running into limits of float64s
   212  	// (e.g. max exponent of 10^(+/-308)) when
   213  	// adding very small (or very large) values.
   214  	sum := 0.0
   215  	for i := 0; i < len(x); i++ {
   216  		sum += math.Exp(x[i] - maxValue)
   217  	}
   218  	return maxValue + math.Log(sum)
   219  }