go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/changepoints/bayesian/confidence_interval.go (about)

     1  // Copyright 2023 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bayesian
    16  
    17  import (
    18  	"math"
    19  
    20  	"go.chromium.org/luci/analysis/internal/changepoints/inputbuffer"
    21  )
    22  
    23  const (
    24  	// Confidence interval tail is the default tail value for confidence analysis.
    25  	// 0.005 tail gives 99% confidence interval.
    26  	ConfidenceIntervalTail = 0.005
    27  )
    28  
    29  // changePointPositionConfidenceInterval returns the (100% - (2 * tail))
    30  // two-tailed confidence interval for a change point that occurs in the
    31  // given slice of history.
    32  // E.g. tail = 0.005 gives the 99% confidence interval.
    33  // For details, please see go/bayesian-changepoint-estimation.
    34  // In practice, after detecting a change point, we will run this function for
    35  // the combination of the left and the right segments of the change points.
    36  // For example, if there are 3 segments:
    37  // [3, 9], [10, 15], [16, 21].
    38  // To analyze the confidence interval around change point at position 16, we
    39  // will run this function with the slice of history between 10 and 21.
    40  func (a ChangepointPredictor) changePointPositionConfidenceInterval(history []inputbuffer.PositionVerdict, tail float64) (min int, max int) {
    41  	length := len(history)
    42  	if length == 0 {
    43  		panic("test history is empty")
    44  	}
    45  
    46  	// Stores the total for the entire history.
    47  	var total counts
    48  	for _, v := range history {
    49  		total = total.addVerdict(v)
    50  	}
    51  
    52  	// changePointLogLikelihoods stores the log-likelihood of the observing the
    53  	// given test verdict sequence.
    54  	// i.e. log(P(Y | C = k)),
    55  	// where Y is the observations, and "C = k" means there is a change point at
    56  	// position k.
    57  	// We only store the loglikelihood of positions that mark the start of a new
    58  	// commit, positions that are not the start of a new commit will be skipped.
    59  	// Store changePointIndices so we can map back to the original slice later.
    60  	changePointIndices := []int{}
    61  	changePointLogLikelihoods := []float64{}
    62  
    63  	// Create SequenceLikelihood to calculate the likelihood.
    64  	firstTrySL := NewSequenceLikelihood(a.HasUnexpectedPrior)
    65  	retrySL := NewSequenceLikelihood(a.UnexpectedAfterRetryPrior)
    66  
    67  	// left counts the statistics to the left of a position.
    68  	var left counts
    69  
    70  	// Advance past the first commit position.
    71  	i, pending := nextPosition(history, 0)
    72  	left = left.add(pending)
    73  
    74  	for i < length {
    75  		// right counts the statistics from [i:].
    76  		right := total.subtract(left)
    77  		leftLikelihood := firstTrySL.LogLikelihood(left.HasUnexpected, left.Runs) + retrySL.LogLikelihood(left.UnexpectedAfterRetry, left.Retried)
    78  		rightLikelihood := firstTrySL.LogLikelihood(right.HasUnexpected, right.Runs) + retrySL.LogLikelihood(right.UnexpectedAfterRetry, right.Retried)
    79  
    80  		// conditionalLikelihood is log(P(Y | C = i)).
    81  		conditionalLikelihood := leftLikelihood + rightLikelihood
    82  		changePointIndices = append(changePointIndices, i)
    83  		changePointLogLikelihoods = append(changePointLogLikelihoods, conditionalLikelihood)
    84  
    85  		// Advance to the next commit position.
    86  		nextIndex, pending := nextPosition(history, i)
    87  		left = left.add(pending)
    88  		i = nextIndex
    89  	}
    90  
    91  	// totalLogLikelihood calculates log(P(Y)) - log (1/D).
    92  	// where D is the length of changePointLogLikelihoods.
    93  	// P(Y) = SUM OVER k of P(C = k AND Y)
    94  	//      = SUM OVER k of ((P(Y | C = k)*P(C = k))
    95  	//      = SUM OVER k of ((P(Y | C = k)* 1/D)
    96  	//      = 1/D * SUM OVER k of (P(Y | C = k)
    97  	// because we assume a uniform distribution for P(C = k).
    98  	// So
    99  	// log(P(Y)) = log(1/D * SUM OVER k of (P(Y | C = k))
   100  	//           = log(1/D) + log(SUM OVER k of (P(Y | C = k)))
   101  	//           = log(1/D) + totalLogLikelihood
   102  	totalLogLikelihood := AddLogLikelihoods(changePointLogLikelihoods)
   103  
   104  	// Stores the likelihood of P(C <= t | Y).
   105  	cumulativeLikelihood := 0.0
   106  	min = 0
   107  	max = len(changePointLogLikelihoods) - 1
   108  
   109  	for k := 0; k < len(changePointLogLikelihoods); k++ {
   110  		// Calculates P(C = k | Y).
   111  		// P (C = k | Y) = P(C = k AND Y) / P(Y)
   112  		//               = P(Y | C = k) * P(C = k) / P(Y)
   113  		//               = P(Y | C = k) * (1 / D) / P(Y)
   114  		//               = exp (log (P(Y | C = k) * (1 / D) / P(Y)))
   115  		//               = exp (log(P(Y | C = k) + log (1/D) - log (P(Y))
   116  		//               = exp (changePointLogLikelihoods[k] + log (1/D) - log(P(Y))
   117  		//               = exp (changePointLogLikelihoods[k] + log (1/D) - (log(1/D) + totalLogLikelihood))
   118  		//               = exp (changePointLogLikelihoods[k] - totalLogLikelihood)
   119  		changePointLikelihood := math.Exp(changePointLogLikelihoods[k] - totalLogLikelihood)
   120  		cumulativeLikelihood += changePointLikelihood
   121  
   122  		if cumulativeLikelihood < tail {
   123  			min = k
   124  		}
   125  		if cumulativeLikelihood > (1.0-tail) && max == (len(changePointLogLikelihoods)-1) {
   126  			max = k
   127  		}
   128  	}
   129  
   130  	return changePointIndices[min], changePointIndices[max]
   131  }
   132  
   133  // ChangePoints runs change point detection and confidence
   134  // interval analysis for history.
   135  // history is sorted by commit position ascendingly (oldest commit first).
   136  func (a ChangepointPredictor) ChangePoints(history []inputbuffer.PositionVerdict, tail float64) []inputbuffer.ChangePoint {
   137  	changePointIndices := a.identifyChangePoints(history)
   138  
   139  	// For simplicity, we add a fake index to the end.
   140  	ids := make([]int, len(changePointIndices)+1)
   141  	copy(ids, changePointIndices)
   142  	ids[len(ids)-1] = len(history)
   143  
   144  	result := []inputbuffer.ChangePoint{}
   145  	startIndex := 0
   146  	for i := 1; i < len(ids); i++ {
   147  		vds := history[startIndex:ids[i]]
   148  		// min and max needs to be offset by startIndex.
   149  		min, max := a.changePointPositionConfidenceInterval(vds, ConfidenceIntervalTail)
   150  		result = append(result, inputbuffer.ChangePoint{
   151  			NominalIndex:        ids[i-1],
   152  			LowerBound99ThIndex: min + startIndex,
   153  			UpperBound99ThIndex: max + startIndex,
   154  		})
   155  		startIndex = ids[i-1]
   156  	}
   157  	return result
   158  }