go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/changepoints/bayesian/confidence_interval.go (about) 1 // Copyright 2023 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bayesian 16 17 import ( 18 "math" 19 20 "go.chromium.org/luci/analysis/internal/changepoints/inputbuffer" 21 ) 22 23 const ( 24 // Confidence interval tail is the default tail value for confidence analysis. 25 // 0.005 tail gives 99% confidence interval. 26 ConfidenceIntervalTail = 0.005 27 ) 28 29 // changePointPositionConfidenceInterval returns the (100% - (2 * tail)) 30 // two-tailed confidence interval for a change point that occurs in the 31 // given slice of history. 32 // E.g. tail = 0.005 gives the 99% confidence interval. 33 // For details, please see go/bayesian-changepoint-estimation. 34 // In practice, after detecting a change point, we will run this function for 35 // the combination of the left and the right segments of the change points. 36 // For example, if there are 3 segments: 37 // [3, 9], [10, 15], [16, 21]. 38 // To analyze the confidence interval around change point at position 16, we 39 // will run this function with the slice of history between 10 and 21. 40 func (a ChangepointPredictor) changePointPositionConfidenceInterval(history []inputbuffer.PositionVerdict, tail float64) (min int, max int) { 41 length := len(history) 42 if length == 0 { 43 panic("test history is empty") 44 } 45 46 // Stores the total for the entire history. 47 var total counts 48 for _, v := range history { 49 total = total.addVerdict(v) 50 } 51 52 // changePointLogLikelihoods stores the log-likelihood of the observing the 53 // given test verdict sequence. 54 // i.e. log(P(Y | C = k)), 55 // where Y is the observations, and "C = k" means there is a change point at 56 // position k. 57 // We only store the loglikelihood of positions that mark the start of a new 58 // commit, positions that are not the start of a new commit will be skipped. 59 // Store changePointIndices so we can map back to the original slice later. 60 changePointIndices := []int{} 61 changePointLogLikelihoods := []float64{} 62 63 // Create SequenceLikelihood to calculate the likelihood. 64 firstTrySL := NewSequenceLikelihood(a.HasUnexpectedPrior) 65 retrySL := NewSequenceLikelihood(a.UnexpectedAfterRetryPrior) 66 67 // left counts the statistics to the left of a position. 68 var left counts 69 70 // Advance past the first commit position. 71 i, pending := nextPosition(history, 0) 72 left = left.add(pending) 73 74 for i < length { 75 // right counts the statistics from [i:]. 76 right := total.subtract(left) 77 leftLikelihood := firstTrySL.LogLikelihood(left.HasUnexpected, left.Runs) + retrySL.LogLikelihood(left.UnexpectedAfterRetry, left.Retried) 78 rightLikelihood := firstTrySL.LogLikelihood(right.HasUnexpected, right.Runs) + retrySL.LogLikelihood(right.UnexpectedAfterRetry, right.Retried) 79 80 // conditionalLikelihood is log(P(Y | C = i)). 81 conditionalLikelihood := leftLikelihood + rightLikelihood 82 changePointIndices = append(changePointIndices, i) 83 changePointLogLikelihoods = append(changePointLogLikelihoods, conditionalLikelihood) 84 85 // Advance to the next commit position. 86 nextIndex, pending := nextPosition(history, i) 87 left = left.add(pending) 88 i = nextIndex 89 } 90 91 // totalLogLikelihood calculates log(P(Y)) - log (1/D). 92 // where D is the length of changePointLogLikelihoods. 93 // P(Y) = SUM OVER k of P(C = k AND Y) 94 // = SUM OVER k of ((P(Y | C = k)*P(C = k)) 95 // = SUM OVER k of ((P(Y | C = k)* 1/D) 96 // = 1/D * SUM OVER k of (P(Y | C = k) 97 // because we assume a uniform distribution for P(C = k). 98 // So 99 // log(P(Y)) = log(1/D * SUM OVER k of (P(Y | C = k)) 100 // = log(1/D) + log(SUM OVER k of (P(Y | C = k))) 101 // = log(1/D) + totalLogLikelihood 102 totalLogLikelihood := AddLogLikelihoods(changePointLogLikelihoods) 103 104 // Stores the likelihood of P(C <= t | Y). 105 cumulativeLikelihood := 0.0 106 min = 0 107 max = len(changePointLogLikelihoods) - 1 108 109 for k := 0; k < len(changePointLogLikelihoods); k++ { 110 // Calculates P(C = k | Y). 111 // P (C = k | Y) = P(C = k AND Y) / P(Y) 112 // = P(Y | C = k) * P(C = k) / P(Y) 113 // = P(Y | C = k) * (1 / D) / P(Y) 114 // = exp (log (P(Y | C = k) * (1 / D) / P(Y))) 115 // = exp (log(P(Y | C = k) + log (1/D) - log (P(Y)) 116 // = exp (changePointLogLikelihoods[k] + log (1/D) - log(P(Y)) 117 // = exp (changePointLogLikelihoods[k] + log (1/D) - (log(1/D) + totalLogLikelihood)) 118 // = exp (changePointLogLikelihoods[k] - totalLogLikelihood) 119 changePointLikelihood := math.Exp(changePointLogLikelihoods[k] - totalLogLikelihood) 120 cumulativeLikelihood += changePointLikelihood 121 122 if cumulativeLikelihood < tail { 123 min = k 124 } 125 if cumulativeLikelihood > (1.0-tail) && max == (len(changePointLogLikelihoods)-1) { 126 max = k 127 } 128 } 129 130 return changePointIndices[min], changePointIndices[max] 131 } 132 133 // ChangePoints runs change point detection and confidence 134 // interval analysis for history. 135 // history is sorted by commit position ascendingly (oldest commit first). 136 func (a ChangepointPredictor) ChangePoints(history []inputbuffer.PositionVerdict, tail float64) []inputbuffer.ChangePoint { 137 changePointIndices := a.identifyChangePoints(history) 138 139 // For simplicity, we add a fake index to the end. 140 ids := make([]int, len(changePointIndices)+1) 141 copy(ids, changePointIndices) 142 ids[len(ids)-1] = len(history) 143 144 result := []inputbuffer.ChangePoint{} 145 startIndex := 0 146 for i := 1; i < len(ids); i++ { 147 vds := history[startIndex:ids[i]] 148 // min and max needs to be offset by startIndex. 149 min, max := a.changePointPositionConfidenceInterval(vds, ConfidenceIntervalTail) 150 result = append(result, inputbuffer.ChangePoint{ 151 NominalIndex: ids[i-1], 152 LowerBound99ThIndex: min + startIndex, 153 UpperBound99ThIndex: max + startIndex, 154 }) 155 startIndex = ids[i-1] 156 } 157 return result 158 }