go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/changepoints/bayesian/bayesian.go (about) 1 // Copyright 2023 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package bayesian implements bayesian analysis for detecting change points. 16 package bayesian 17 18 import ( 19 "math" 20 21 "go.chromium.org/luci/analysis/internal/changepoints/inputbuffer" 22 ) 23 24 type ChangepointPredictor struct { 25 // Threshold for creating new change points. 26 ChangepointLikelihood float64 27 28 // The prior for the rate at which a test's runs have any 29 // unexpected test result. 30 // This is the prior for estimating the ratio 31 // HasUnexpected / Runs of a segment. 32 // 33 // Generally tests tend to be either consistently passing or 34 // consistently failing, with a bias towards consistently 35 // passing, so shape parameters Alpha < 1, Beta < 1, Alpha < Beta 36 // are typically selected (e.g. alpha = 0.3, beta = 0.5). 37 HasUnexpectedPrior BetaDistribution 38 39 // The prior for the rate at which a test's runs have 40 // only unexpected results, given they have at least 41 // two results and one is unexpected. 42 // 43 // This is the prior for estimating UnexpectedAfterRetry / Retried. 44 // Generally the result of retrying a fail inside a test run 45 // either leads to a pass (fairly consistently) or another failure 46 // (fairly consistently). Consequently, shape parameters Alpha < 1, 47 // Beta < 1 are advised (e.g. alpha = 0.5, beta = 0.5). 48 UnexpectedAfterRetryPrior BetaDistribution 49 } 50 51 // identifyChangePoints identifies all change point for given test history. 52 // 53 // This method requires the provided history to be sorted by commit position 54 // (either ascending or descending is fine). 55 // It allows multiple verdicts to be specified per commit position, by 56 // including those verdicts as adjacent elements in the history slice. 57 // 58 // This function returns the indices (in the history slice) of the change points 59 // identified. If an index i is returned, it means the history is segmented as 60 // history[:i] and history[i:]. 61 // The indices returned are sorted ascendingly (lowest index first). 62 func (a ChangepointPredictor) identifyChangePoints(history []inputbuffer.PositionVerdict) []int { 63 if len(history) == 0 { 64 panic("test history is empty") 65 } 66 67 relativeLikelihood, bestChangepoint := a.FindBestChangepoint(history) 68 if (relativeLikelihood + math.Log(a.ChangepointLikelihood)) <= 0 { 69 // Do not split. 70 return nil 71 } 72 // Identify further split points on the left and right hand sides, recursively. 73 result := a.identifyChangePoints(history[:bestChangepoint]) 74 result = append(result, bestChangepoint) 75 rightChangepoints := a.identifyChangePoints(history[bestChangepoint:]) 76 for _, changePoint := range rightChangepoints { 77 // Adjust the offset of splitpoints in the right half, 78 // from being relative to the start of the right half 79 // to being relative to the start of the entire history. 80 result = append(result, changePoint+bestChangepoint) 81 } 82 return result 83 } 84 85 // FindBestChangepoint finds the change point position that maximises 86 // the likelihood of observing the given test history. 87 // 88 // It returns the position of the change point in the history slice, 89 // as well as the change in log-likelihood attributable to the change point, 90 // relative to the `no change point` case. 91 // 92 // The semantics of the returned position are as follows: 93 // a position p means the history is segmented as 94 // history[:p] and history[p:]. 95 // If the returned position is 0, it means no change point position was 96 // better than the `no change point` case. 97 // 98 // This method requires the provided history to be sorted by 99 // commit position (either ascending or descending is fine). 100 // It allows multiple verdicts to be specified per 101 // commit position, by including those verdicts as adjacent 102 // elements in the history slice. 103 // 104 // Note that if multiple verdicts are specified per commit position, 105 // the returned position will only ever be between two commit 106 // positions in the history, i.e. it holds that 107 // history[position-1].CommitPosition != history[position].CommitPosition 108 // (or position == 0). 109 // 110 // This method assumes a uniform prior for all change point positions, 111 // including the no change point case. 112 // If we are to bias towards the no change point case, thresholding 113 // should be applied to relativeLikelihood before considering the 114 // change point real. 115 func (a ChangepointPredictor) FindBestChangepoint(history []inputbuffer.PositionVerdict) (relativeLikelihood float64, position int) { 116 length := len(history) 117 118 // Stores the total for the entire history. 119 var total counts 120 for _, v := range history { 121 total = total.addVerdict(v) 122 } 123 124 // Calculate the absolute log-likelihood of observing the 125 // history assuming there is no change point. 126 firstTrySL := NewSequenceLikelihood(a.HasUnexpectedPrior) 127 retrySL := NewSequenceLikelihood(a.UnexpectedAfterRetryPrior) 128 prioriLogLikelihood := firstTrySL.LogLikelihood(total.HasUnexpected, total.Runs) + retrySL.LogLikelihood(total.UnexpectedAfterRetry, total.Retried) 129 130 // bestChangepoint represents the index of the best change point. 131 // The change point is said to occur before the corresponding slice 132 // element, so that results[:bestChangepoint] and results[bestChangepoint:] 133 // represents the two distinct test history series divided by the 134 // change point. 135 bestChangepoint := 0 136 bestLikelihood := -math.MaxFloat64 137 138 // leftUnexpected stores the totals for result positions 139 // history[0...i-1 (inclusive)]. 140 var i int 141 var left counts 142 143 // A heuristic for determining which points in the history 144 // are interesting to evaluate. 145 var heuristic changePointHeuristic 146 147 // The provided history may have multiple verdicts for the same 148 // commit position. As we should only consider change points between 149 // commit positions (not inside them), we will iterate over the 150 // history using nextPosition(). 151 152 // Advance past the first commit position. 153 i, pending := nextPosition(history, 0) 154 left = left.add(pending) 155 heuristic.addToHistory(pending) 156 157 for i < length { 158 // Find the end of the next commit position. 159 // Pending contains the counts from history[i:nextIndex]. 160 nextIndex, pending := nextPosition(history, i) 161 162 // Only consider change points at positions that 163 // are heuristically likely, to save on compute cycles. 164 // The heuristic is designed to be consistent with 165 // the sequence likelihood model, so will not eliminate 166 // evaluation of positions that have no chance of 167 // maximising bestLikelihood. 168 if heuristic.isChangepointPossibleWithNext(pending) { 169 right := total.subtract(left) 170 171 // Calculate the likelihood of observing sequence 172 // given there is a change point at this position. 173 leftLikelihood := firstTrySL.LogLikelihood(left.HasUnexpected, left.Runs) + retrySL.LogLikelihood(left.UnexpectedAfterRetry, left.Retried) 174 rightLikelihood := firstTrySL.LogLikelihood(right.HasUnexpected, right.Runs) + retrySL.LogLikelihood(right.UnexpectedAfterRetry, right.Retried) 175 conditionalLikelihood := leftLikelihood + rightLikelihood 176 if conditionalLikelihood > bestLikelihood { 177 bestChangepoint = i 178 bestLikelihood = conditionalLikelihood 179 } 180 } 181 182 // Advance to the next commit position. 183 left = left.add(pending) 184 heuristic.addToHistory(pending) 185 i = nextIndex 186 } 187 return bestLikelihood - prioriLogLikelihood, bestChangepoint 188 } 189 190 // nextPosition allows iterating over test history one commit position at a time. 191 // 192 // It finds the index `nextIndex` that represents advancing exactly one commit 193 // position from `index`, and returns the counts of verdicts that were 194 // advanced over. 195 // 196 // If there is only one verdict for a commit position, nextIndex will be index + 1, 197 // otherwise, if there are a number of verdicts for a commit position, nextIndex 198 // will be advanced by that number. 199 // 200 // Preconditions: 201 // The provided history is in order by commit position (either ascending or 202 // descending order is fine). 203 func nextPosition(history []inputbuffer.PositionVerdict, index int) (nextIndex int, pending counts) { 204 // The commit position for which we are accumulating test runs. 205 commitPosition := history[index].CommitPosition 206 207 var c counts 208 nextIndex = index 209 for ; nextIndex < len(history) && history[nextIndex].CommitPosition == commitPosition; nextIndex++ { 210 c = c.addVerdict(history[nextIndex]) 211 } 212 return nextIndex, c 213 }