go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/clustering/algorithms/failurereason/failurereason.go (about) 1 // Copyright 2022 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package failurereason contains the failure reason clustering algorithm 16 // for LUCI Analysis. 17 // 18 // This algorithm removes ips, temp file names, numbers and other such tokens 19 // to cluster similar reasons together. 20 package failurereason 21 22 import ( 23 "bytes" 24 "crypto/sha256" 25 "errors" 26 "fmt" 27 "regexp" 28 "strconv" 29 "strings" 30 "text/template" 31 32 "go.chromium.org/luci/analysis/internal/clustering" 33 "go.chromium.org/luci/analysis/internal/config/compiledcfg" 34 ) 35 36 // AlgorithmVersion is the version of the clustering algorithm. The algorithm 37 // version should be incremented whenever existing test results may be 38 // clustered differently (i.e. Cluster(f) returns a different value for some 39 // f that may have been already ingested). 40 const AlgorithmVersion = 6 41 42 // AlgorithmName is the identifier for the clustering algorithm. 43 // LUCI Analysis requires all clustering algorithms to have a unique 44 // identifier. Must match the pattern ^[a-z0-9-.]{1,32}$. 45 // 46 // The AlgorithmName must encode the algorithm version, so that each version 47 // of an algorithm has a different name. 48 var AlgorithmName = fmt.Sprintf("%sv%v", clustering.FailureReasonAlgorithmPrefix, AlgorithmVersion) 49 50 // BugTemplate is the template for the content of bugs created for failure 51 // reason clusters. A list of test IDs is included to improve searchability 52 // by test name. 53 var BugTemplate = template.Must(template.New("reasonTemplate").Parse( 54 `This bug is for all test failures where the primary error message is similiar to the following (ignoring numbers and hexadecimal values): 55 {{.FailureReason}} 56 57 The following test(s) were observed to have matching failures at this time (at most five examples listed): 58 {{range .TestIDs}}- {{.}} 59 {{end}}`)) 60 61 // To match any 1 or more digit numbers, or hex values (often appear in temp 62 // file names or prints of pointers), which will be replaced. 63 var clusterExp = regexp.MustCompile(`[/+0-9a-zA-Z]{10,}=+|[\-0-9a-fA-F \t]{16,}|[0-9a-fA-Fx]{8,}|[0-9]+`) 64 65 // likeEscapeRewriter escapes \, % and _ so that they are not interpreted by LIKE 66 // pattern matching. 67 var likeEscapeRewriter = strings.NewReplacer(`\`, `\\`, `%`, `\%`, `_`, `\_`) 68 69 // likeUnescapeRewriter unescapes the special sequences \\, \% and \_ 70 // used in LIKE expressions, so that literal text matched appears unescaped. 71 // This is used to make cluster definitions read more naturally on the UI, 72 // even if it introduces some ambiguity. 73 var likeUnescapeRewriter = strings.NewReplacer(`\\`, `\`, `\%`, `%`, `\_`, `_`) 74 75 // Algorithm represents an instance of the reason-based clustering 76 // algorithm. 77 type Algorithm struct{} 78 79 // Name returns the identifier of the clustering algorithm. 80 func (a *Algorithm) Name() string { 81 return AlgorithmName 82 } 83 84 // clusterLike returns the reason LIKE expression that defines 85 // the cluster the given test result belongs to. 86 // 87 // By default only numbers, hexadecimals and base64-encoding-like 88 // sequences are stripped out when clustering. But using configurable 89 // masking patterns, it is possible to strip out other parts too. 90 func clusterLike(config *compiledcfg.ProjectConfig, failure *clustering.Failure) string { 91 // Escape \, % and _ so that they are not interpreted by LIKE 92 // pattern matching. 93 likePattern := likeEscapeRewriter.Replace(failure.Reason.PrimaryErrorMessage) 94 95 // Replace hexadecimal sequences with wildcard matches. This is technically 96 // broader than our original cluster definition, but is more readable, and 97 // usually ends up matching the exact same set of failures. 98 likePattern = clusterExp.ReplaceAllString(likePattern, "%") 99 100 // Apply configured masks. 101 for _, re := range config.ReasonMaskPatterns { 102 likePattern = applyMask(re, likePattern) 103 } 104 105 return likePattern 106 } 107 108 // applyMask applies the given masking regexp to an error message. 109 // 110 // The regular expression re must have exactly one 111 // capturing sub-expression, and the part of this expression 112 // which matches the errorMessage is replaced with the LIKE 113 // wildcard operator "%". 114 // 115 // Masking is applied to all non-overlapping matches. 116 func applyMask(re *regexp.Regexp, errorMessage string) string { 117 matches := re.FindAllStringSubmatchIndex(errorMessage, -1) 118 if len(matches) == 0 { 119 return errorMessage 120 } 121 var builder strings.Builder 122 builder.Grow(len(errorMessage)) 123 124 // Replace the text in the first capturing subexpression with "%". 125 var startIndex int 126 for _, match := range matches { 127 matchStart := match[2] 128 matchEnd := match[3] 129 builder.WriteString(errorMessage[startIndex:matchStart]) 130 builder.WriteString("%") 131 startIndex = matchEnd 132 } 133 builder.WriteString(errorMessage[startIndex:]) 134 return builder.String() 135 } 136 137 // clusterKey returns the unhashed key for the cluster. Absent an extremely 138 // unlikely hash collision, this value is the same for all test results 139 // in the cluster. 140 func clusterKey(config *compiledcfg.ProjectConfig, failure *clustering.Failure) string { 141 // Use like expression as the clustering key. 142 return clusterLike(config, failure) 143 } 144 145 // Cluster clusters the given test failure and returns its cluster ID (if it 146 // can be clustered) or nil otherwise. 147 func (a *Algorithm) Cluster(config *compiledcfg.ProjectConfig, failure *clustering.Failure) []byte { 148 if failure.Reason == nil || failure.Reason.PrimaryErrorMessage == "" { 149 return nil 150 } 151 id := clusterKey(config, failure) 152 // sha256 hash the resulting string. 153 h := sha256.Sum256([]byte(id)) 154 // Take first 16 bytes as the ID. (Risk of collision is 155 // so low as to not warrant full 32 bytes.) 156 return h[0:16] 157 } 158 159 // ClusterDescription returns a description of the cluster, for use when 160 // filing bugs, with the help of the given example failure. 161 func (a *Algorithm) ClusterDescription(config *compiledcfg.ProjectConfig, summary *clustering.ClusterSummary) (*clustering.ClusterDescription, error) { 162 if summary.Example.Reason == nil || summary.Example.Reason.PrimaryErrorMessage == "" { 163 return nil, errors.New("cluster summary must contain example with failure reason") 164 } 165 type templateData struct { 166 FailureReason string 167 TestIDs []string 168 } 169 var input templateData 170 171 // Quote and escape. 172 primaryError := strconv.QuoteToGraphic(summary.Example.Reason.PrimaryErrorMessage) 173 // Unquote, so we are left with the escaped error message only. 174 primaryError = primaryError[1 : len(primaryError)-1] 175 176 input.FailureReason = primaryError 177 for _, t := range summary.TopTests { 178 input.TestIDs = append(input.TestIDs, clustering.EscapeToGraphical(t)) 179 } 180 var b bytes.Buffer 181 if err := BugTemplate.Execute(&b, input); err != nil { 182 return nil, err 183 } 184 185 return &clustering.ClusterDescription{ 186 Title: primaryError, 187 Description: b.String(), 188 }, nil 189 } 190 191 // ClusterTitle returns a definition of the cluster, typically in 192 // the form of an unhashed clustering key which is common 193 // across all test results in a cluster. For display on the cluster 194 // page or cluster listing. 195 func (a *Algorithm) ClusterTitle(config *compiledcfg.ProjectConfig, example *clustering.Failure) string { 196 if example.Reason == nil || example.Reason.PrimaryErrorMessage == "" { 197 return "" 198 } 199 // Should match exactly the algorithm in Cluster(...) 200 key := clusterKey(config, example) 201 202 // Remove LIKE escape sequences, as they are confusing in this context. 203 key = likeUnescapeRewriter.Replace(key) 204 205 return clustering.EscapeToGraphical(key) 206 } 207 208 // FailureAssociationRule returns a failure association rule that 209 // captures the definition of cluster containing the given example. 210 func (a *Algorithm) FailureAssociationRule(config *compiledcfg.ProjectConfig, example *clustering.Failure) string { 211 if example.Reason == nil || example.Reason.PrimaryErrorMessage == "" { 212 return "" 213 } 214 likePattern := clusterLike(config, example) 215 216 // Escape the pattern as a string literal. Double-quoted go 217 // string literals are also valid GoogleSQL string literals. 218 stringLiteral := strconv.QuoteToGraphic(likePattern) 219 return fmt.Sprintf("reason LIKE %s", stringLiteral) 220 }