go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/analysis/internal/clustering/algorithms/failurereason/failurereason.go (about)

     1  // Copyright 2022 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package failurereason contains the failure reason clustering algorithm
    16  // for LUCI Analysis.
    17  //
    18  // This algorithm removes ips, temp file names, numbers and other such tokens
    19  // to cluster similar reasons together.
    20  package failurereason
    21  
    22  import (
    23  	"bytes"
    24  	"crypto/sha256"
    25  	"errors"
    26  	"fmt"
    27  	"regexp"
    28  	"strconv"
    29  	"strings"
    30  	"text/template"
    31  
    32  	"go.chromium.org/luci/analysis/internal/clustering"
    33  	"go.chromium.org/luci/analysis/internal/config/compiledcfg"
    34  )
    35  
    36  // AlgorithmVersion is the version of the clustering algorithm. The algorithm
    37  // version should be incremented whenever existing test results may be
    38  // clustered differently (i.e. Cluster(f) returns a different value for some
    39  // f that may have been already ingested).
    40  const AlgorithmVersion = 6
    41  
    42  // AlgorithmName is the identifier for the clustering algorithm.
    43  // LUCI Analysis requires all clustering algorithms to have a unique
    44  // identifier. Must match the pattern ^[a-z0-9-.]{1,32}$.
    45  //
    46  // The AlgorithmName must encode the algorithm version, so that each version
    47  // of an algorithm has a different name.
    48  var AlgorithmName = fmt.Sprintf("%sv%v", clustering.FailureReasonAlgorithmPrefix, AlgorithmVersion)
    49  
    50  // BugTemplate is the template for the content of bugs created for failure
    51  // reason clusters. A list of test IDs is included to improve searchability
    52  // by test name.
    53  var BugTemplate = template.Must(template.New("reasonTemplate").Parse(
    54  	`This bug is for all test failures where the primary error message is similiar to the following (ignoring numbers and hexadecimal values):
    55  {{.FailureReason}}
    56  
    57  The following test(s) were observed to have matching failures at this time (at most five examples listed):
    58  {{range .TestIDs}}- {{.}}
    59  {{end}}`))
    60  
    61  // To match any 1 or more digit numbers, or hex values (often appear in temp
    62  // file names or prints of pointers), which will be replaced.
    63  var clusterExp = regexp.MustCompile(`[/+0-9a-zA-Z]{10,}=+|[\-0-9a-fA-F \t]{16,}|[0-9a-fA-Fx]{8,}|[0-9]+`)
    64  
    65  // likeEscapeRewriter escapes \, % and _ so that they are not interpreted by LIKE
    66  // pattern matching.
    67  var likeEscapeRewriter = strings.NewReplacer(`\`, `\\`, `%`, `\%`, `_`, `\_`)
    68  
    69  // likeUnescapeRewriter unescapes the special sequences \\, \% and \_
    70  // used in LIKE expressions, so that literal text matched appears unescaped.
    71  // This is used to make cluster definitions read more naturally on the UI,
    72  // even if it introduces some ambiguity.
    73  var likeUnescapeRewriter = strings.NewReplacer(`\\`, `\`, `\%`, `%`, `\_`, `_`)
    74  
    75  // Algorithm represents an instance of the reason-based clustering
    76  // algorithm.
    77  type Algorithm struct{}
    78  
    79  // Name returns the identifier of the clustering algorithm.
    80  func (a *Algorithm) Name() string {
    81  	return AlgorithmName
    82  }
    83  
    84  // clusterLike returns the reason LIKE expression that defines
    85  // the cluster the given test result belongs to.
    86  //
    87  // By default only numbers, hexadecimals and base64-encoding-like
    88  // sequences are stripped out when clustering. But using configurable
    89  // masking patterns, it is possible to strip out other parts too.
    90  func clusterLike(config *compiledcfg.ProjectConfig, failure *clustering.Failure) string {
    91  	// Escape \, % and _ so that they are not interpreted by LIKE
    92  	// pattern matching.
    93  	likePattern := likeEscapeRewriter.Replace(failure.Reason.PrimaryErrorMessage)
    94  
    95  	// Replace hexadecimal sequences with wildcard matches. This is technically
    96  	// broader than our original cluster definition, but is more readable, and
    97  	// usually ends up matching the exact same set of failures.
    98  	likePattern = clusterExp.ReplaceAllString(likePattern, "%")
    99  
   100  	// Apply configured masks.
   101  	for _, re := range config.ReasonMaskPatterns {
   102  		likePattern = applyMask(re, likePattern)
   103  	}
   104  
   105  	return likePattern
   106  }
   107  
   108  // applyMask applies the given masking regexp to an error message.
   109  //
   110  // The regular expression re must have exactly one
   111  // capturing sub-expression, and the part of this expression
   112  // which matches the errorMessage is replaced with the LIKE
   113  // wildcard operator "%".
   114  //
   115  // Masking is applied to all non-overlapping matches.
   116  func applyMask(re *regexp.Regexp, errorMessage string) string {
   117  	matches := re.FindAllStringSubmatchIndex(errorMessage, -1)
   118  	if len(matches) == 0 {
   119  		return errorMessage
   120  	}
   121  	var builder strings.Builder
   122  	builder.Grow(len(errorMessage))
   123  
   124  	// Replace the text in the first capturing subexpression with "%".
   125  	var startIndex int
   126  	for _, match := range matches {
   127  		matchStart := match[2]
   128  		matchEnd := match[3]
   129  		builder.WriteString(errorMessage[startIndex:matchStart])
   130  		builder.WriteString("%")
   131  		startIndex = matchEnd
   132  	}
   133  	builder.WriteString(errorMessage[startIndex:])
   134  	return builder.String()
   135  }
   136  
   137  // clusterKey returns the unhashed key for the cluster. Absent an extremely
   138  // unlikely hash collision, this value is the same for all test results
   139  // in the cluster.
   140  func clusterKey(config *compiledcfg.ProjectConfig, failure *clustering.Failure) string {
   141  	// Use like expression as the clustering key.
   142  	return clusterLike(config, failure)
   143  }
   144  
   145  // Cluster clusters the given test failure and returns its cluster ID (if it
   146  // can be clustered) or nil otherwise.
   147  func (a *Algorithm) Cluster(config *compiledcfg.ProjectConfig, failure *clustering.Failure) []byte {
   148  	if failure.Reason == nil || failure.Reason.PrimaryErrorMessage == "" {
   149  		return nil
   150  	}
   151  	id := clusterKey(config, failure)
   152  	// sha256 hash the resulting string.
   153  	h := sha256.Sum256([]byte(id))
   154  	// Take first 16 bytes as the ID. (Risk of collision is
   155  	// so low as to not warrant full 32 bytes.)
   156  	return h[0:16]
   157  }
   158  
   159  // ClusterDescription returns a description of the cluster, for use when
   160  // filing bugs, with the help of the given example failure.
   161  func (a *Algorithm) ClusterDescription(config *compiledcfg.ProjectConfig, summary *clustering.ClusterSummary) (*clustering.ClusterDescription, error) {
   162  	if summary.Example.Reason == nil || summary.Example.Reason.PrimaryErrorMessage == "" {
   163  		return nil, errors.New("cluster summary must contain example with failure reason")
   164  	}
   165  	type templateData struct {
   166  		FailureReason string
   167  		TestIDs       []string
   168  	}
   169  	var input templateData
   170  
   171  	// Quote and escape.
   172  	primaryError := strconv.QuoteToGraphic(summary.Example.Reason.PrimaryErrorMessage)
   173  	// Unquote, so we are left with the escaped error message only.
   174  	primaryError = primaryError[1 : len(primaryError)-1]
   175  
   176  	input.FailureReason = primaryError
   177  	for _, t := range summary.TopTests {
   178  		input.TestIDs = append(input.TestIDs, clustering.EscapeToGraphical(t))
   179  	}
   180  	var b bytes.Buffer
   181  	if err := BugTemplate.Execute(&b, input); err != nil {
   182  		return nil, err
   183  	}
   184  
   185  	return &clustering.ClusterDescription{
   186  		Title:       primaryError,
   187  		Description: b.String(),
   188  	}, nil
   189  }
   190  
   191  // ClusterTitle returns a definition of the cluster, typically in
   192  // the form of an unhashed clustering key which is common
   193  // across all test results in a cluster. For display on the cluster
   194  // page or cluster listing.
   195  func (a *Algorithm) ClusterTitle(config *compiledcfg.ProjectConfig, example *clustering.Failure) string {
   196  	if example.Reason == nil || example.Reason.PrimaryErrorMessage == "" {
   197  		return ""
   198  	}
   199  	// Should match exactly the algorithm in Cluster(...)
   200  	key := clusterKey(config, example)
   201  
   202  	// Remove LIKE escape sequences, as they are confusing in this context.
   203  	key = likeUnescapeRewriter.Replace(key)
   204  
   205  	return clustering.EscapeToGraphical(key)
   206  }
   207  
   208  // FailureAssociationRule returns a failure association rule that
   209  // captures the definition of cluster containing the given example.
   210  func (a *Algorithm) FailureAssociationRule(config *compiledcfg.ProjectConfig, example *clustering.Failure) string {
   211  	if example.Reason == nil || example.Reason.PrimaryErrorMessage == "" {
   212  		return ""
   213  	}
   214  	likePattern := clusterLike(config, example)
   215  
   216  	// Escape the pattern as a string literal. Double-quoted go
   217  	// string literals are also valid GoogleSQL string literals.
   218  	stringLiteral := strconv.QuoteToGraphic(likePattern)
   219  	return fmt.Sprintf("reason LIKE %s", stringLiteral)
   220  }