github.com/aclements/go-misc@v0.0.0-20240129233631-2f6ede80790c/internal/loganal/classify.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package loganal 6 7 import ( 8 "regexp" 9 "strings" 10 ) 11 12 var ( 13 canonMsg = regexp.MustCompile(`[0-9]+`) 14 15 // numberWords matches words that consist of both letters and 16 // digits. Since this is meant to canonicalize numeric fields 17 // of error messages, we accept any Unicode letter, but only 18 // digits 0-9. We match the whole word to catch things like 19 // hexadecimal and temporary file names. 20 numberWords = regexp.MustCompile(`\pL*[0-9][\pL0-9]*`) 21 ) 22 23 func (f *Failure) canonicalMessage() string { 24 // Do we need to do anything to the message? 25 for _, c := range f.Message { 26 if '0' <= c && c <= '9' { 27 goto rewrite 28 } 29 } 30 return f.Message 31 32 rewrite: 33 // Canonicalize any "word" of the message containing numbers. 34 // 35 // TODO: "Escape" any existing … to make this safe as a key 36 // for later use with canonicalFields (direct use is 37 // unimportant). 38 return numberWords.ReplaceAllString(f.Message, "…") 39 } 40 41 func (f *Failure) canonicalFields() []string { 42 fields := []string{} 43 msg := f.Message 44 for len(msg) > 0 { 45 next := numberWords.FindStringIndex(msg) 46 if next == nil { 47 fields = append(fields, msg) 48 break 49 } 50 if next[0] > 0 { 51 fields = append(fields, msg[:next[0]]) 52 } 53 fields = append(fields, msg[next[0]:next[1]]) 54 msg = msg[next[1]:] 55 } 56 return fields 57 } 58 59 // Classify groups a set of failures in to canonicalized failure 60 // classes. The returned map maps from each failure class to the 61 // indexes of the input failures in that class. Each input failure 62 // will be in exactly one failure class. 63 func Classify(fs []*Failure) map[Failure][]int { 64 // Map maximally canonicalized failures to input indexes. 65 canon := map[Failure][]int{} 66 for i, f := range fs { 67 // TODO: Match up nearby line numbers? 68 key := Failure{ 69 Package: f.Package, 70 Test: f.Test, 71 Message: f.canonicalMessage(), 72 Function: f.Function, 73 File: f.File, 74 } 75 76 canon[key] = append(canon[key], i) 77 } 78 79 // De-canonicalize fields that all of the failures in a class 80 // have a common. 81 out := make(map[Failure][]int, len(canon)) 82 for key, class := range canon { 83 if len(class) == 1 { 84 out[key] = class 85 continue 86 } 87 88 // Does the message need de-canonicalization? 89 if key.Message != fs[class[0]].Message { 90 fields := fs[class[0]].canonicalFields() 91 for _, fi := range class[1:] { 92 nfields := fs[fi].canonicalFields() 93 for i, field := range fields { 94 if field != nfields[i] { 95 fields[i] = "…" 96 } 97 } 98 } 99 key.Message = strings.Join(fields, "") 100 } 101 102 // De-canonicalize Line, OS, and Arch. 103 line, os, arch := fs[class[0]].Line, fs[class[0]].OS, fs[class[0]].Arch 104 for _, fi := range class[1:] { 105 if fs[fi].Line != line { 106 line = 0 107 } 108 if fs[fi].OS != os { 109 os = "" 110 } 111 if fs[fi].Arch != arch { 112 arch = "" 113 } 114 } 115 key.Line, key.OS, key.Arch = line, os, arch 116 117 out[key] = class 118 } 119 120 return out 121 }