go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/bisection/compilefailureanalysis/heuristic/signal_extractor.go (about) 1 // Copyright 2022 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package heuristic 16 17 import ( 18 "context" 19 "fmt" 20 "regexp" 21 "sort" 22 "strconv" 23 "strings" 24 25 "go.chromium.org/luci/bisection/model" 26 "go.chromium.org/luci/bisection/util" 27 ) 28 29 const ( 30 // Patterns for Python stack trace frames. 31 PYTHON_STACK_TRACE_FRAME_PATTERN_1 = `File "(?P<file>.+\.py)", line (?P<line>[0-9]+), in (?P<function>.+)` 32 PYTHON_STACK_TRACE_FRAME_PATTERN_2 = `(?P<function>[^\s]+) at (?P<file>.+\.py):(?P<line>[0-9]+)` 33 // Match file path separator: "/", "//", "\", "\\". 34 PATH_SEPARATOR_PATTERN = `(?:/{1,2}|\\{1,2})` 35 36 // Match drive root directory on Windows, like "C:/" or "C:\\". 37 WINDOWS_ROOT_PATTERN = `[a-zA-Z]:` + PATH_SEPARATOR_PATTERN 38 39 // Match system root directory on Linux/Mac. 40 UNIX_ROOT_PATTERN = `/+` 41 42 // Match system/drive root on Linux/Mac/Windows. 43 ROOT_DIR_PATTERN = "(?:" + WINDOWS_ROOT_PATTERN + "|" + UNIX_ROOT_PATTERN + ")" 44 45 // Match file/directory names and also match ., .. 46 FILE_NAME_PATTERN = `[\w\.-]+` 47 48 // Mark the beginning of the failure section in stdout log 49 FAILURE_SECTION_START_PREFIX = "FAILED: " 50 51 // Mark the end of the failure section in stdout log 52 FAILURE_SECTION_END_PATTERN_1 = `^\d+ errors? generated.` 53 FAILURE_SECTION_END_PATTERN_2 = `failed with exit code \d+` 54 // If it reads this line, it is also ends of failure section 55 OUTSIDE_FAILURE_SECTION_PATTERN = `\[\d+/\d+\]` 56 57 NINJA_FAILURE_LINE_END_PREFIX = `ninja: build stopped` 58 NINJA_ERROR_LINE_PREFIX = `ninja: error` 59 60 STDLOG_NODE_PATTERN = `(?:"([^"]+)")|(\S+)` 61 ) 62 63 // ExtractSignals extracts necessary signals for heuristic analysis from logs 64 func ExtractSignals(c context.Context, compileLogs *model.CompileLogs) (*model.CompileFailureSignal, error) { 65 if compileLogs.NinjaLog == nil && compileLogs.StdOutLog == "" { 66 return nil, fmt.Errorf("Unable to extract signals from empty logs.") 67 } 68 // Prioritise extracting signals from ninja logs instead of stdout logs 69 if compileLogs.NinjaLog != nil { 70 return ExtractSignalsFromNinjaLog(c, compileLogs.NinjaLog) 71 } 72 return ExtractSignalsFromStdoutLog(c, compileLogs.StdOutLog) 73 } 74 75 // ExtractSignalsFromNinjaLog extracts necessary signals for heuristic analysis from ninja log 76 func ExtractSignalsFromNinjaLog(c context.Context, ninjaLog *model.NinjaLog) (*model.CompileFailureSignal, error) { 77 signal := &model.CompileFailureSignal{} 78 for _, failure := range ninjaLog.Failures { 79 edge := &model.CompileFailureEdge{ 80 Rule: failure.Rule, 81 OutputNodes: failure.OutputNodes, 82 Dependencies: normalizeDependencies(failure.Dependencies), 83 } 84 signal.Edges = append(signal.Edges, edge) 85 signal.Nodes = append(signal.Nodes, failure.OutputNodes...) 86 e := extractFiles(signal, failure.Output) 87 if e != nil { 88 return nil, e 89 } 90 } 91 return signal, nil 92 } 93 94 func extractFiles(signal *model.CompileFailureSignal, output string) error { 95 pythonPatterns := []*regexp.Regexp{ 96 regexp.MustCompile(PYTHON_STACK_TRACE_FRAME_PATTERN_1), 97 regexp.MustCompile(PYTHON_STACK_TRACE_FRAME_PATTERN_2), 98 } 99 filePathLinePattern := regexp.MustCompile(getFileLinePathPatternStr()) 100 101 lines := strings.Split(output, "\n") 102 for i, line := range lines { 103 // Do not extract the first line 104 if i == 0 { 105 continue 106 } 107 // Check if the line matches python pattern 108 matchedPython := false 109 for _, pythonPattern := range pythonPatterns { 110 matches, err := util.MatchedNamedGroup(pythonPattern, line) 111 if err == nil { 112 pyLine, e := strconv.Atoi(matches["line"]) 113 if e != nil { 114 return e 115 } 116 signal.AddLine(util.NormalizeFilePath(matches["file"]), pyLine) 117 matchedPython = true 118 continue 119 } 120 } 121 if matchedPython { 122 continue 123 } 124 // Non-python cases 125 matches := filePathLinePattern.FindAllStringSubmatch(line, -1) 126 if matches != nil { 127 for _, match := range matches { 128 if len(match) != 3 { 129 return fmt.Errorf("Invalid line: %s", line) 130 } 131 // match[1] is file, match[2] is line number 132 if match[2] == "" { 133 signal.AddFilePath(util.NormalizeFilePath(match[1])) 134 } else { 135 lineInt, e := strconv.Atoi(match[2]) 136 if e != nil { 137 return e 138 } 139 signal.AddLine(util.NormalizeFilePath(match[1]), lineInt) 140 } 141 } 142 } 143 } 144 return nil 145 } 146 147 func extractFilesFromLine(signal *model.CompileFailureSignal, line string) error { 148 pythonPatterns := []*regexp.Regexp{ 149 regexp.MustCompile(PYTHON_STACK_TRACE_FRAME_PATTERN_1), 150 regexp.MustCompile(PYTHON_STACK_TRACE_FRAME_PATTERN_2), 151 } 152 filePathLinePattern := regexp.MustCompile(getFileLinePathPatternStr()) 153 154 // Check if the line matches python pattern 155 matchedPython := false 156 for _, pythonPattern := range pythonPatterns { 157 matches, err := util.MatchedNamedGroup(pythonPattern, line) 158 if err == nil { 159 pyLine, e := strconv.Atoi(matches["line"]) 160 if e != nil { 161 return e 162 } 163 signal.AddLine(util.NormalizeFilePath(matches["file"]), pyLine) 164 matchedPython = true 165 continue 166 } 167 } 168 if matchedPython { 169 return nil 170 } 171 // Non-python cases 172 matches := filePathLinePattern.FindAllStringSubmatch(line, -1) 173 if matches != nil { 174 for _, match := range matches { 175 if len(match) != 3 { 176 return fmt.Errorf("Invalid line: %s", line) 177 } 178 // match[1] is file, match[2] is line number 179 if match[2] == "" { 180 signal.AddFilePath(util.NormalizeFilePath(match[1])) 181 } else { 182 lineInt, e := strconv.Atoi(match[2]) 183 if e != nil { 184 return e 185 } 186 signal.AddLine(util.NormalizeFilePath(match[1]), lineInt) 187 } 188 } 189 } 190 return nil 191 } 192 193 func normalizeDependencies(dependencies []string) []string { 194 result := []string{} 195 for _, dependency := range dependencies { 196 result = append(result, util.NormalizeFilePath(dependency)) 197 } 198 return result 199 } 200 201 // ExtractSignalsFromStdoutLog extracts necessary signals for heuristic analysis from stdout log 202 func ExtractSignalsFromStdoutLog(c context.Context, stdoutLog string) (*model.CompileFailureSignal, error) { 203 signal := &model.CompileFailureSignal{} 204 lines := strings.Split(stdoutLog, "\n") 205 failureSectionEndPattern1 := regexp.MustCompile(FAILURE_SECTION_END_PATTERN_1) 206 failureSectionEndPattern2 := regexp.MustCompile(FAILURE_SECTION_END_PATTERN_2) 207 outsideFailureSectionPattern := regexp.MustCompile(OUTSIDE_FAILURE_SECTION_PATTERN) 208 failureStarted := false 209 for _, line := range lines { 210 line = strings.Trim(line, " \t") 211 if strings.HasPrefix(line, FAILURE_SECTION_START_PREFIX) { 212 failureStarted = true 213 line = line[len(FAILURE_SECTION_START_PREFIX):] 214 signal.Nodes = append(signal.Nodes, extractNodes(line)...) 215 continue 216 } else if failureStarted && strings.HasPrefix(line, NINJA_FAILURE_LINE_END_PREFIX) { 217 // End parsing 218 break 219 } else if failureStarted && (failureSectionEndPattern1.MatchString(line) || failureSectionEndPattern2.MatchString(line) || outsideFailureSectionPattern.MatchString(line)) { 220 failureStarted = false 221 } 222 223 if failureStarted || strings.HasPrefix(line, NINJA_ERROR_LINE_PREFIX) { 224 extractFilesFromLine(signal, line) 225 } 226 } 227 return signal, nil 228 } 229 230 // extractNode returns the list of failed output nodes. 231 // Possible format: 232 // FAILED: obj/path/to/file.o 233 // FAILED: target.exe 234 // FAILED: "target with space in name" 235 func extractNodes(line string) []string { 236 pattern := regexp.MustCompile(STDLOG_NODE_PATTERN) 237 matches := pattern.FindAllStringSubmatch(line, -1) 238 result := []string{} 239 for _, match := range matches { 240 for i := 1; i <= 2; i++ { 241 if match[i] != "" { 242 result = append(result, match[i]) 243 } 244 } 245 } 246 return result 247 } 248 249 // getFileLinePathPatternStr matches a full file path and line number. 250 // It could match files with or without line numbers like below: 251 // 252 // c:\\a\\b.txt:12 253 // c:\a\b.txt(123) 254 // c:\a\b.txt:[line 123] 255 // D:/a/b.txt 256 // /a/../b/./c.txt 257 // a/b/c.txt 258 // //BUILD.gn:246 259 func getFileLinePathPatternStr() string { 260 pattern := `(` 261 pattern += ROOT_DIR_PATTERN + "?" // System/Drive root directory. 262 pattern += `(?:` + FILE_NAME_PATTERN + PATH_SEPARATOR_PATTERN + `)*` // Directories. 263 pattern += FILE_NAME_PATTERN + `\.` + getFileExtensionPatternStr() 264 pattern += `)` // File name and extension. 265 pattern += `(?:(?:[\(:]|\[line )(\d+))?` // Line number might not be available. 266 return pattern 267 } 268 269 // getFileExtensionPattern matches supported file extensions. 270 // Sort extension list to avoid non-full match like 'c' matching 'c' in 'cpp'. 271 func getFileExtensionPatternStr() string { 272 extensions := getSupportedFileExtension() 273 sort.Sort(sort.Reverse(sort.StringSlice(extensions))) 274 return fmt.Sprintf("(?:%s)", strings.Join(extensions, "|")) 275 } 276 277 // getSupportedFileExtension get gile extensions to filter out files from log. 278 func getSupportedFileExtension() []string { 279 return []string{ 280 "c", 281 "cc", 282 "cpp", 283 "css", 284 "exe", 285 "gn", 286 "gni", 287 "gyp", 288 "gypi", 289 "h", 290 "hh", 291 "html", 292 "idl", 293 "isolate", 294 "java", 295 "js", 296 "json", 297 "m", 298 "mm", 299 "mojom", 300 "nexe", 301 "o", 302 "obj", 303 "py", 304 "pyc", 305 "rc", 306 "sh", 307 "sha1", 308 "ts", 309 "txt", 310 } 311 }