github.com/aclements/go-misc@v0.0.0-20240129233631-2f6ede80790c/internal/loganal/failure.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package loganal 6 7 import ( 8 "fmt" 9 "regexp" 10 "strconv" 11 "strings" 12 "sync" 13 ) 14 15 // Failure records a failure extracted from an all.bash log. 16 type Failure struct { 17 // Package is the Go package of this failure. In the case of a 18 // testing.T failure, this will be the package of the test. 19 Package string 20 21 // Test identifies the failed test function. If this is not a 22 // testing.T failure, this will be "". 23 Test string 24 25 // Message is the summarized failure message. This will be one 26 // line of text. 27 Message string 28 29 // FullMessage is a substring of the log that captures the 30 // entire failure message. It may be many lines long. 31 FullMessage string 32 33 // Function is the fully qualified name of the function where 34 // this failure happened, if known. This helps distinguish 35 // between generic errors like "out of bounds" and is more 36 // stable for matching errors than file/line. 37 Function string 38 39 // File is the source file where this failure happened, if 40 // known. 41 File string 42 43 // Line is the source line where this failure happened, if 44 // known. 45 Line int 46 47 // OS and Arch are the GOOS and GOARCH of this failure. 48 OS, Arch string 49 } 50 51 func (f Failure) String() string { 52 s := f.Package 53 if f.Test != "" { 54 s += "." + f.Test 55 } 56 if f.Function != "" || f.File != "" { 57 if s != "" { 58 s += " " 59 } 60 if f.Function != "" { 61 s += "at " + f.Function 62 } else { 63 s += "at " + f.File 64 if f.Line != 0 { 65 s += fmt.Sprintf(":%d", f.Line) 66 } 67 } 68 } 69 if s != "" { 70 s += ": " 71 } 72 s += f.Message 73 return s 74 } 75 76 var ( 77 linesStar = `(?:.*\n)*?` 78 linesPlus = `(?:.*\n)+?` 79 80 // failPkg matches the FAIL line for a package. 81 // 82 // In case of failure the Android wrapper prints "exitcode=1" without a newline, 83 // so for logs prior to the fix for https://golang.org/issue/49317 we need to 84 // strip that from the beginning of the line. 85 failPkg = `(?m:^(?:exitcode=1)?FAIL[ \t]+(\S+))` 86 87 // logTruncated matches the "log truncated" line injected by the coordinator. 88 logTruncated = `(?:\n\.\.\. log truncated \.\.\.)` 89 90 endOfTest = `(?:` + failPkg + `|` + logTruncated + `)` 91 92 canonLine = regexp.MustCompile(`\r+\n`) 93 94 // testingHeader matches the beginning of the go test std 95 // section. On Plan 9 there used to be just one #. 96 testingHeader = regexp.MustCompile(`^#+ Testing packages`) 97 98 // sectionHeader matches the header of each testing section 99 // printed by go tool dist test. 100 sectionHeader = regexp.MustCompile(`^##### (.*)`) 101 102 // testingFailed matches a testing.T failure. This may be a 103 // T.Error or a recovered panic. There was a time when the 104 // test name included GOMAXPROCS (like how benchmark names 105 // do), so we strip that out. 106 testingFailed = regexp.MustCompile(`^--- FAIL: ([^-\s]+).*\n(` + linesStar + `)` + endOfTest) 107 108 // testingError matches the file name and message of the last 109 // T.Error in a testingFailed log. 110 testingError = regexp.MustCompile(`(?:.*\n)*\t([^:]+):([0-9]+): (.*)\n`) 111 112 // testingPanic matches a recovered panic in a testingFailed 113 // log. 114 testingPanic = regexp.MustCompile(`panic: (.*?)(?: \[recovered\])`) 115 116 // gotestFailed matches a $GOROOT/test failure. 117 gotestFailed = regexp.MustCompile(`^# go run run\.go.*\n(` + linesPlus + `)` + endOfTest) 118 119 // buildFailed matches build failures from the testing package. 120 buildFailed = regexp.MustCompile(`^` + failPkg + `\s+\[build failed\]`) 121 122 // timeoutPanic1 matches a test timeout detected by the testing package. 123 timeoutPanic1 = regexp.MustCompile(`^panic: test timed out after .*\n(` + linesStar + `)` + endOfTest) 124 125 // timeoutPanic2 matches a test timeout detected by go test. 126 timeoutPanic2 = regexp.MustCompile(`^\*\*\* Test killed.*ran too long\n` + endOfTest) 127 128 // coordinatorTimeout matches a test timeout detected by the 129 // coordinator, for both non-sharded and sharded tests. 130 coordinatorTimeout = regexp.MustCompile(`(?m)^Build complete.*Result: error: timed out|^Test "[^"]+" ran over [0-9a-z]+ limit`) 131 132 // tbEntry is a regexp string that matches a single 133 // function/line number entry in a traceback. Group 1 matches 134 // the fully qualified function name. Groups 2 and 3 match the 135 // file name and line number. 136 // Most entries have trailing stack metadata for each frame, 137 // but inlined calls, lacking a frame, may omit that metadata. 138 tbEntry = `(\S+)\(.*\)\n\t(.*):([0-9]+)(?: .*)?\n` 139 140 // runtimeFailed matches a runtime throw or testing package 141 // panic. Matching the panic is fairly loose because in some 142 // cases a "fatal error:" can be preceded by a "panic:" if 143 // we've started the panic and then realize we can't (e.g., 144 // sigpanic). Also gather up the "runtime:" prints preceding a 145 // throw. 146 runtimeFailed = regexp.MustCompile(`^(?:runtime:.*\n)*.*(?:panic: |fatal error: )(.*)`) 147 runtimeLiterals = []string{"runtime:", "panic:", "fatal error:"} 148 runtimeFailedTrailer = regexp.MustCompile(`^(?:exit status.*\n)?(?:\*\*\* Test killed.*\n)?` + endOfTest + `?`) 149 150 // apiCheckerFailed matches an API checker failure. 151 apiCheckerFailed = regexp.MustCompile(`^Error running API checker: (.*)`) 152 153 // goodLine matches known-good lines so we can ignore them 154 // before doing more aggressive/fuzzy failure extraction. 155 goodLine = regexp.MustCompile(`^#|^ok\s|^\?\s|^Benchmark|^PASS|^=== |^--- `) 156 157 // testingUnknownFailed matches the last line of some unknown 158 // failure detected by the testing package. 159 testingUnknownFailed = regexp.MustCompile(`^` + endOfTest) 160 161 // miscFailed matches the log.Fatalf in go tool dist test when 162 // a test fails. We use this as a last resort, mostly to pick 163 // up failures in sections that don't use the testing package. 164 miscFailed = regexp.MustCompile(`^.*Failed: (?:exit status|test failed)`) 165 ) 166 167 // An extractCache speeds up failure extraction from multiple logs by 168 // caching known lines. It is *not* thread-safe, so we track it in a 169 // sync.Pool. 170 type extractCache struct { 171 boringLines map[string]bool 172 } 173 174 var extractCachePool sync.Pool 175 176 func init() { 177 extractCachePool.New = func() interface{} { 178 return &extractCache{make(map[string]bool)} 179 } 180 } 181 182 // Extract parses the failures from all.bash log m. 183 func Extract(m string, os, arch string) ([]*Failure, error) { 184 fs := []*Failure{} 185 testingStarted := false 186 section := "" 187 sectionHeaderFailures := 0 // # failures at section start 188 unknown := []string{} 189 cache := extractCachePool.Get().(*extractCache) 190 defer extractCachePool.Put(cache) 191 192 // Canonicalize line endings. Note that some logs have a mix 193 // of line endings and some somehow have multiple \r's. 194 m = canonLine.ReplaceAllString(m, "\n") 195 196 var s []string 197 matcher := newMatcher(m) 198 consume := func(r *regexp.Regexp) bool { 199 matched := matcher.consume(r) 200 s = matcher.groups 201 if matched && !strings.HasSuffix(s[0], "\n") { 202 // Consume the rest of the line. 203 matcher.line() 204 } 205 return matched 206 } 207 firstBadLine := func() string { 208 for _, u := range unknown { 209 if len(u) > 0 { 210 return u 211 } 212 } 213 return "" 214 } 215 216 for !matcher.done() { 217 // Check for a cached result. 218 line, nextLinePos := matcher.peekLine() 219 isGoodLine, cached := cache.boringLines[line] 220 221 // Process the line. 222 isKnown := true 223 switch { 224 case cached: 225 matcher.pos = nextLinePos 226 if !isGoodLine { 227 // This line is known to not match any 228 // regexps. Follow the default case. 229 isKnown = false 230 unknown = append(unknown, line) 231 } 232 233 case consume(testingHeader): 234 testingStarted = true 235 236 case consume(sectionHeader): 237 section = s[1] 238 sectionHeaderFailures = len(fs) 239 240 case consume(testingFailed): 241 f := &Failure{ 242 Test: s[1], 243 Package: s[3], 244 FullMessage: s[0], 245 Message: "unknown testing.T failure", 246 } 247 248 // TODO: Can have multiple errors per FAIL: 249 // ../fetchlogs/rev/2015-03-24T19:51:21-41f9c43/linux-arm64-canonical 250 251 sError := testingError.FindStringSubmatch(s[2]) 252 sPanic := testingPanic.FindStringSubmatch(s[2]) 253 if sError != nil { 254 f.File, f.Line, f.Message = sError[1], atoi(sError[2]), sError[3] 255 } else if sPanic != nil { 256 f.Function, f.File, f.Line = panicWhere(s[2]) 257 f.Message = sPanic[1] 258 } 259 260 fs = append(fs, f) 261 262 case consume(gotestFailed): 263 fs = append(fs, &Failure{ 264 Package: "test/" + s[2], 265 FullMessage: s[0], 266 Message: firstLine(s[1]), 267 }) 268 269 case consume(buildFailed): 270 // This may have an accompanying compiler 271 // crash, but it's interleaved with other "ok" 272 // lines, so it's hard to find. 273 fs = append(fs, &Failure{ 274 FullMessage: s[0], 275 Message: "build failed", 276 Package: s[1], 277 }) 278 279 case consume(timeoutPanic1): 280 fs = append(fs, &Failure{ 281 Test: testFromTraceback(s[1]), 282 FullMessage: s[0], 283 Message: "test timed out", 284 Package: s[2], 285 }) 286 287 case consume(timeoutPanic2): 288 tb := strings.Join(unknown, "\n") 289 fs = append(fs, &Failure{ 290 Test: testFromTraceback(tb), 291 FullMessage: tb + "\n" + s[0], 292 Message: "test timed out", 293 Package: s[1], 294 }) 295 296 case matcher.lineHasLiteral(runtimeLiterals...) && consume(runtimeFailed): 297 start := matcher.matchPos 298 msg := s[1] 299 pkg := "testing" 300 if strings.Contains(s[0], "fatal error:") { 301 pkg = "runtime" 302 } 303 traceback := consumeTraceback(matcher) 304 matcher.consume(runtimeFailedTrailer) 305 fn, file, line := panicWhere(traceback) 306 fs = append(fs, &Failure{ 307 Package: pkg, 308 FullMessage: matcher.str[start:matcher.pos], 309 Message: msg, 310 Function: fn, 311 File: file, 312 Line: line, 313 }) 314 315 case consume(apiCheckerFailed): 316 fs = append(fs, &Failure{ 317 Package: "API checker", 318 FullMessage: s[0], 319 Message: s[1], 320 }) 321 322 case consume(goodLine): 323 // Ignore. Just cache and clear unknown. 324 cache.boringLines[line] = true 325 326 case consume(testingUnknownFailed): 327 fs = append(fs, &Failure{ 328 Package: s[1], 329 FullMessage: s[0], 330 Message: "unknown failure: " + firstBadLine(), 331 }) 332 333 case len(fs) == sectionHeaderFailures && consume(miscFailed): 334 fs = append(fs, &Failure{ 335 Package: section, 336 FullMessage: s[0], 337 Message: "unknown failure: " + firstBadLine(), 338 }) 339 340 default: 341 isKnown = false 342 unknown = append(unknown, line) 343 cache.boringLines[line] = false 344 matcher.pos = nextLinePos 345 } 346 347 // Clear unknown lines on any known line. 348 if isKnown { 349 unknown = unknown[:0] 350 } 351 } 352 353 // TODO: FullMessages for these. 354 if len(fs) == 0 && strings.Contains(m, "no space left on device") { 355 fs = append(fs, &Failure{ 356 Message: "build failed (no space left on device)", 357 }) 358 } 359 if len(fs) == 0 && coordinatorTimeout.MatchString(m) { 360 // all.bash was killed by coordinator. 361 fs = append(fs, &Failure{ 362 Message: "build failed (timed out)", 363 }) 364 } 365 if len(fs) == 0 && strings.Contains(m, "Failed to schedule") { 366 // Test sharding failed. 367 fs = append(fs, &Failure{ 368 Message: "build failed (failed to schedule)", 369 }) 370 } 371 if len(fs) == 0 && strings.Contains(m, "nosplit stack overflow") { 372 fs = append(fs, &Failure{ 373 Message: "build failed (nosplit stack overflow)", 374 }) 375 } 376 377 // If the same (message, where) shows up in more than five 378 // packages, it's probably a systemic issue, so collapse it 379 // down to one failure with no package. 380 type dedup struct { 381 packages map[string]bool 382 kept bool 383 } 384 msgDedup := map[Failure]*dedup{} 385 failureMap := map[*Failure]*dedup{} 386 maxCount := 0 387 for _, f := range fs { 388 key := Failure{ 389 Message: f.canonicalMessage(), 390 Function: f.Function, 391 File: f.File, 392 Line: f.Line, 393 } 394 395 d := msgDedup[key] 396 if d == nil { 397 d = &dedup{packages: map[string]bool{}} 398 msgDedup[key] = d 399 } 400 d.packages[f.Package] = true 401 if len(d.packages) > maxCount { 402 maxCount = len(d.packages) 403 } 404 failureMap[f] = d 405 } 406 if maxCount >= 5 { 407 fsn := []*Failure{} 408 for _, f := range fs { 409 d := failureMap[f] 410 if len(d.packages) < 5 { 411 fsn = append(fsn, f) 412 } else if !d.kept { 413 d.kept = true 414 f.Test, f.Package = "", "" 415 fsn = append(fsn, f) 416 } 417 } 418 fs = fsn 419 } 420 421 // Check if we even got as far as testing. Note that there was 422 // a period when we didn't print the "testing" header, so as 423 // long as we found failures, we don't care if we found the 424 // header. 425 if !testingStarted && len(fs) == 0 { 426 fs = append(fs, &Failure{ 427 Message: "toolchain build failed", 428 }) 429 } 430 431 for _, f := range fs { 432 f.OS, f.Arch = os, arch 433 434 // Clean up package. For misc/cgo tests, this will be 435 // something like 436 // _/tmp/buildlet-scatch825855615/go/misc/cgo/test. 437 if strings.HasPrefix(f.Package, "_/tmp/") { 438 f.Package = strings.SplitN(f.Package, "/", 4)[3] 439 } 440 441 // Trim trailing newlines from FullMessage. 442 f.FullMessage = strings.TrimRight(f.FullMessage, "\n") 443 } 444 return fs, nil 445 } 446 447 func atoi(s string) int { 448 v, err := strconv.Atoi(s) 449 if err != nil { 450 panic("expected number, got " + s) 451 } 452 return v 453 } 454 455 // firstLine returns the first line from s, not including the line 456 // terminator. 457 func firstLine(s string) string { 458 if i := strings.Index(s, "\n"); i >= 0 { 459 return s[:i] 460 } 461 return s 462 } 463 464 var ( 465 tracebackStart = regexp.MustCompile(`^(goroutine [0-9]+.*:|runtime stack:)\n`) 466 tracebackEntry = regexp.MustCompile(`^` + tbEntry) 467 ) 468 469 // consumeTraceback consumes a traceback from m. 470 func consumeTraceback(m *matcher) string { 471 // Find the beginning of the traceback. 472 for !m.done() && !m.peek(tracebackStart) { 473 m.line() 474 } 475 476 start := m.pos 477 loop: 478 for !m.done() { 479 switch { 480 case m.hasPrefix("\n") || m.hasPrefix("\t") || 481 m.hasPrefix("goroutine ") || m.hasPrefix("runtime stack:") || 482 m.hasPrefix("created by "): 483 m.line() 484 485 case m.consume(tracebackEntry): 486 // Do nothing. 487 488 default: 489 break loop 490 } 491 } 492 return m.str[start:m.pos] 493 } 494 495 var ( 496 // testFromTracebackRe matches a traceback entry from a 497 // function named Test* in a file named *_test.go. It ignores 498 // "created by" lines. 499 testFromTracebackRe = regexp.MustCompile(`\.(Test[^(\n]+)\(.*\n.*_test\.go`) 500 501 panicWhereRe = regexp.MustCompile(`(?m:^)` + tbEntry) 502 ) 503 504 // testFromTraceback attempts to return the test name from a 505 // traceback. 506 func testFromTraceback(tb string) string { 507 s := testFromTracebackRe.FindStringSubmatch(tb) 508 if s == nil { 509 return "" 510 } 511 return s[1] 512 } 513 514 // panicWhere attempts to return the fully qualified name, source 515 // file, and line number of the panicking function in traceback tb. 516 func panicWhere(tb string) (fn string, file string, line int) { 517 m := matcher{str: tb} 518 for m.consume(panicWhereRe) { 519 fn := m.groups[1] 520 521 // Ignore functions involved in panic handling. 522 if strings.HasPrefix(fn, "runtime.panic") || fn == "runtime.throw" || fn == "runtime.sigpanic" { 523 continue 524 } 525 return fn, m.groups[2], atoi(m.groups[3]) 526 } 527 return "", "", 0 528 }