github.com/aclements/go-misc@v0.0.0-20240129233631-2f6ede80790c/internal/loganal/failure.go

github.com/aclements/go-misc@v0.0.0-20240129233631-2f6ede80790c/internal/loganal/failure.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package loganal
     6  
     7  import (
     8  	"fmt"
     9  	"regexp"
    10  	"strconv"
    11  	"strings"
    12  	"sync"
    13  )
    14  
    15  // Failure records a failure extracted from an all.bash log.
    16  type Failure struct {
    17  	// Package is the Go package of this failure. In the case of a
    18  	// testing.T failure, this will be the package of the test.
    19  	Package string
    20  
    21  	// Test identifies the failed test function. If this is not a
    22  	// testing.T failure, this will be "".
    23  	Test string
    24  
    25  	// Message is the summarized failure message. This will be one
    26  	// line of text.
    27  	Message string
    28  
    29  	// FullMessage is a substring of the log that captures the
    30  	// entire failure message. It may be many lines long.
    31  	FullMessage string
    32  
    33  	// Function is the fully qualified name of the function where
    34  	// this failure happened, if known. This helps distinguish
    35  	// between generic errors like "out of bounds" and is more
    36  	// stable for matching errors than file/line.
    37  	Function string
    38  
    39  	// File is the source file where this failure happened, if
    40  	// known.
    41  	File string
    42  
    43  	// Line is the source line where this failure happened, if
    44  	// known.
    45  	Line int
    46  
    47  	// OS and Arch are the GOOS and GOARCH of this failure.
    48  	OS, Arch string
    49  }
    50  
    51  func (f Failure) String() string {
    52  	s := f.Package
    53  	if f.Test != "" {
    54  		s += "." + f.Test
    55  	}
    56  	if f.Function != "" || f.File != "" {
    57  		if s != "" {
    58  			s += " "
    59  		}
    60  		if f.Function != "" {
    61  			s += "at " + f.Function
    62  		} else {
    63  			s += "at " + f.File
    64  			if f.Line != 0 {
    65  				s += fmt.Sprintf(":%d", f.Line)
    66  			}
    67  		}
    68  	}
    69  	if s != "" {
    70  		s += ": "
    71  	}
    72  	s += f.Message
    73  	return s
    74  }
    75  
    76  var (
    77  	linesStar = `(?:.*\n)*?`
    78  	linesPlus = `(?:.*\n)+?`
    79  
    80  	// failPkg matches the FAIL line for a package.
    81  	//
    82  	// In case of failure the Android wrapper prints "exitcode=1" without a newline,
    83  	// so for logs prior to the fix for https://golang.org/issue/49317 we need to
    84  	// strip that from the beginning of the line.
    85  	failPkg = `(?m:^(?:exitcode=1)?FAIL[ \t]+(\S+))`
    86  
    87  	// logTruncated matches the "log truncated" line injected by the coordinator.
    88  	logTruncated = `(?:\n\.\.\. log truncated \.\.\.)`
    89  
    90  	endOfTest = `(?:` + failPkg + `|` + logTruncated + `)`
    91  
    92  	canonLine = regexp.MustCompile(`\r+\n`)
    93  
    94  	// testingHeader matches the beginning of the go test std
    95  	// section. On Plan 9 there used to be just one #.
    96  	testingHeader = regexp.MustCompile(`^#+ Testing packages`)
    97  
    98  	// sectionHeader matches the header of each testing section
    99  	// printed by go tool dist test.
   100  	sectionHeader = regexp.MustCompile(`^##### (.*)`)
   101  
   102  	// testingFailed matches a testing.T failure. This may be a
   103  	// T.Error or a recovered panic. There was a time when the
   104  	// test name included GOMAXPROCS (like how benchmark names
   105  	// do), so we strip that out.
   106  	testingFailed = regexp.MustCompile(`^--- FAIL: ([^-\s]+).*\n(` + linesStar + `)` + endOfTest)
   107  
   108  	// testingError matches the file name and message of the last
   109  	// T.Error in a testingFailed log.
   110  	testingError = regexp.MustCompile(`(?:.*\n)*\t([^:]+):([0-9]+): (.*)\n`)
   111  
   112  	// testingPanic matches a recovered panic in a testingFailed
   113  	// log.
   114  	testingPanic = regexp.MustCompile(`panic: (.*?)(?: \[recovered\])`)
   115  
   116  	// gotestFailed matches a $GOROOT/test failure.
   117  	gotestFailed = regexp.MustCompile(`^# go run run\.go.*\n(` + linesPlus + `)` + endOfTest)
   118  
   119  	// buildFailed matches build failures from the testing package.
   120  	buildFailed = regexp.MustCompile(`^` + failPkg + `\s+\[build failed\]`)
   121  
   122  	// timeoutPanic1 matches a test timeout detected by the testing package.
   123  	timeoutPanic1 = regexp.MustCompile(`^panic: test timed out after .*\n(` + linesStar + `)` + endOfTest)
   124  
   125  	// timeoutPanic2 matches a test timeout detected by go test.
   126  	timeoutPanic2 = regexp.MustCompile(`^\*\*\* Test killed.*ran too long\n` + endOfTest)
   127  
   128  	// coordinatorTimeout matches a test timeout detected by the
   129  	// coordinator, for both non-sharded and sharded tests.
   130  	coordinatorTimeout = regexp.MustCompile(`(?m)^Build complete.*Result: error: timed out|^Test "[^"]+" ran over [0-9a-z]+ limit`)
   131  
   132  	// tbEntry is a regexp string that matches a single
   133  	// function/line number entry in a traceback. Group 1 matches
   134  	// the fully qualified function name. Groups 2 and 3 match the
   135  	// file name and line number.
   136  	// Most entries have trailing stack metadata for each frame,
   137  	// but inlined calls, lacking a frame, may omit that metadata.
   138  	tbEntry = `(\S+)\(.*\)\n\t(.*):([0-9]+)(?: .*)?\n`
   139  
   140  	// runtimeFailed matches a runtime throw or testing package
   141  	// panic. Matching the panic is fairly loose because in some
   142  	// cases a "fatal error:" can be preceded by a "panic:" if
   143  	// we've started the panic and then realize we can't (e.g.,
   144  	// sigpanic). Also gather up the "runtime:" prints preceding a
   145  	// throw.
   146  	runtimeFailed        = regexp.MustCompile(`^(?:runtime:.*\n)*.*(?:panic: |fatal error: )(.*)`)
   147  	runtimeLiterals      = []string{"runtime:", "panic:", "fatal error:"}
   148  	runtimeFailedTrailer = regexp.MustCompile(`^(?:exit status.*\n)?(?:\*\*\* Test killed.*\n)?` + endOfTest + `?`)
   149  
   150  	// apiCheckerFailed matches an API checker failure.
   151  	apiCheckerFailed = regexp.MustCompile(`^Error running API checker: (.*)`)
   152  
   153  	// goodLine matches known-good lines so we can ignore them
   154  	// before doing more aggressive/fuzzy failure extraction.
   155  	goodLine = regexp.MustCompile(`^#|^ok\s|^\?\s|^Benchmark|^PASS|^=== |^--- `)
   156  
   157  	// testingUnknownFailed matches the last line of some unknown
   158  	// failure detected by the testing package.
   159  	testingUnknownFailed = regexp.MustCompile(`^` + endOfTest)
   160  
   161  	// miscFailed matches the log.Fatalf in go tool dist test when
   162  	// a test fails. We use this as a last resort, mostly to pick
   163  	// up failures in sections that don't use the testing package.
   164  	miscFailed = regexp.MustCompile(`^.*Failed: (?:exit status|test failed)`)
   165  )
   166  
   167  // An extractCache speeds up failure extraction from multiple logs by
   168  // caching known lines. It is *not* thread-safe, so we track it in a
   169  // sync.Pool.
   170  type extractCache struct {
   171  	boringLines map[string]bool
   172  }
   173  
   174  var extractCachePool sync.Pool
   175  
   176  func init() {
   177  	extractCachePool.New = func() interface{} {
   178  		return &extractCache{make(map[string]bool)}
   179  	}
   180  }
   181  
   182  // Extract parses the failures from all.bash log m.
   183  func Extract(m string, os, arch string) ([]*Failure, error) {
   184  	fs := []*Failure{}
   185  	testingStarted := false
   186  	section := ""
   187  	sectionHeaderFailures := 0 // # failures at section start
   188  	unknown := []string{}
   189  	cache := extractCachePool.Get().(*extractCache)
   190  	defer extractCachePool.Put(cache)
   191  
   192  	// Canonicalize line endings. Note that some logs have a mix
   193  	// of line endings and some somehow have multiple \r's.
   194  	m = canonLine.ReplaceAllString(m, "\n")
   195  
   196  	var s []string
   197  	matcher := newMatcher(m)
   198  	consume := func(r *regexp.Regexp) bool {
   199  		matched := matcher.consume(r)
   200  		s = matcher.groups
   201  		if matched && !strings.HasSuffix(s[0], "\n") {
   202  			// Consume the rest of the line.
   203  			matcher.line()
   204  		}
   205  		return matched
   206  	}
   207  	firstBadLine := func() string {
   208  		for _, u := range unknown {
   209  			if len(u) > 0 {
   210  				return u
   211  			}
   212  		}
   213  		return ""
   214  	}
   215  
   216  	for !matcher.done() {
   217  		// Check for a cached result.
   218  		line, nextLinePos := matcher.peekLine()
   219  		isGoodLine, cached := cache.boringLines[line]
   220  
   221  		// Process the line.
   222  		isKnown := true
   223  		switch {
   224  		case cached:
   225  			matcher.pos = nextLinePos
   226  			if !isGoodLine {
   227  				// This line is known to not match any
   228  				// regexps. Follow the default case.
   229  				isKnown = false
   230  				unknown = append(unknown, line)
   231  			}
   232  
   233  		case consume(testingHeader):
   234  			testingStarted = true
   235  
   236  		case consume(sectionHeader):
   237  			section = s[1]
   238  			sectionHeaderFailures = len(fs)
   239  
   240  		case consume(testingFailed):
   241  			f := &Failure{
   242  				Test:        s[1],
   243  				Package:     s[3],
   244  				FullMessage: s[0],
   245  				Message:     "unknown testing.T failure",
   246  			}
   247  
   248  			// TODO: Can have multiple errors per FAIL:
   249  			// ../fetchlogs/rev/2015-03-24T19:51:21-41f9c43/linux-arm64-canonical
   250  
   251  			sError := testingError.FindStringSubmatch(s[2])
   252  			sPanic := testingPanic.FindStringSubmatch(s[2])
   253  			if sError != nil {
   254  				f.File, f.Line, f.Message = sError[1], atoi(sError[2]), sError[3]
   255  			} else if sPanic != nil {
   256  				f.Function, f.File, f.Line = panicWhere(s[2])
   257  				f.Message = sPanic[1]
   258  			}
   259  
   260  			fs = append(fs, f)
   261  
   262  		case consume(gotestFailed):
   263  			fs = append(fs, &Failure{
   264  				Package:     "test/" + s[2],
   265  				FullMessage: s[0],
   266  				Message:     firstLine(s[1]),
   267  			})
   268  
   269  		case consume(buildFailed):
   270  			// This may have an accompanying compiler
   271  			// crash, but it's interleaved with other "ok"
   272  			// lines, so it's hard to find.
   273  			fs = append(fs, &Failure{
   274  				FullMessage: s[0],
   275  				Message:     "build failed",
   276  				Package:     s[1],
   277  			})
   278  
   279  		case consume(timeoutPanic1):
   280  			fs = append(fs, &Failure{
   281  				Test:        testFromTraceback(s[1]),
   282  				FullMessage: s[0],
   283  				Message:     "test timed out",
   284  				Package:     s[2],
   285  			})
   286  
   287  		case consume(timeoutPanic2):
   288  			tb := strings.Join(unknown, "\n")
   289  			fs = append(fs, &Failure{
   290  				Test:        testFromTraceback(tb),
   291  				FullMessage: tb + "\n" + s[0],
   292  				Message:     "test timed out",
   293  				Package:     s[1],
   294  			})
   295  
   296  		case matcher.lineHasLiteral(runtimeLiterals...) && consume(runtimeFailed):
   297  			start := matcher.matchPos
   298  			msg := s[1]
   299  			pkg := "testing"
   300  			if strings.Contains(s[0], "fatal error:") {
   301  				pkg = "runtime"
   302  			}
   303  			traceback := consumeTraceback(matcher)
   304  			matcher.consume(runtimeFailedTrailer)
   305  			fn, file, line := panicWhere(traceback)
   306  			fs = append(fs, &Failure{
   307  				Package:     pkg,
   308  				FullMessage: matcher.str[start:matcher.pos],
   309  				Message:     msg,
   310  				Function:    fn,
   311  				File:        file,
   312  				Line:        line,
   313  			})
   314  
   315  		case consume(apiCheckerFailed):
   316  			fs = append(fs, &Failure{
   317  				Package:     "API checker",
   318  				FullMessage: s[0],
   319  				Message:     s[1],
   320  			})
   321  
   322  		case consume(goodLine):
   323  			// Ignore. Just cache and clear unknown.
   324  			cache.boringLines[line] = true
   325  
   326  		case consume(testingUnknownFailed):
   327  			fs = append(fs, &Failure{
   328  				Package:     s[1],
   329  				FullMessage: s[0],
   330  				Message:     "unknown failure: " + firstBadLine(),
   331  			})
   332  
   333  		case len(fs) == sectionHeaderFailures && consume(miscFailed):
   334  			fs = append(fs, &Failure{
   335  				Package:     section,
   336  				FullMessage: s[0],
   337  				Message:     "unknown failure: " + firstBadLine(),
   338  			})
   339  
   340  		default:
   341  			isKnown = false
   342  			unknown = append(unknown, line)
   343  			cache.boringLines[line] = false
   344  			matcher.pos = nextLinePos
   345  		}
   346  
   347  		// Clear unknown lines on any known line.
   348  		if isKnown {
   349  			unknown = unknown[:0]
   350  		}
   351  	}
   352  
   353  	// TODO: FullMessages for these.
   354  	if len(fs) == 0 && strings.Contains(m, "no space left on device") {
   355  		fs = append(fs, &Failure{
   356  			Message: "build failed (no space left on device)",
   357  		})
   358  	}
   359  	if len(fs) == 0 && coordinatorTimeout.MatchString(m) {
   360  		// all.bash was killed by coordinator.
   361  		fs = append(fs, &Failure{
   362  			Message: "build failed (timed out)",
   363  		})
   364  	}
   365  	if len(fs) == 0 && strings.Contains(m, "Failed to schedule") {
   366  		// Test sharding failed.
   367  		fs = append(fs, &Failure{
   368  			Message: "build failed (failed to schedule)",
   369  		})
   370  	}
   371  	if len(fs) == 0 && strings.Contains(m, "nosplit stack overflow") {
   372  		fs = append(fs, &Failure{
   373  			Message: "build failed (nosplit stack overflow)",
   374  		})
   375  	}
   376  
   377  	// If the same (message, where) shows up in more than five
   378  	// packages, it's probably a systemic issue, so collapse it
   379  	// down to one failure with no package.
   380  	type dedup struct {
   381  		packages map[string]bool
   382  		kept     bool
   383  	}
   384  	msgDedup := map[Failure]*dedup{}
   385  	failureMap := map[*Failure]*dedup{}
   386  	maxCount := 0
   387  	for _, f := range fs {
   388  		key := Failure{
   389  			Message:  f.canonicalMessage(),
   390  			Function: f.Function,
   391  			File:     f.File,
   392  			Line:     f.Line,
   393  		}
   394  
   395  		d := msgDedup[key]
   396  		if d == nil {
   397  			d = &dedup{packages: map[string]bool{}}
   398  			msgDedup[key] = d
   399  		}
   400  		d.packages[f.Package] = true
   401  		if len(d.packages) > maxCount {
   402  			maxCount = len(d.packages)
   403  		}
   404  		failureMap[f] = d
   405  	}
   406  	if maxCount >= 5 {
   407  		fsn := []*Failure{}
   408  		for _, f := range fs {
   409  			d := failureMap[f]
   410  			if len(d.packages) < 5 {
   411  				fsn = append(fsn, f)
   412  			} else if !d.kept {
   413  				d.kept = true
   414  				f.Test, f.Package = "", ""
   415  				fsn = append(fsn, f)
   416  			}
   417  		}
   418  		fs = fsn
   419  	}
   420  
   421  	// Check if we even got as far as testing. Note that there was
   422  	// a period when we didn't print the "testing" header, so as
   423  	// long as we found failures, we don't care if we found the
   424  	// header.
   425  	if !testingStarted && len(fs) == 0 {
   426  		fs = append(fs, &Failure{
   427  			Message: "toolchain build failed",
   428  		})
   429  	}
   430  
   431  	for _, f := range fs {
   432  		f.OS, f.Arch = os, arch
   433  
   434  		// Clean up package. For misc/cgo tests, this will be
   435  		// something like
   436  		// _/tmp/buildlet-scatch825855615/go/misc/cgo/test.
   437  		if strings.HasPrefix(f.Package, "_/tmp/") {
   438  			f.Package = strings.SplitN(f.Package, "/", 4)[3]
   439  		}
   440  
   441  		// Trim trailing newlines from FullMessage.
   442  		f.FullMessage = strings.TrimRight(f.FullMessage, "\n")
   443  	}
   444  	return fs, nil
   445  }
   446  
   447  func atoi(s string) int {
   448  	v, err := strconv.Atoi(s)
   449  	if err != nil {
   450  		panic("expected number, got " + s)
   451  	}
   452  	return v
   453  }
   454  
   455  // firstLine returns the first line from s, not including the line
   456  // terminator.
   457  func firstLine(s string) string {
   458  	if i := strings.Index(s, "\n"); i >= 0 {
   459  		return s[:i]
   460  	}
   461  	return s
   462  }
   463  
   464  var (
   465  	tracebackStart = regexp.MustCompile(`^(goroutine [0-9]+.*:|runtime stack:)\n`)
   466  	tracebackEntry = regexp.MustCompile(`^` + tbEntry)
   467  )
   468  
   469  // consumeTraceback consumes a traceback from m.
   470  func consumeTraceback(m *matcher) string {
   471  	// Find the beginning of the traceback.
   472  	for !m.done() && !m.peek(tracebackStart) {
   473  		m.line()
   474  	}
   475  
   476  	start := m.pos
   477  loop:
   478  	for !m.done() {
   479  		switch {
   480  		case m.hasPrefix("\n") || m.hasPrefix("\t") ||
   481  			m.hasPrefix("goroutine ") || m.hasPrefix("runtime stack:") ||
   482  			m.hasPrefix("created by "):
   483  			m.line()
   484  
   485  		case m.consume(tracebackEntry):
   486  			// Do nothing.
   487  
   488  		default:
   489  			break loop
   490  		}
   491  	}
   492  	return m.str[start:m.pos]
   493  }
   494  
   495  var (
   496  	// testFromTracebackRe matches a traceback entry from a
   497  	// function named Test* in a file named *_test.go. It ignores
   498  	// "created by" lines.
   499  	testFromTracebackRe = regexp.MustCompile(`\.(Test[^(\n]+)\(.*\n.*_test\.go`)
   500  
   501  	panicWhereRe = regexp.MustCompile(`(?m:^)` + tbEntry)
   502  )
   503  
   504  // testFromTraceback attempts to return the test name from a
   505  // traceback.
   506  func testFromTraceback(tb string) string {
   507  	s := testFromTracebackRe.FindStringSubmatch(tb)
   508  	if s == nil {
   509  		return ""
   510  	}
   511  	return s[1]
   512  }
   513  
   514  // panicWhere attempts to return the fully qualified name, source
   515  // file, and line number of the panicking function in traceback tb.
   516  func panicWhere(tb string) (fn string, file string, line int) {
   517  	m := matcher{str: tb}
   518  	for m.consume(panicWhereRe) {
   519  		fn := m.groups[1]
   520  
   521  		// Ignore functions involved in panic handling.
   522  		if strings.HasPrefix(fn, "runtime.panic") || fn == "runtime.throw" || fn == "runtime.sigpanic" {
   523  			continue
   524  		}
   525  		return fn, m.groups[2], atoi(m.groups[3])
   526  	}
   527  	return "", "", 0
   528  }