github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/cmd/github-post/main.go (about) 1 // Copyright 2016 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 // Command github-post parses the JSON-formatted output from a Go test session, 12 // as generated by either 'go test -json' or './pkg.test | go tool test2json -t', 13 // and posts issues for any failed tests to GitHub. If there are no failed 14 // tests, it assumes that there was a build error and posts the entire log to 15 // GitHub. 16 package main 17 18 import ( 19 "bufio" 20 "bytes" 21 "context" 22 "encoding/json" 23 "fmt" 24 "io" 25 "io/ioutil" 26 "log" 27 "os" 28 "os/exec" 29 "regexp" 30 "sort" 31 "strconv" 32 "strings" 33 "time" 34 35 "github.com/cockroachdb/cockroach/pkg/cmd/internal/issues" 36 "github.com/cockroachdb/errors" 37 ) 38 39 const ( 40 pkgEnv = "PKG" 41 ) 42 43 func main() { 44 ctx := context.Background() 45 46 f := func(ctx context.Context, title, packageName, testName, testMessage, authorEmail string) error { 47 log.Printf("filing issue with title: %s", title) 48 req := issues.PostRequest{ 49 // TODO(tbg): actually use this as a template and not a hard-coded 50 // string. 51 TitleTemplate: title, 52 BodyTemplate: issues.UnitTestFailureBody, 53 PackageName: packageName, 54 TestName: testName, 55 Message: testMessage, 56 Artifacts: "", 57 AuthorEmail: authorEmail, 58 } 59 return issues.Post(ctx, req) 60 } 61 62 if err := listFailures(ctx, os.Stdin, f); err != nil { 63 log.Fatal(err) 64 } 65 } 66 67 // This struct is described in the test2json documentation. 68 // https://golang.org/cmd/test2json/ 69 type testEvent struct { 70 Action string 71 Package string 72 Test string 73 Output string 74 Time time.Time // encodes as an RFC3339-format string 75 Elapsed float64 // seconds 76 } 77 78 type scopedTest struct { 79 pkg string 80 name string 81 } 82 83 func scoped(te testEvent) scopedTest { 84 if te.Package == "" { 85 return scopedTest{pkg: mustPkgFromEnv(), name: te.Test} 86 } 87 return scopedTest{pkg: te.Package, name: te.Test} 88 } 89 90 func mustPkgFromEnv() string { 91 packageName := maybePkgFromEnv() 92 if packageName == "" { 93 panic(errors.Errorf("package name environment variable %s is not set", pkgEnv)) 94 } 95 return packageName 96 } 97 98 func maybePkgFromEnv() string { 99 packageName, ok := os.LookupEnv(pkgEnv) 100 if !ok { 101 return "" 102 } 103 return packageName 104 } 105 106 func trimPkg(pkg string) string { 107 return strings.TrimPrefix(pkg, issues.CockroachPkgPrefix) 108 } 109 110 func listFailures( 111 ctx context.Context, 112 input io.Reader, 113 f func(ctx context.Context, title, packageName, testName, testMessage, authorEmail string) error, 114 ) error { 115 // Tests that took less than this are not even considered for slow test 116 // reporting. This is so that we protect against large number of 117 // programatically-generated subtests. 118 const shortTestFilterSecs float64 = 0.5 119 var timeoutMsg = "panic: test timed out after" 120 121 var packageOutput bytes.Buffer 122 123 // map from test name to list of events (each log line is an event, plus 124 // start and pass/fail events). 125 // Tests/events are "outstanding" until we see a final pass/fail event. 126 // Because of the way the go test runner prints output, in case a subtest times 127 // out or panics, we don't get a pass/fail event for sibling and ancestor 128 // tests. Those tests will remain "outstanding" and will be ignored for the 129 // purpose of issue reporting. 130 outstandingOutput := make(map[scopedTest][]testEvent) 131 failures := make(map[scopedTest][]testEvent) 132 var slowPassEvents []testEvent 133 var slowFailEvents []testEvent 134 135 // init is true for the preamble of the input before the first "run" test 136 // event. 137 init := true 138 // trustTimestamps will be set if we don't find a marker suggesting that the 139 // input comes from a stress run. In that case, stress prints all its output 140 // at once (for a captured failed test run), so the test2json timestamps are 141 // meaningless. 142 trustTimestamps := true 143 // elapsedTotalSec accumulates the time spent in all tests, passing or 144 // failing. In case the input comes from a stress run, this will be used to 145 // deduce the duration of a timed out test. 146 var elapsedTotalSec float64 147 // Will be set if the last test timed out. 148 var timedOutCulprit scopedTest 149 var timedOutEvent testEvent 150 var curTestStart time.Time 151 var last scopedTest 152 var lastEvent testEvent 153 scanner := bufio.NewScanner(input) 154 for scanner.Scan() { 155 var te testEvent 156 { 157 line := scanner.Text() // has no EOL marker 158 if len(line) <= 2 || line[0] != '{' || line[len(line)-1] != '}' { 159 // This line is not test2json output, skip it. This can happen if 160 // whatever feeds our input has some extra non-JSON lines such as 161 // would happen with `make` invocations. 162 continue 163 } 164 if err := json.Unmarshal([]byte(line), &te); err != nil { 165 return errors.Wrapf(err, "unable to parse %q", line) 166 } 167 } 168 lastEvent = te 169 170 if te.Test != "" { 171 init = false 172 } 173 if init && strings.Contains(te.Output, "-exec 'stress '") { 174 trustTimestamps = false 175 } 176 if timedOutCulprit.name == "" && te.Elapsed > 0 { 177 // We don't count subtests as those are counted in the parent. 178 if split := strings.SplitN(te.Test, "/", 2); len(split) == 1 { 179 elapsedTotalSec += te.Elapsed 180 } 181 } 182 183 if timedOutCulprit.name == te.Test && te.Elapsed != 0 { 184 te.Elapsed = timedOutEvent.Elapsed 185 } 186 187 // Events for the overall package test do not set Test. 188 if len(te.Test) > 0 { 189 switch te.Action { 190 case "run": 191 last = scoped(te) 192 if trustTimestamps { 193 curTestStart = te.Time 194 } 195 case "output": 196 key := scoped(te) 197 outstandingOutput[key] = append(outstandingOutput[key], te) 198 if strings.Contains(te.Output, timeoutMsg) { 199 timedOutCulprit = key 200 201 // Fill in the Elapsed field for a timeout event. 202 // As of go1.11, the Elapsed field is bogus for fail events for timed 203 // out tests, so we do our own computation. 204 // See https://github.com/golang/go/issues/27568 205 // 206 // Also, if the input is coming from stress, there will not even be a 207 // fail event for the test, so the Elapsed field computed here will be 208 // useful. 209 if trustTimestamps { 210 te.Elapsed = te.Time.Sub(curTestStart).Seconds() 211 } else { 212 // If we don't trust the timestamps, then we compute the test's 213 // duration by subtracting all the durations that we've seen so far 214 // (which we do trust to some extent). Note that this is not 215 // entirely accurate, since there's no information about the 216 // duration about sibling subtests which may have run. And further 217 // note that it doesn't work well at all for small timeouts because 218 // the resolution that the test durations have is just tens of 219 // milliseconds, so many quick tests are rounded of to a duration of 220 // 0. 221 re := regexp.MustCompile(`panic: test timed out after (\d*(?:\.\d*)?)(.)`) 222 matches := re.FindStringSubmatch(te.Output) 223 if matches == nil { 224 log.Printf("failed to parse timeout message: %s", te.Output) 225 te.Elapsed = -1 226 } else { 227 dur, err := strconv.ParseFloat(matches[1], 64) 228 if err != nil { 229 log.Fatal(err) 230 } 231 if matches[2] == "m" { 232 // minutes to seconds 233 dur *= 60 234 } else if matches[2] != "s" { 235 log.Fatalf("unexpected time unit in: %s", te.Output) 236 } 237 te.Elapsed = dur - elapsedTotalSec 238 } 239 } 240 timedOutEvent = te 241 } 242 case "pass", "skip": 243 if timedOutCulprit.name != "" { 244 panic(fmt.Sprintf("detected test timeout but test seems to have passed (%+v)", te)) 245 } 246 delete(outstandingOutput, scoped(te)) 247 if te.Elapsed > shortTestFilterSecs { 248 // We ignore subtests; their time contributes to the parent's. 249 if !strings.Contains(te.Test, "/") { 250 slowPassEvents = append(slowPassEvents, te) 251 } 252 } 253 case "fail": 254 key := scoped(te) 255 // Record slow tests. We ignore subtests; their time contributes to the 256 // parent's. Except the timed out (sub)test, for which the parent (if 257 // any) is not going to appear in the report because there's not going 258 // to be a pass/fail event for it. 259 if !strings.Contains(te.Test, "/") || timedOutCulprit == key { 260 slowFailEvents = append(slowFailEvents, te) 261 } 262 // Move the test to the failures collection unless the test timed out. 263 // We have special reporting for timeouts below. 264 if timedOutCulprit != key { 265 failures[key] = outstandingOutput[key] 266 } 267 delete(outstandingOutput, key) 268 } 269 } else if te.Action == "output" { 270 // Output was outside the context of a test. This consists mostly of the 271 // preamble and epilogue that Make outputs, but also any log messages that 272 // are printed by a test binary's main function. 273 packageOutput.WriteString(te.Output) 274 } 275 } 276 277 // On timeout, we might or might not have gotten a fail event for the timed 278 // out test (we seem to get one when processing output from a test binary run, 279 // but not when processing the output of `stress`, which adds some lines at 280 // the end). If we haven't gotten a fail event, the test's output is still 281 // outstanding and the test is not registered in the slowFailEvents 282 // collection. The timeout handling code below relies on slowFailEvents not 283 // being empty though, so we'll process the test here. 284 if timedOutCulprit.name != "" { 285 if _, ok := outstandingOutput[timedOutCulprit]; ok { 286 slowFailEvents = append(slowFailEvents, timedOutEvent) 287 delete(outstandingOutput, timedOutCulprit) 288 } 289 } else { 290 // If we haven't received a final event for the last test, then a 291 // panic/log.Fatal must have happened. Consider it failed. 292 // Note that because of https://github.com/golang/go/issues/27582 there 293 // might be other outstanding tests; we ignore those. 294 if _, ok := outstandingOutput[last]; ok { 295 log.Printf("found outstanding output. Considering last test failed: %s", last) 296 failures[last] = outstandingOutput[last] 297 } 298 } 299 300 // test2json always puts a fail event last unless it sees a big pass message 301 // from the test output. 302 if lastEvent.Action == "fail" && len(failures) == 0 && timedOutCulprit.name == "" { 303 // If we couldn't find a failing Go test, assume that a failure occurred 304 // before running Go and post an issue about that. 305 const unknown = "(unknown)" 306 packageName := maybePkgFromEnv() 307 if packageName == "" { 308 packageName = "unknown" 309 } 310 trimmedPkgName := trimPkg(packageName) 311 title := fmt.Sprintf("%s: package failed", trimmedPkgName) 312 if err := f( 313 ctx, title, packageName, unknown, packageOutput.String(), "", /* authorEmail */ 314 ); err != nil { 315 return errors.Wrap(err, "failed to post issue") 316 } 317 } else { 318 for test, testEvents := range failures { 319 if split := strings.SplitN(test.name, "/", 2); len(split) == 2 { 320 parentTest, subTest := scopedTest{pkg: test.pkg, name: split[0]}, scopedTest{pkg: test.pkg, name: split[1]} 321 log.Printf("consolidating failed subtest %q into parent test %q", subTest.name, parentTest.name) 322 failures[parentTest] = append(failures[parentTest], testEvents...) 323 delete(failures, test) 324 } else { 325 log.Printf("failed parent test %q", test) 326 } 327 } 328 // Sort the failed tests to make the unit tests for this script deterministic. 329 var failedTestNames []scopedTest 330 for name := range failures { 331 failedTestNames = append(failedTestNames, name) 332 } 333 sort.Slice(failedTestNames, func(i, j int) bool { 334 return fmt.Sprint(failedTestNames[i]) < fmt.Sprint(failedTestNames[j]) 335 }) 336 for _, test := range failedTestNames { 337 testEvents := failures[test] 338 authorEmail, err := getAuthorEmail(ctx, test.pkg, test.name) 339 if err != nil { 340 log.Printf("unable to determine test author email: %s\n", err) 341 } 342 var outputs []string 343 for _, testEvent := range testEvents { 344 outputs = append(outputs, testEvent.Output) 345 } 346 message := strings.Join(outputs, "") 347 title := fmt.Sprintf("%s: %s failed", trimPkg(test.pkg), test.name) 348 if err := f(ctx, title, test.pkg, test.name, message, authorEmail); err != nil { 349 return errors.Wrap(err, "failed to post issue") 350 } 351 } 352 } 353 354 // Sort slow tests descendingly by duration. 355 sort.Slice(slowPassEvents, func(i, j int) bool { 356 return slowPassEvents[i].Elapsed > slowPassEvents[j].Elapsed 357 }) 358 sort.Slice(slowFailEvents, func(i, j int) bool { 359 return slowFailEvents[i].Elapsed > slowFailEvents[j].Elapsed 360 }) 361 362 report := genSlowTestsReport(slowPassEvents, slowFailEvents) 363 if err := writeSlowTestsReport(report); err != nil { 364 log.Printf("failed to create slow tests report: %s", err) 365 } 366 367 // If the run timed out, file an issue. A couple of cases: 368 // 1) If the test that was running when the package timed out is the longest 369 // test, then we blame it. The common case is the test deadlocking - it would 370 // have run forever. 371 // 2) Otherwise, we don't blame anybody in particular. We file a generic issue 372 // listing the package name containing the report of long-running tests. 373 if timedOutCulprit.name != "" { 374 slowest := slowFailEvents[0] 375 if len(slowPassEvents) > 0 && slowPassEvents[0].Elapsed > slowest.Elapsed { 376 slowest = slowPassEvents[0] 377 } 378 if timedOutCulprit == scoped(slowest) { 379 // The test that was running when the timeout hit is the one that ran for 380 // the longest time. 381 authorEmail, err := getAuthorEmail(ctx, timedOutCulprit.pkg, timedOutCulprit.name) 382 if err != nil { 383 log.Printf("unable to determine test author email: %s\n", err) 384 } 385 title := fmt.Sprintf("%s: %s timed out", trimPkg(timedOutCulprit.pkg), timedOutCulprit.name) 386 log.Printf("timeout culprit found: %s\n", timedOutCulprit.name) 387 if err := f(ctx, title, timedOutCulprit.pkg, timedOutCulprit.name, report, authorEmail); err != nil { 388 return errors.Wrap(err, "failed to post issue") 389 } 390 } else { 391 packageName := maybePkgFromEnv() 392 if packageName == "" { 393 packageName = "unknown" 394 } 395 trimmedPkgName := trimPkg(packageName) 396 title := fmt.Sprintf("%s: package timed out", trimmedPkgName) 397 // Andrei gets these reports for now, but don't think I'll fix anything 398 // you fools. 399 // TODO(andrei): Figure out how to assign to the on-call engineer. Maybe 400 // get their name from the Slack channel? 401 log.Printf("timeout culprit not found\n") 402 if err := f( 403 ctx, title, packageName, "(unknown)" /* testName */, report, "andreimatei1@gmail.com", 404 ); err != nil { 405 return errors.Wrap(err, "failed to post issue") 406 } 407 } 408 } 409 410 return nil 411 } 412 413 func genSlowTestsReport(slowPassingTests, slowFailingTests []testEvent) string { 414 var b strings.Builder 415 b.WriteString("Slow failing tests:\n") 416 for i, te := range slowFailingTests { 417 if i == 20 { 418 break 419 } 420 fmt.Fprintf(&b, "%s - %.2fs\n", te.Test, te.Elapsed) 421 } 422 if len(slowFailingTests) == 0 { 423 fmt.Fprint(&b, "<none>\n") 424 } 425 426 b.WriteString("\nSlow passing tests:\n") 427 for i, te := range slowPassingTests { 428 if i == 20 { 429 break 430 } 431 fmt.Fprintf(&b, "%s - %.2fs\n", te.Test, te.Elapsed) 432 } 433 if len(slowPassingTests) == 0 { 434 fmt.Fprint(&b, "<none>\n") 435 } 436 return b.String() 437 } 438 439 func writeSlowTestsReport(report string) error { 440 return ioutil.WriteFile("artifacts/slow-tests-report.txt", []byte(report), 0644) 441 } 442 443 func getAuthorEmail(ctx context.Context, packageName, testName string) (string, error) { 444 // Search the source code for the email address of the last committer to touch 445 // the first line of the source code that contains testName. Then, ask GitHub 446 // for the GitHub username of the user with that email address by searching 447 // commits in cockroachdb/cockroach for commits authored by the address. 448 subtests := strings.Split(testName, "/") 449 testName = subtests[0] 450 packageName = strings.TrimPrefix(packageName, "github.com/cockroachdb/cockroach/") 451 cmd := exec.Command(`/bin/bash`, `-c`, 452 fmt.Sprintf(`git grep -n "func %s" $(git rev-parse --show-toplevel)/%s/*_test.go`, 453 testName, packageName)) 454 // This command returns output such as: 455 // ../ccl/storageccl/export_test.go:31:func TestExportCmd(t *testing.T) { 456 out, err := cmd.CombinedOutput() 457 if err != nil { 458 return "", errors.Errorf("couldn't find test %s in %s: %s %s", 459 testName, packageName, err, string(out)) 460 } 461 re := regexp.MustCompile(`(.*):(.*):`) 462 // The first 2 :-delimited fields are the filename and line number. 463 matches := re.FindSubmatch(out) 464 if matches == nil { 465 return "", errors.Errorf("couldn't find filename/line number for test %s in %s: %s", 466 testName, packageName, string(out)) 467 } 468 filename := matches[1] 469 linenum := matches[2] 470 471 // Now run git blame. 472 cmd = exec.Command(`/bin/bash`, `-c`, 473 fmt.Sprintf(`git blame --porcelain -L%s,+1 %s | grep author-mail`, 474 linenum, filename)) 475 // This command returns output such as: 476 // author-mail <jordan@cockroachlabs.com> 477 out, err = cmd.CombinedOutput() 478 if err != nil { 479 return "", errors.Errorf("couldn't find author of test %s in %s: %s %s", 480 testName, packageName, err, string(out)) 481 } 482 re = regexp.MustCompile("author-mail <(.*)>") 483 matches = re.FindSubmatch(out) 484 if matches == nil { 485 return "", errors.Errorf("couldn't find author email of test %s in %s: %s", 486 testName, packageName, string(out)) 487 } 488 return string(matches[1]), nil 489 }