github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/dashboard/app/tree.go (about) 1 // Copyright 2023 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package main 5 6 // Given information on how commits flow from one kernel source tree to another, assign 7 // bugs labels of two kinds: 8 // a) LabelIntroduced -- reproducer does not work in any other kernel tree, FROM which commits flow. 9 // b) LabelReached -- reproducer does not work in any other kernel tree, TO which commits flow. 10 11 import ( 12 "context" 13 "fmt" 14 "sort" 15 "sync" 16 "time" 17 18 "github.com/google/syzkaller/dashboard/dashapi" 19 "golang.org/x/sync/errgroup" 20 db "google.golang.org/appengine/v2/datastore" 21 "google.golang.org/appengine/v2/log" 22 ) 23 24 // generateTreeOriginJobs generates new jobs for bug origin tree determination. 25 func generateTreeOriginJobs(cGlobal context.Context, bugKey *db.Key, 26 managers map[string]dashapi.ManagerJobs) (*Job, *db.Key, error) { 27 var job *Job 28 var jobKey *db.Key 29 tx := func(c context.Context) error { 30 bug := new(Bug) 31 if err := db.Get(c, bugKey, bug); err != nil { 32 return fmt.Errorf("failed to get bug: %w", err) 33 } 34 ctx := &bugTreeContext{ 35 c: c, 36 cGlobal: cGlobal, 37 bug: bug, 38 bugKey: bug.key(c), 39 } 40 ret := ctx.pollBugTreeJobs(managers) 41 switch ret.(type) { 42 case pollResultError: 43 return ret.(error) 44 case pollResultWait: 45 newTime, ok := ret.(time.Time) 46 if ok && newTime.After(bug.TreeTests.NextPoll) { 47 bug.TreeTests.NextPoll = newTime 48 } 49 } 50 bug.TreeTests.NeedPoll = false 51 if _, err := db.Put(c, bugKey, bug); err != nil { 52 return fmt.Errorf("failed to put bug: %w", err) 53 } 54 job, jobKey = ctx.job, ctx.jobKey 55 return nil 56 } 57 if err := db.RunInTransaction(cGlobal, tx, 58 &db.TransactionOptions{XG: true, Attempts: 10}); err != nil { 59 return nil, nil, err 60 } 61 return job, jobKey, nil 62 } 63 64 // treeOriginJobDone is supposed to be called when tree origin job is done. 65 // It keeps the cached info in Bug up to date and assigns bug tree origin labels. 66 func treeOriginJobDone(cGlobal context.Context, jobKey *db.Key, job *Job) error { 67 bugKey := jobKey.Parent() 68 tx := func(c context.Context) error { 69 bug := new(Bug) 70 if err := db.Get(c, bugKey, bug); err != nil { 71 return fmt.Errorf("failed to get bug: %w", err) 72 } 73 ctx := &bugTreeContext{ 74 c: c, 75 cGlobal: cGlobal, 76 bug: bug, 77 bugKey: bug.key(c), 78 noNewJobs: true, 79 } 80 ret := ctx.pollBugTreeJobs( 81 map[string]dashapi.ManagerJobs{job.Manager: {TestPatches: true}}, 82 ) 83 switch ret.(type) { 84 case pollResultError: 85 return ret.(error) 86 case pollResultPending: 87 bug.TreeTests.NextPoll = time.Time{} 88 bug.TreeTests.NeedPoll = true 89 } 90 if _, err := db.Put(c, bugKey, bug); err != nil { 91 return fmt.Errorf("failed to put bug: %w", err) 92 } 93 return nil 94 } 95 return db.RunInTransaction(cGlobal, tx, &db.TransactionOptions{XG: true, Attempts: 10}) 96 } 97 98 type pollTreeJobResult interface{} 99 100 // pollResultPending is returned when we wait some job to finish. 101 type pollResultPending struct{} 102 103 // pollResultWait is returned when we know the next time the process could be repeated. 104 type pollResultWait time.Time 105 106 // pollResultSkip means that there are no poll jobs we could run at the moment. 107 // It's impossible to say when it changes, so it's better not to repeat polling soon. 108 type pollResultSkip struct{} 109 110 type pollResultError error 111 112 type pollResultDone struct { 113 Crashed bool 114 Finished time.Time 115 } 116 117 type bugTreeContext struct { 118 c context.Context 119 // Datastore puts limits on how often a single entity can be accessed by transactions. 120 // And we actually don't always need a consistent view of the DB, we just want to query 121 // a single entity. So, when possible, let's make queries outside of a transaction. 122 cGlobal context.Context 123 crash *Crash 124 crashKey *db.Key 125 bugKey *db.Key 126 bug *Bug 127 build *Build 128 repoNode *repoNode 129 noNewJobs bool 130 131 // If any jobs were created, here'll be one of them. 132 job *Job 133 jobKey *db.Key 134 } 135 136 func (ctx *bugTreeContext) pollBugTreeJobs(managers map[string]dashapi.ManagerJobs) pollTreeJobResult { 137 // Determine the crash we'd stick to. 138 err := ctx.loadCrashInfo() 139 if err != nil { 140 log.Errorf(ctx.c, "bug %q: failed to load crash info: %s", ctx.bug.displayTitle(), err) 141 return pollResultError(err) 142 } 143 if ctx.crash == nil { 144 // There are no crashes we could further work with. 145 // TODO: consider looking at the recent repro retest results. 146 log.Infof(ctx.c, "bug %q: no suitable crash", ctx.bug.displayTitle()) 147 return pollResultSkip{} 148 } 149 if ctx.repoNode == nil { 150 // We have no information about the tree on which the bug happened. 151 log.Errorf(ctx.c, "bug %q: no information about the tree", ctx.bug.displayTitle()) 152 return pollResultSkip{} 153 } 154 if !managers[ctx.crash.Manager].TestPatches { 155 return pollResultSkip{} 156 } 157 if len(ctx.bug.TreeTests.List) > 0 && ctx.crashKey.IntID() != ctx.bug.TreeTests.List[0].CrashID { 158 // Clean up old job records, they are no longer relevant. 159 ctx.bug.TreeTests.List = nil 160 } 161 for i := range ctx.bug.TreeTests.List { 162 err := ctx.bug.TreeTests.List[i].applyPending(ctx.c) 163 if err != nil { 164 return pollResultError(err) 165 } 166 } 167 return ctx.groupResults([]pollTreeJobResult{ 168 ctx.setOriginLabels(), 169 ctx.missingBackports(), 170 }) 171 } 172 173 func (ctx *bugTreeContext) setOriginLabels() pollTreeJobResult { 174 if !ctx.labelsCanBeSet() || ctx.bug.HasUserLabel(OriginLabel) { 175 return pollResultSkip{} 176 } 177 ctx.bug.UnsetLabels(OriginLabel) 178 179 var results []pollTreeJobResult 180 perNode := map[*repoNode]pollTreeJobResult{} 181 for node, merge := range ctx.repoNode.allReachable() { 182 var result pollTreeJobResult 183 if merge { 184 // Merge base gives a much better result quality, so use it whenever possible. 185 result = ctx.runRepro(node.repo, wantFirstAny{}, runOnMergeBase{ 186 Repo: ctx.build.KernelRepo, 187 Branch: ctx.build.KernelBranch, 188 }) 189 } else { 190 result = ctx.runRepro(node.repo, wantFirstAny{}, runOnHEAD{}) 191 } 192 perNode[node] = result 193 results = append(results, result) 194 } 195 result := ctx.groupResults(results) 196 if _, ok := result.(pollResultPending); ok { 197 // At least wait until all started jobs have finished (successfully or not). 198 return result 199 } 200 lastDone := ctx.lastDone(results) 201 if lastDone.IsZero() { 202 // Demand that at least one of the finished jobs has finished successfully. 203 return pollResultSkip{} 204 } 205 // Since we have a repro for it, it definitely crashed at some point. 206 perNode[ctx.repoNode] = pollResultDone{Crashed: true} 207 allLabels := append(ctx.selectRepoLabels(true, perNode), ctx.selectRepoLabels(false, perNode)...) 208 for _, label := range allLabels { 209 if label == ctx.repoNode.repo.LabelIntroduced || label == ctx.repoNode.repo.LabelReached { 210 // It looks like our reproducer does not work on other trees. 211 // Just in case verify that it still works on the original one. 212 result := ctx.runRepro(ctx.repoNode.repo, wantNewAny(lastDone), runOnHEAD{}) 213 resultDone, ok := result.(pollResultDone) 214 if !ok { 215 return result 216 } 217 if !resultDone.Crashed { 218 // Unfortunately the repro no longer works. Don't assign labels. 219 return pollResultSkip{} 220 } 221 } 222 } 223 var labels []BugLabel 224 for _, label := range allLabels { 225 labels = append(labels, BugLabel{Label: OriginLabel, Value: label}) 226 } 227 ctx.bug.SetLabels(makeLabelSet(ctx.c, ctx.bug.Namespace), labels) 228 return pollResultSkip{} 229 } 230 231 // selectRepoLabels attributes bugs to trees depending on the patch testing results. 232 func (ctx *bugTreeContext) selectRepoLabels(in bool, results map[*repoNode]pollTreeJobResult) []string { 233 crashed := map[*repoNode]bool{} 234 for node, result := range results { 235 done, ok := result.(pollResultDone) 236 if ok { 237 crashed[node] = done.Crashed 238 } 239 } 240 for node := range crashed { 241 if !crashed[node] { 242 continue 243 } 244 // (1) The in = true case: 245 // If, for a tree X, there's a tree Y from which commits flow to X and the reproducer crashed 246 // on Y, X cannot be among bug origin trees. 247 // (1) The in = false case: 248 // If, for a tree X, there's a tree Y to which commits flow to X and the reproducer crashed 249 // on Y, X cannot be the last tree to which the bug has spread. 250 for otherNode := range node.reachable(!in) { 251 crashed[otherNode] = false 252 } 253 } 254 ret := []string{} 255 for node, set := range crashed { 256 if !set { 257 continue 258 } 259 if in && node.repo.LabelIntroduced != "" { 260 ret = append(ret, node.repo.LabelIntroduced) 261 } else if !in && node.repo.LabelReached != "" { 262 ret = append(ret, node.repo.LabelReached) 263 } 264 } 265 return ret 266 } 267 268 // Test if there's any sense in testing other trees. 269 // For example, if we hit a bug on a mainline, there's no sense to test linux-next to check 270 // if it's a linux-next bug. 271 func (ctx *bugTreeContext) labelsCanBeSet() bool { 272 for node := range ctx.repoNode.reachable(true) { 273 if node.repo.LabelIntroduced != "" { 274 return true 275 } 276 } 277 for node := range ctx.repoNode.reachable(false) { 278 if node.repo.LabelReached != "" { 279 return true 280 } 281 } 282 return ctx.repoNode.repo.LabelIntroduced != "" || 283 ctx.repoNode.repo.LabelReached != "" 284 } 285 286 func (ctx *bugTreeContext) missingBackports() pollTreeJobResult { 287 if !ctx.repoNode.repo.DetectMissingBackports || ctx.bug.HasUserLabel(MissingBackportLabel) { 288 return pollResultSkip{} 289 } 290 var okDate time.Time 291 results := []pollTreeJobResult{} 292 for node, merge := range ctx.repoNode.reachable(true) { 293 resultOK := ctx.runRepro(node.repo, wantFirstOK{}, runOnHEAD{}) 294 doneOK, ok := resultOK.(pollResultDone) 295 if !ok { 296 results = append(results, resultOK) 297 continue 298 } 299 var resultCrash pollTreeJobResult 300 if merge { 301 resultCrash = ctx.runRepro(node.repo, wantFirstAny{}, runOnMergeBase{ 302 Repo: ctx.build.KernelRepo, 303 Branch: ctx.build.KernelBranch, 304 }) 305 } else { 306 // We already know that the reproducer doesn't crash the tree. 307 // There'd be no sense to call runRepro in the hope of getting a crash, 308 // so let's just look into the past tree testing results. 309 resultCrash, _ = ctx.bug.findResult(ctx.c, node.repo, wantFirstCrash{}, runOnAny{}) 310 } 311 doneCrash, ok := resultCrash.(pollResultDone) 312 if !ok { 313 results = append(results, resultCrash) 314 continue 315 } else if merge && doneCrash.Crashed || doneOK.Finished.After(doneCrash.Finished) { 316 // That's what we want: earlier it crashed and then stopped. 317 okDate = doneOK.Finished 318 break 319 } 320 } 321 if okDate.IsZero() { 322 return ctx.groupResults(results) 323 } 324 // We are about to assign the "missing backport" label. 325 // To reduce the number of backports, just in case run once more on HEAD. 326 // The bug fix could have already reached the repository. 327 result := ctx.runRepro(ctx.repoNode.repo, wantNewAny(okDate), runOnHEAD{}) 328 resultDone, ok := result.(pollResultDone) 329 if !ok { 330 return result 331 } 332 ctx.bug.UnsetLabels(MissingBackportLabel) 333 if resultDone.Crashed { 334 ctx.bug.SetLabels(makeLabelSet(ctx.c, ctx.bug.Namespace), []BugLabel{ 335 {Label: MissingBackportLabel}, 336 }) 337 } 338 return pollResultSkip{} 339 } 340 341 func (ctx *bugTreeContext) lastDone(results []pollTreeJobResult) time.Time { 342 var maxTime time.Time 343 for _, item := range results { 344 done, ok := item.(pollResultDone) 345 if !ok { 346 continue 347 } 348 if done.Finished.After(maxTime) { 349 maxTime = done.Finished 350 } 351 } 352 return maxTime 353 } 354 355 func (ctx *bugTreeContext) groupResults(results []pollTreeJobResult) pollTreeJobResult { 356 var minWait time.Time 357 for _, result := range results { 358 switch v := result.(type) { 359 case pollResultPending, pollResultError: 360 // Wait for the job result to continue. 361 return result 362 case pollResultWait: 363 t := time.Time(v) 364 if minWait.IsZero() || minWait.After(t) { 365 minWait = t 366 } 367 } 368 } 369 if !minWait.IsZero() { 370 return pollResultWait(minWait) 371 } 372 return pollResultSkip{} 373 } 374 375 type expectedResult interface{} 376 377 // resultFreshness subtypes. 378 type wantFirstOK struct{} 379 type wantFirstCrash struct{} 380 type wantFirstAny struct{} 381 type wantNewAny time.Time 382 383 type runReproOn interface{} 384 385 // runReproOn subtypes. 386 type runOnAny struct{} // attempts to find any result, if unsuccessful, runs on HEAD 387 type runOnHEAD struct{} 388 type runOnMergeBase struct { 389 Repo string 390 Branch string 391 } 392 393 func (ctx *bugTreeContext) runRepro(repo KernelRepo, result expectedResult, runOn runReproOn) pollTreeJobResult { 394 ret := ctx.doRunRepro(repo, result, runOn) 395 log.Infof(ctx.c, "runRepro on %s, %T, %T: %#v", repo.Alias, result, runOn, ret) 396 return ret 397 } 398 399 func (ctx *bugTreeContext) doRunRepro(repo KernelRepo, result expectedResult, runOn runReproOn) pollTreeJobResult { 400 existingResult, _ := ctx.bug.findResult(ctx.c, repo, result, runOn) 401 if _, ok := existingResult.(pollResultSkip); !ok { 402 return existingResult 403 } 404 // Okay, nothing suitable was found. We need to set up a new job. 405 if ctx.noNewJobs { 406 return pollResultPending{} 407 } 408 // First check if there's existing BugTreeTest object. 409 if _, ok := runOn.(runOnAny); ok { 410 runOn = runOnHEAD{} 411 } 412 candidates := ctx.bug.matchingTreeTests(repo, runOn) 413 var bugTreeTest *BugTreeTest 414 if len(candidates) > 0 { 415 bugTreeTest = &ctx.bug.TreeTests.List[candidates[0]] 416 } else { 417 item := BugTreeTest{ 418 CrashID: ctx.crashKey.IntID(), 419 Repo: repo.URL, 420 Branch: repo.Branch, 421 } 422 if v, ok := runOn.(runOnMergeBase); ok { 423 item.MergeBaseRepo = v.Repo 424 item.MergeBaseBranch = v.Branch 425 } 426 ctx.bug.TreeTests.List = append(ctx.bug.TreeTests.List, item) 427 bugTreeTest = &ctx.bug.TreeTests.List[len(ctx.bug.TreeTests.List)-1] 428 } 429 430 if bugTreeTest.Error != "" { 431 const errorRetryTime = 24 * time.Hour * 14 432 result := ctx.ensureRepeatPeriod(bugTreeTest.Error, errorRetryTime) 433 if _, ok := result.(pollResultSkip); !ok { 434 return result 435 } 436 bugTreeTest.Error = "" 437 } 438 if bugTreeTest.Last != "" { 439 const fixRetryTime = 24 * time.Hour * 45 440 result := ctx.ensureRepeatPeriod(bugTreeTest.Last, fixRetryTime) 441 if _, ok := result.(pollResultSkip); !ok { 442 return result 443 } 444 } 445 var err error 446 ctx.job, ctx.jobKey, err = addTestJob(ctx.c, &testJobArgs{ 447 crash: ctx.crash, 448 crashKey: ctx.crashKey, 449 configRef: ctx.build.KernelConfig, 450 configAppend: repo.AppendConfig, 451 inTransaction: true, 452 treeOrigin: true, 453 testReqArgs: testReqArgs{ 454 bug: ctx.bug, 455 bugKey: ctx.bugKey, 456 repo: bugTreeTest.Repo, 457 branch: bugTreeTest.Branch, 458 mergeBaseRepo: bugTreeTest.MergeBaseRepo, 459 mergeBaseBranch: bugTreeTest.MergeBaseBranch, 460 }, 461 }) 462 if err != nil { 463 return pollResultError(err) 464 } 465 bugTreeTest.Pending = ctx.jobKey.Encode() 466 return pollResultPending{} 467 } 468 469 func (ctx *bugTreeContext) ensureRepeatPeriod(jobKey string, period time.Duration) pollTreeJobResult { 470 job, _, err := fetchJob(ctx.c, jobKey) 471 if err != nil { 472 return pollResultError(err) 473 } 474 timePassed := timeNow(ctx.c).Sub(job.Finished) 475 if timePassed < period { 476 return pollResultWait(job.Finished.Add(period)) 477 } 478 return pollResultSkip{} 479 } 480 481 func (bug *Bug) findResult(c context.Context, 482 repo KernelRepo, result expectedResult, runOn runReproOn) (pollTreeJobResult, *Job) { 483 anyPending := false 484 for _, i := range bug.matchingTreeTests(repo, runOn) { 485 info := &bug.TreeTests.List[i] 486 anyPending = anyPending || info.Pending != "" 487 key := "" 488 switch result.(type) { 489 case wantFirstOK: 490 key = info.FirstOK 491 case wantFirstCrash: 492 key = info.FirstCrash 493 case wantFirstAny: 494 key = info.First 495 case wantNewAny: 496 key = info.Last 497 default: 498 return pollResultError(fmt.Errorf("unexpected expected result: %T", result)), nil 499 } 500 if key == "" { 501 continue 502 } 503 job, _, err := fetchJob(c, key) 504 if err != nil { 505 return pollResultError(err), nil 506 } 507 if date, ok := result.(wantNewAny); ok { 508 if job.Finished.Before(time.Time(date)) { 509 continue 510 } 511 } 512 return pollResultDone{ 513 Crashed: job.CrashTitle != "", 514 Finished: job.Finished, 515 }, job 516 } 517 if anyPending { 518 return pollResultPending{}, nil 519 } else { 520 return pollResultSkip{}, nil 521 } 522 } 523 524 func (bug *Bug) matchingTreeTests(repo KernelRepo, runOn runReproOn) []int { 525 ret := []int{} 526 for i, item := range bug.TreeTests.List { 527 if item.Repo != repo.URL { 528 continue 529 } 530 ok := true 531 switch v := runOn.(type) { 532 case runOnHEAD: 533 // TODO: should we check for an empty merge base here? 534 ok = item.Branch == repo.Branch 535 case runOnMergeBase: 536 ok = item.Branch == repo.Branch && 537 item.MergeBaseRepo == v.Repo && 538 item.MergeBaseBranch == v.Branch 539 } 540 if ok { 541 ret = append(ret, i) 542 } 543 } 544 return ret 545 } 546 547 func (ctx *bugTreeContext) loadCrashInfo() error { 548 // First look at the crash from previous tests. 549 if len(ctx.bug.TreeTests.List) > 0 { 550 crashID := ctx.bug.TreeTests.List[len(ctx.bug.TreeTests.List)-1].CrashID 551 crashKey := db.NewKey(ctx.c, "Crash", "", crashID, ctx.bugKey) 552 crash := new(Crash) 553 // We need to also tolerate the case when the crash was just deleted. 554 err := db.Get(ctx.cGlobal, crashKey, crash) 555 if err != nil && err != db.ErrNoSuchEntity { 556 return fmt.Errorf("failed to get crash: %w", err) 557 } else if err == nil { 558 ok, build, err := ctx.isCrashRelevant(crash) 559 if err != nil { 560 return err 561 } 562 if ok { 563 ctx.build = build 564 ctx.crash = crash 565 ctx.crashKey = crashKey 566 } 567 } 568 } 569 570 // Query the most relevant crash with repro. 571 crash, crashKey, err := findCrashForBug(ctx.cGlobal, ctx.bug) 572 if err != nil { 573 return err 574 } 575 ok, build, err := ctx.isCrashRelevant(crash) 576 if err != nil { 577 return err 578 } else if ok && (ctx.crash == nil || crash.ReportLen > ctx.crash.ReportLen) { 579 // Update the crash only if we found a better one. 580 ctx.build = build 581 ctx.crash = crash 582 ctx.crashKey = crashKey 583 } 584 // Load the rest of the data. 585 if ctx.crash != nil { 586 var err error 587 ns := ctx.bug.Namespace 588 repoGraph, err := makeRepoGraph(getNsConfig(ctx.c, ns).Repos) 589 if err != nil { 590 return err 591 } 592 ctx.repoNode = repoGraph.nodeByRepo(ctx.build.KernelRepo, ctx.build.KernelBranch) 593 } 594 return nil 595 } 596 597 func (ctx *bugTreeContext) isCrashRelevant(crash *Crash) (bool, *Build, error) { 598 if crash.ReproIsRevoked { 599 // No sense in running the reproducer. 600 return false, nil, nil 601 } else if crash.ReproC == 0 && crash.ReproSyz == 0 { 602 // Let's wait for the repro. 603 return false, nil, nil 604 } 605 newManager, _ := activeManager(ctx.cGlobal, crash.Manager, ctx.bug.Namespace) 606 if newManager != crash.Manager { 607 // The manager was deprecated since the crash. 608 // Let's just ignore such bugs for now. 609 return false, nil, nil 610 } 611 build, err := loadBuild(ctx.cGlobal, ctx.bug.Namespace, crash.BuildID) 612 if err != nil { 613 return false, nil, err 614 } 615 mgrBuild, err := lastManagerBuild(ctx.cGlobal, build.Namespace, newManager) 616 if err != nil { 617 return false, build, err 618 } 619 // It does happen that we sometimes update the tested tree. 620 // It's not frequent at all, but it will make all results very confusing. 621 return build.KernelRepo == mgrBuild.KernelRepo && 622 build.KernelBranch == mgrBuild.KernelBranch, build, nil 623 } 624 625 func (test *BugTreeTest) applyPending(c context.Context) error { 626 if test.Pending == "" { 627 return nil 628 } 629 job, _, err := fetchJob(c, test.Pending) 630 if err != nil { 631 return err 632 } 633 if job.Finished.IsZero() { 634 // Not yet ready. 635 return nil 636 } 637 pendingKey := test.Pending 638 test.Pending = "" 639 if job.Error != 0 { 640 test.Error = pendingKey 641 return nil 642 } 643 test.Last = pendingKey 644 if test.First == "" { 645 test.First = pendingKey 646 } 647 if test.FirstOK == "" && job.CrashTitle == "" { 648 test.FirstOK = pendingKey 649 } else if test.FirstCrash == "" && job.CrashTitle != "" { 650 test.FirstCrash = pendingKey 651 } 652 return nil 653 } 654 655 // treeTestJobs fetches relevant tree testing results. 656 func treeTestJobs(c context.Context, bug *Bug) ([]*dashapi.JobInfo, error) { 657 g, _ := errgroup.WithContext(context.Background()) 658 jobIDs := make(chan string) 659 660 var ret []*dashapi.JobInfo 661 var mu sync.Mutex 662 663 // The underlying code makes a number of queries, so let's do it in parallel to speed up processing. 664 const threads = 3 665 for i := 0; i < threads; i++ { 666 g.Go(func() error { 667 for id := range jobIDs { 668 job, jobKey, err := fetchJob(c, id) 669 if err != nil { 670 return err 671 } 672 build, err := loadBuild(c, job.Namespace, job.BuildID) 673 if err != nil { 674 return err 675 } 676 crashKey := db.NewKey(c, "Crash", "", job.CrashID, bug.key(c)) 677 crash := new(Crash) 678 if err := db.Get(c, crashKey, crash); err != nil { 679 return fmt.Errorf("failed to get crash: %w", err) 680 } 681 info := makeJobInfo(c, job, jobKey, bug, build, crash) 682 mu.Lock() 683 ret = append(ret, info) 684 mu.Unlock() 685 } 686 return nil 687 }) 688 } 689 for _, info := range bug.TreeTests.List { 690 if info.FirstOK != "" { 691 jobIDs <- info.FirstOK 692 } 693 if info.FirstCrash != "" { 694 jobIDs <- info.FirstCrash 695 } 696 if info.Error != "" { 697 jobIDs <- info.Error 698 } 699 } 700 // Wait until we have all information. 701 close(jobIDs) 702 err := g.Wait() 703 if err != nil { 704 return nil, err 705 } 706 // Sort structures to keep output consistent. 707 sort.Slice(ret, func(i, j int) bool { 708 if ret[i].KernelAlias != ret[j].KernelAlias { 709 return ret[i].KernelAlias < ret[j].KernelAlias 710 } 711 return ret[i].Finished.Before(ret[j].Finished) 712 }) 713 return ret, nil 714 } 715 716 // Create a cross-tree bisection job (if needed). 717 // Returns: 718 // a) Job object and its key -- in case of success. 719 // b) Whether the lookup was expensive (it can help optimize crossTreeBisection calls). 720 func crossTreeBisection(c context.Context, bug *Bug, 721 managers map[string]dashapi.ManagerJobs) (*Job, *db.Key, bool, error) { 722 repoGraph, err := makeRepoGraph(getNsConfig(c, bug.Namespace).Repos) 723 if err != nil { 724 return nil, nil, false, err 725 } 726 bugJobs := &lazyJobList{ 727 c: c, 728 bug: bug, 729 jobType: JobBisectFix, 730 } 731 var job *Job 732 var jobKey *db.Key 733 expensive := false 734 err = repoGraph.forEachEdge(func(from, to *repoNode, info KernelRepoLink) error { 735 if jobKey != nil { 736 return nil 737 } 738 if !info.BisectFixes { 739 return nil 740 } 741 expensive = true 742 log.Infof(c, "%s: considering cross-tree bisection %s/%s", 743 bug.displayTitle(), from.repo.Alias, to.repo.Alias) 744 _, crashJob := bug.findResult(c, to.repo, wantNewAny{}, runOnHEAD{}) 745 if crashJob == nil { 746 // No patch testing was performed yet. 747 return nil 748 } 749 if crashJob.CrashTitle == "" { 750 // The bug is already fixed on the target tree. 751 return nil 752 } 753 crashBuild, err := loadBuild(c, bug.Namespace, crashJob.BuildID) 754 if err != nil { 755 return err 756 } 757 manager, _ := activeManager(c, crashJob.Manager, crashJob.Namespace) 758 if !managers[manager].BisectFix { 759 return nil 760 } 761 _, successJob := bug.findResult(c, from.repo, wantNewAny{}, runOnHEAD{}) 762 if successJob == nil { 763 // The jobs is not done yet. 764 return nil 765 } 766 if successJob.CrashTitle != "" { 767 // The kernel tree is still crashed by the repro. 768 return nil 769 } 770 newJob := &Job{ 771 Type: JobBisectFix, 772 Created: timeNow(c), 773 Namespace: bug.Namespace, 774 Manager: crashJob.Manager, 775 BisectFrom: crashBuild.KernelCommit, 776 KernelRepo: from.repo.URL, 777 KernelBranch: from.repo.Branch, 778 MergeBaseRepo: to.repo.URL, 779 MergeBaseBranch: to.repo.Branch, 780 BugTitle: bug.displayTitle(), 781 CrashID: crashJob.CrashID, 782 } 783 // It's expected that crossTreeBisection is not concurrently called with the same 784 // manager list. 785 prevJob, err := bugJobs.lastMatch(newJob) 786 if err != nil { 787 return err 788 } 789 const repeatPeriod = time.Hour * 24 * 30 790 if prevJob != nil && (prevJob.Error == 0 || 791 prevJob.Finished.After(timeNow(c).Add(-repeatPeriod))) { 792 // The job is already pending or failed recently. Skip. 793 return nil 794 } 795 job = newJob 796 jobKey, err = saveJob(c, newJob, bug.key(c)) 797 return err 798 }) 799 return job, jobKey, expensive, err 800 } 801 802 type lazyJobList struct { 803 c context.Context 804 bug *Bug 805 jobType JobType 806 jobs *bugJobs 807 } 808 809 func (list *lazyJobList) lastMatch(job *Job) (*Job, error) { 810 if list.jobs == nil { 811 var err error 812 list.jobs, err = queryBugJobs(list.c, list.bug, list.jobType) 813 if err != nil { 814 return nil, err 815 } 816 } 817 var best *Job 818 for _, item := range list.jobs.all() { 819 otherJob := item.job 820 same := otherJob.Manager == job.Manager && 821 otherJob.KernelRepo == job.KernelRepo && 822 otherJob.KernelBranch == job.KernelBranch && 823 otherJob.CrashID == job.CrashID && 824 otherJob.MergeBaseRepo == job.MergeBaseRepo && 825 otherJob.MergeBaseBranch == job.MergeBaseBranch 826 if !same { 827 continue 828 } 829 if best == nil || best.Created.Before(otherJob.Created) { 830 best = otherJob 831 } 832 } 833 return best, nil 834 } 835 836 func doneCrossTreeBisection(c context.Context, jobKey *db.Key, job *Job) error { 837 if job.Type != JobBisectFix || job.MergeBaseRepo == "" { 838 // Not a cross tree bisection. 839 return nil 840 } 841 if job.Error != 0 || job.isUnreliableBisect() || len(job.Commits) != 1 { 842 // The result is not interesting. 843 return nil 844 } 845 return updateSingleBug(c, jobKey.Parent(), func(bug *Bug) error { 846 bug.FixCandidateJob = jobKey.Encode() 847 return nil 848 }) 849 } 850 851 type repoNode struct { 852 repo KernelRepo 853 edges []repoEdge 854 } 855 856 type repoEdge struct { 857 in bool 858 info KernelRepoLink 859 other *repoNode 860 } 861 862 type repoGraph struct { 863 nodes map[string]*repoNode 864 } 865 866 func makeRepoGraph(repos []KernelRepo) (*repoGraph, error) { 867 g := &repoGraph{ 868 nodes: map[string]*repoNode{}, 869 } 870 for _, repo := range repos { 871 if repo.Alias == "" { 872 return nil, fmt.Errorf("one of the repos has an empty alias") 873 } 874 g.nodes[repo.Alias] = &repoNode{repo: repo} 875 } 876 for _, repo := range repos { 877 for _, link := range repo.CommitInflow { 878 if g.nodes[link.Alias] == nil { 879 return nil, fmt.Errorf("no repo with alias %q", link.Alias) 880 } 881 g.nodes[repo.Alias].addEdge(true, link, g.nodes[link.Alias]) 882 g.nodes[link.Alias].addEdge(false, link, g.nodes[repo.Alias]) 883 } 884 } 885 for alias, node := range g.nodes { 886 reachable := node.reachable(true) 887 if _, ok := reachable[node]; ok { 888 return nil, fmt.Errorf("%q lies on a cycle", alias) 889 } 890 } 891 return g, nil 892 } 893 894 func (g *repoGraph) nodeByRepo(url, branch string) *repoNode { 895 for _, node := range g.nodes { 896 if node.repo.URL == url && node.repo.Branch == branch { 897 return node 898 } 899 } 900 return nil 901 } 902 903 func (g *repoGraph) nodeByAlias(alias string) *repoNode { 904 for _, node := range g.nodes { 905 if node.repo.Alias == alias { 906 return node 907 } 908 } 909 return nil 910 } 911 912 func (g *repoGraph) forEachEdge(cb func(from, to *repoNode, info KernelRepoLink) error) error { 913 for _, node := range g.nodes { 914 for _, e := range node.edges { 915 if !e.in { 916 continue 917 } 918 err := cb(e.other, node, e.info) 919 if err != nil { 920 return err 921 } 922 } 923 } 924 return nil 925 } 926 927 // reachable returns a map *repoNode -> bool (whether commits are merged). 928 func (n *repoNode) reachable(in bool) map[*repoNode]bool { 929 ret := map[*repoNode]bool{} 930 // First collect nodes only reachable via merge=true links. 931 n.reachableMerged(in, true, ret) 932 n.reachableMerged(in, false, ret) 933 return ret 934 } 935 936 func (n *repoNode) reachableMerged(in, onlyMerge bool, ret map[*repoNode]bool) { 937 var dfs func(*repoNode, bool) 938 dfs = func(node *repoNode, merge bool) { 939 for _, edge := range node.edges { 940 if edge.in != in || onlyMerge && !edge.info.Merge { 941 continue 942 } 943 if _, ok := ret[edge.other]; ok { 944 continue 945 } 946 ret[edge.other] = merge && edge.info.Merge 947 dfs(edge.other, merge && edge.info.Merge) 948 } 949 } 950 dfs(n, true) 951 } 952 953 func (n *repoNode) allReachable() map[*repoNode]bool { 954 ret := n.reachable(true) 955 for node, merge := range n.reachable(false) { 956 ret[node] = merge 957 } 958 return ret 959 } 960 961 func (n *repoNode) addEdge(in bool, info KernelRepoLink, other *repoNode) { 962 n.edges = append(n.edges, repoEdge{ 963 in: in, 964 info: info, 965 other: other, 966 }) 967 }