github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/dashboard/app/tree.go (about) 1 // Copyright 2023 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package main 5 6 // Given information on how commits flow from one kernel source tree to another, assign 7 // bugs labels of two kinds: 8 // a) LabelIntroduced -- reproducer does not work in any other kernel tree, FROM which commits flow. 9 // b) LabelReached -- reproducer does not work in any other kernel tree, TO which commits flow. 10 11 import ( 12 "context" 13 "fmt" 14 "sort" 15 "sync" 16 "time" 17 18 "github.com/google/syzkaller/dashboard/dashapi" 19 "golang.org/x/sync/errgroup" 20 db "google.golang.org/appengine/v2/datastore" 21 "google.golang.org/appengine/v2/log" 22 ) 23 24 // generateTreeOriginJobs generates new jobs for bug origin tree determination. 25 func generateTreeOriginJobs(cGlobal context.Context, bugKey *db.Key, 26 managers map[string]dashapi.ManagerJobs) (*Job, *db.Key, error) { 27 var job *Job 28 var jobKey *db.Key 29 tx := func(c context.Context) error { 30 bug := new(Bug) 31 if err := db.Get(c, bugKey, bug); err != nil { 32 return fmt.Errorf("failed to get bug: %w", err) 33 } 34 ctx := &bugTreeContext{ 35 c: c, 36 cGlobal: cGlobal, 37 bug: bug, 38 bugKey: bug.key(c), 39 } 40 ret := ctx.pollBugTreeJobs(managers) 41 switch ret.(type) { 42 case pollResultError: 43 return ret.(error) 44 case pollResultWait: 45 newTime, ok := ret.(time.Time) 46 if ok && newTime.After(bug.TreeTests.NextPoll) { 47 bug.TreeTests.NextPoll = newTime 48 } 49 } 50 bug.TreeTests.NeedPoll = false 51 if _, err := db.Put(c, bugKey, bug); err != nil { 52 return fmt.Errorf("failed to put bug: %w", err) 53 } 54 job, jobKey = ctx.job, ctx.jobKey 55 return nil 56 } 57 if err := runInTransaction(cGlobal, tx, &db.TransactionOptions{XG: true}); err != nil { 58 return nil, nil, err 59 } 60 return job, jobKey, nil 61 } 62 
// treeOriginJobDone is supposed to be called when tree origin job is done.
// It keeps the cached info in Bug up to date and assigns bug tree origin labels.
func treeOriginJobDone(cGlobal context.Context, jobKey *db.Key, job *Job) error {
	bugKey := jobKey.Parent()
	tx := func(c context.Context) error {
		bug := new(Bug)
		if err := db.Get(c, bugKey, bug); err != nil {
			return fmt.Errorf("failed to get bug: %w", err)
		}
		ctx := &bugTreeContext{
			c:       c,
			cGlobal: cGlobal,
			bug:     bug,
			bugKey:  bug.key(c),
			// Re-run the state machine only to consume the finished job's
			// result; never start new jobs from this code path.
			noNewJobs: true,
		}
		ret := ctx.pollBugTreeJobs(
			map[string]dashapi.ManagerJobs{job.Manager: {TestPatches: true}},
		)
		switch ret.(type) {
		case pollResultError:
			return ret.(error)
		case pollResultPending:
			// More work is wanted right away: reset the poll timer and ask
			// the periodic poller to pick this bug up again.
			bug.TreeTests.NextPoll = time.Time{}
			bug.TreeTests.NeedPoll = true
		}
		if _, err := db.Put(c, bugKey, bug); err != nil {
			return fmt.Errorf("failed to put bug: %w", err)
		}
		return nil
	}
	return runInTransaction(cGlobal, tx, &db.TransactionOptions{XG: true})
}

// pollTreeJobResult is a marker interface for the possible outcomes of one
// polling step; the concrete subtypes are below.
type pollTreeJobResult interface{}

// pollResultPending is returned when we wait some job to finish.
type pollResultPending struct{}

// pollResultWait is returned when we know the next time the process could be repeated.
type pollResultWait time.Time

// pollResultSkip means that there are no poll jobs we could run at the moment.
// It's impossible to say when it changes, so it's better not to repeat polling soon.
type pollResultSkip struct{}

// pollResultError wraps a fatal error encountered during polling.
type pollResultError error

// pollResultDone reports a finished patch-test job: whether the reproducer
// crashed the kernel and when the job finished.
type pollResultDone struct {
	Crashed  bool
	Finished time.Time
}

// bugTreeContext carries the state shared by one pass of the tree-origin
// polling state machine for a single bug.
type bugTreeContext struct {
	c context.Context
	// Datastore puts limits on how often a single entity can be accessed by transactions.
	// And we actually don't always need a consistent view of the DB, we just want to query
	// a single entity. So, when possible, let's make queries outside of a transaction.
	cGlobal   context.Context
	crash     *Crash
	crashKey  *db.Key
	bugKey    *db.Key
	bug       *Bug
	build     *Build
	repoNode  *repoNode
	noNewJobs bool

	// If any jobs were created, here'll be one of them.
	job    *Job
	jobKey *db.Key
}

// pollBugTreeJobs performs one step of the tree-origin state machine: it picks
// the crash/build to work with, folds in finished pending jobs, and then tries
// to make progress on both origin labels and missing-backport detection.
func (ctx *bugTreeContext) pollBugTreeJobs(managers map[string]dashapi.ManagerJobs) pollTreeJobResult {
	// Determine the crash we'd stick to.
	err := ctx.loadCrashInfo()
	if err != nil {
		log.Errorf(ctx.c, "bug %q: failed to load crash info: %s", ctx.bug.displayTitle(), err)
		return pollResultError(err)
	}
	if ctx.crash == nil {
		// There are no crashes we could further work with.
		// TODO: consider looking at the recent repro retest results.
		log.Infof(ctx.c, "bug %q: no suitable crash", ctx.bug.displayTitle())
		return pollResultSkip{}
	}
	if ctx.repoNode == nil {
		// We have no information about the tree on which the bug happened.
		log.Errorf(ctx.c, "bug %q: no information about the tree", ctx.bug.displayTitle())
		return pollResultSkip{}
	}
	if !managers[ctx.crash.Manager].TestPatches {
		return pollResultSkip{}
	}
	if len(ctx.bug.TreeTests.List) > 0 && ctx.crashKey.IntID() != ctx.bug.TreeTests.List[0].CrashID {
		// Clean up old job records, they are no longer relevant.
		ctx.bug.TreeTests.List = nil
	}
	for i := range ctx.bug.TreeTests.List {
		err := ctx.bug.TreeTests.List[i].applyPending(ctx.c)
		if err != nil {
			return pollResultError(err)
		}
	}
	return ctx.groupResults([]pollTreeJobResult{
		ctx.setOriginLabels(),
		ctx.missingBackports(),
	})
}

// setOriginLabels runs the reproducer on every tree reachable from the bug's
// tree (in either commit-flow direction) and, once all jobs have finished,
// derives the LabelIntroduced/LabelReached origin labels.
func (ctx *bugTreeContext) setOriginLabels() pollTreeJobResult {
	if !ctx.labelsCanBeSet() || ctx.bug.HasUserLabel(OriginLabel) {
		return pollResultSkip{}
	}
	ctx.bug.UnsetLabels(OriginLabel)

	var results []pollTreeJobResult
	perNode := map[*repoNode]pollTreeJobResult{}
	for node, merge := range ctx.repoNode.allReachable() {
		var result pollTreeJobResult
		if merge {
			// Merge base gives a much better result quality, so use it whenever possible.
			result = ctx.runRepro(node.repo, wantFirstAny{}, runOnMergeBase{
				Repo:   ctx.build.KernelRepo,
				Branch: ctx.build.KernelBranch,
			})
		} else {
			result = ctx.runRepro(node.repo, wantFirstAny{}, runOnHEAD{})
		}
		perNode[node] = result
		results = append(results, result)
	}
	result := ctx.groupResults(results)
	if _, ok := result.(pollResultPending); ok {
		// At least wait until all started jobs have finished (successfully or not).
		return result
	}
	lastDone := ctx.lastDone(results)
	if lastDone.IsZero() {
		// Demand that at least one of the finished jobs has finished successfully.
		return pollResultSkip{}
	}
	// Since we have a repro for it, it definitely crashed at some point.
	perNode[ctx.repoNode] = pollResultDone{Crashed: true}
	allLabels := append(ctx.selectRepoLabels(true, perNode), ctx.selectRepoLabels(false, perNode)...)
	for _, label := range allLabels {
		if label == ctx.repoNode.repo.LabelIntroduced || label == ctx.repoNode.repo.LabelReached {
			// It looks like our reproducer does not work on other trees.
			// Just in case verify that it still works on the original one.
			result := ctx.runRepro(ctx.repoNode.repo, wantNewAny(lastDone), runOnHEAD{})
			resultDone, ok := result.(pollResultDone)
			if !ok {
				return result
			}
			if !resultDone.Crashed {
				// Unfortunately the repro no longer works. Don't assign labels.
				return pollResultSkip{}
			}
		}
	}
	var labels []BugLabel
	for _, label := range allLabels {
		labels = append(labels, BugLabel{Label: OriginLabel, Value: label})
	}
	ctx.bug.SetLabels(makeLabelSet(ctx.c, ctx.bug.Namespace), labels)
	return pollResultSkip{}
}

// selectRepoLabels attributes bugs to trees depending on the patch testing results.
// With in=true it returns LabelIntroduced candidates, with in=false -- LabelReached.
func (ctx *bugTreeContext) selectRepoLabels(in bool, results map[*repoNode]pollTreeJobResult) []string {
	// crashed[node] == true means the node is still a label candidate.
	crashed := map[*repoNode]bool{}
	for node, result := range results {
		done, ok := result.(pollResultDone)
		if ok {
			crashed[node] = done.Crashed
		}
	}
	for node := range crashed {
		if !crashed[node] {
			continue
		}
		// (1) The in = true case:
		// If, for a tree X, there's a tree Y from which commits flow to X and the reproducer crashed
		// on Y, X cannot be among bug origin trees.
		// (1) The in = false case:
		// If, for a tree X, there's a tree Y to which commits flow to X and the reproducer crashed
		// on Y, X cannot be the last tree to which the bug has spread.
		// Note: reachable() is transitive, so clearing a node that would itself
		// have cleared others is safe -- its whole reachable set is cleared here.
		for otherNode := range node.reachable(!in) {
			crashed[otherNode] = false
		}
	}
	ret := []string{}
	for node, set := range crashed {
		if !set {
			continue
		}
		if in && node.repo.LabelIntroduced != "" {
			ret = append(ret, node.repo.LabelIntroduced)
		} else if !in && node.repo.LabelReached != "" {
			ret = append(ret, node.repo.LabelReached)
		}
	}
	return ret
}

// Test if there's any sense in testing other trees.
// For example, if we hit a bug on a mainline, there's no sense to test linux-next to check
// if it's a linux-next bug.
// labelsCanBeSet reports whether any tree connected to the bug's tree (or the
// tree itself) declares an origin label, i.e. whether running origin-detection
// jobs could ever produce a label.
func (ctx *bugTreeContext) labelsCanBeSet() bool {
	for node := range ctx.repoNode.reachable(true) {
		if node.repo.LabelIntroduced != "" {
			return true
		}
	}
	for node := range ctx.repoNode.reachable(false) {
		if node.repo.LabelReached != "" {
			return true
		}
	}
	return ctx.repoNode.repo.LabelIntroduced != "" ||
		ctx.repoNode.repo.LabelReached != ""
}

// missingBackports looks for evidence that a fix exists upstream but has not
// been backported to the bug's tree: some tree FROM which commits flow used to
// crash and later stopped crashing.
func (ctx *bugTreeContext) missingBackports() pollTreeJobResult {
	if !ctx.repoNode.repo.DetectMissingBackports || ctx.bug.HasUserLabel(MissingBackportLabel) {
		return pollResultSkip{}
	}
	var okDate time.Time
	results := []pollTreeJobResult{}
	for node, merge := range ctx.repoNode.reachable(true) {
		resultOK := ctx.runRepro(node.repo, wantFirstOK{}, runOnHEAD{})
		doneOK, ok := resultOK.(pollResultDone)
		if !ok {
			results = append(results, resultOK)
			continue
		}
		var resultCrash pollTreeJobResult
		if merge {
			resultCrash = ctx.runRepro(node.repo, wantFirstAny{}, runOnMergeBase{
				Repo:   ctx.build.KernelRepo,
				Branch: ctx.build.KernelBranch,
			})
		} else {
			// We already know that the reproducer doesn't crash the tree.
			// There'd be no sense to call runRepro in the hope of getting a crash,
			// so let's just look into the past tree testing results.
			resultCrash, _ = ctx.bug.findResult(ctx.c, node.repo, wantFirstCrash{}, runOnAny{})
		}
		doneCrash, ok := resultCrash.(pollResultDone)
		if !ok {
			results = append(results, resultCrash)
			continue
		} else if merge && doneCrash.Crashed || doneOK.Finished.After(doneCrash.Finished) {
			// That's what we want: earlier it crashed and then stopped.
			// For the merge-base case a crash at the (historical) merge base plus
			// an OK on HEAD is enough; otherwise require OK to be newer than the crash.
			okDate = doneOK.Finished
			break
		}
	}
	if okDate.IsZero() {
		return ctx.groupResults(results)
	}
	// We are about to assign the "missing backport" label.
	// To reduce the number of backports, just in case run once more on HEAD.
	// The bug fix could have already reached the repository.
	result := ctx.runRepro(ctx.repoNode.repo, wantNewAny(okDate), runOnHEAD{})
	resultDone, ok := result.(pollResultDone)
	if !ok {
		return result
	}
	ctx.bug.UnsetLabels(MissingBackportLabel)
	if resultDone.Crashed {
		ctx.bug.SetLabels(makeLabelSet(ctx.c, ctx.bug.Namespace), []BugLabel{
			{Label: MissingBackportLabel},
		})
	}
	return pollResultSkip{}
}

// lastDone returns the latest Finished timestamp among the successfully
// completed results, or the zero time if none finished.
func (ctx *bugTreeContext) lastDone(results []pollTreeJobResult) time.Time {
	var maxTime time.Time
	for _, item := range results {
		done, ok := item.(pollResultDone)
		if !ok {
			continue
		}
		if done.Finished.After(maxTime) {
			maxTime = done.Finished
		}
	}
	return maxTime
}

// groupResults folds several sub-results into one: pending/error dominate,
// otherwise the earliest retry time wins, otherwise skip.
func (ctx *bugTreeContext) groupResults(results []pollTreeJobResult) pollTreeJobResult {
	var minWait time.Time
	for _, result := range results {
		switch v := result.(type) {
		case pollResultPending, pollResultError:
			// Wait for the job result to continue.
			return result
		case pollResultWait:
			t := time.Time(v)
			if minWait.IsZero() || minWait.After(t) {
				minWait = t
			}
		}
	}
	if !minWait.IsZero() {
		return pollResultWait(minWait)
	}
	return pollResultSkip{}
}

// expectedResult selects which past job record findResult should look for.
type expectedResult interface{}

// resultFreshness subtypes.
type wantFirstOK struct{}
type wantFirstCrash struct{}
type wantFirstAny struct{}
type wantNewAny time.Time

// runReproOn selects on which kernel revision the repro should be run.
type runReproOn interface{}

// runReproOn subtypes.
385 type runOnAny struct{} // attempts to find any result, if unsuccessful, runs on HEAD 386 type runOnHEAD struct{} 387 type runOnMergeBase struct { 388 Repo string 389 Branch string 390 } 391 392 func (ctx *bugTreeContext) runRepro(repo KernelRepo, result expectedResult, runOn runReproOn) pollTreeJobResult { 393 ret := ctx.doRunRepro(repo, result, runOn) 394 log.Infof(ctx.c, "runRepro on %s, %T, %T: %#v", repo.Alias, result, runOn, ret) 395 return ret 396 } 397 398 func (ctx *bugTreeContext) doRunRepro(repo KernelRepo, result expectedResult, runOn runReproOn) pollTreeJobResult { 399 existingResult, _ := ctx.bug.findResult(ctx.c, repo, result, runOn) 400 if _, ok := existingResult.(pollResultSkip); !ok { 401 return existingResult 402 } 403 // Okay, nothing suitable was found. We need to set up a new job. 404 if ctx.noNewJobs { 405 return pollResultPending{} 406 } 407 // First check if there's existing BugTreeTest object. 408 if _, ok := runOn.(runOnAny); ok { 409 runOn = runOnHEAD{} 410 } 411 candidates := ctx.bug.matchingTreeTests(repo, runOn) 412 var bugTreeTest *BugTreeTest 413 if len(candidates) > 0 { 414 bugTreeTest = &ctx.bug.TreeTests.List[candidates[0]] 415 } else { 416 item := BugTreeTest{ 417 CrashID: ctx.crashKey.IntID(), 418 Repo: repo.URL, 419 Branch: repo.Branch, 420 } 421 if v, ok := runOn.(runOnMergeBase); ok { 422 item.MergeBaseRepo = v.Repo 423 item.MergeBaseBranch = v.Branch 424 } 425 ctx.bug.TreeTests.List = append(ctx.bug.TreeTests.List, item) 426 bugTreeTest = &ctx.bug.TreeTests.List[len(ctx.bug.TreeTests.List)-1] 427 } 428 429 if bugTreeTest.Error != "" { 430 const errorRetryTime = 24 * time.Hour * 14 431 result := ctx.ensureRepeatPeriod(bugTreeTest.Error, errorRetryTime) 432 if _, ok := result.(pollResultSkip); !ok { 433 return result 434 } 435 bugTreeTest.Error = "" 436 } 437 if bugTreeTest.Last != "" { 438 const fixRetryTime = 24 * time.Hour * 45 439 result := ctx.ensureRepeatPeriod(bugTreeTest.Last, fixRetryTime) 440 if _, ok := 
result.(pollResultSkip); !ok { 441 return result 442 } 443 } 444 var err error 445 ctx.job, ctx.jobKey, err = addTestJob(ctx.c, &testJobArgs{ 446 crash: ctx.crash, 447 crashKey: ctx.crashKey, 448 configRef: ctx.build.KernelConfig, 449 configAppend: repo.AppendConfig, 450 inTransaction: true, 451 treeOrigin: true, 452 testReqArgs: testReqArgs{ 453 bug: ctx.bug, 454 bugKey: ctx.bugKey, 455 repo: bugTreeTest.Repo, 456 branch: bugTreeTest.Branch, 457 mergeBaseRepo: bugTreeTest.MergeBaseRepo, 458 mergeBaseBranch: bugTreeTest.MergeBaseBranch, 459 }, 460 }) 461 if err != nil { 462 return pollResultError(err) 463 } 464 bugTreeTest.Pending = ctx.jobKey.Encode() 465 return pollResultPending{} 466 } 467 468 func (ctx *bugTreeContext) ensureRepeatPeriod(jobKey string, period time.Duration) pollTreeJobResult { 469 job, _, err := fetchJob(ctx.c, jobKey) 470 if err != nil { 471 return pollResultError(err) 472 } 473 timePassed := timeNow(ctx.c).Sub(job.Finished) 474 if timePassed < period { 475 return pollResultWait(job.Finished.Add(period)) 476 } 477 return pollResultSkip{} 478 } 479 480 func (bug *Bug) findResult(c context.Context, 481 repo KernelRepo, result expectedResult, runOn runReproOn) (pollTreeJobResult, *Job) { 482 anyPending := false 483 for _, i := range bug.matchingTreeTests(repo, runOn) { 484 info := &bug.TreeTests.List[i] 485 anyPending = anyPending || info.Pending != "" 486 key := "" 487 switch result.(type) { 488 case wantFirstOK: 489 key = info.FirstOK 490 case wantFirstCrash: 491 key = info.FirstCrash 492 case wantFirstAny: 493 key = info.First 494 case wantNewAny: 495 key = info.Last 496 default: 497 return pollResultError(fmt.Errorf("unexpected expected result: %T", result)), nil 498 } 499 if key == "" { 500 continue 501 } 502 job, _, err := fetchJob(c, key) 503 if err != nil { 504 return pollResultError(err), nil 505 } 506 if date, ok := result.(wantNewAny); ok { 507 if job.Finished.Before(time.Time(date)) { 508 continue 509 } 510 } 511 return 
pollResultDone{ 512 Crashed: job.CrashTitle != "", 513 Finished: job.Finished, 514 }, job 515 } 516 if anyPending { 517 return pollResultPending{}, nil 518 } else { 519 return pollResultSkip{}, nil 520 } 521 } 522 523 func (bug *Bug) matchingTreeTests(repo KernelRepo, runOn runReproOn) []int { 524 ret := []int{} 525 for i, item := range bug.TreeTests.List { 526 if item.Repo != repo.URL { 527 continue 528 } 529 ok := true 530 switch v := runOn.(type) { 531 case runOnHEAD: 532 // TODO: should we check for an empty merge base here? 533 ok = item.Branch == repo.Branch 534 case runOnMergeBase: 535 ok = item.Branch == repo.Branch && 536 item.MergeBaseRepo == v.Repo && 537 item.MergeBaseBranch == v.Branch 538 } 539 if ok { 540 ret = append(ret, i) 541 } 542 } 543 return ret 544 } 545 546 func (ctx *bugTreeContext) loadCrashInfo() error { 547 // First look at the crash from previous tests. 548 if len(ctx.bug.TreeTests.List) > 0 { 549 crashID := ctx.bug.TreeTests.List[len(ctx.bug.TreeTests.List)-1].CrashID 550 crashKey := db.NewKey(ctx.c, "Crash", "", crashID, ctx.bugKey) 551 crash := new(Crash) 552 // We need to also tolerate the case when the crash was just deleted. 553 err := db.Get(ctx.cGlobal, crashKey, crash) 554 if err != nil && err != db.ErrNoSuchEntity { 555 return fmt.Errorf("failed to get crash: %w", err) 556 } else if err == nil { 557 ok, build, err := ctx.isCrashRelevant(crash) 558 if err != nil { 559 return err 560 } 561 if ok { 562 ctx.build = build 563 ctx.crash = crash 564 ctx.crashKey = crashKey 565 } 566 } 567 } 568 569 // Query the most relevant crash with repro. 570 crash, crashKey, err := findCrashForBug(ctx.cGlobal, ctx.bug) 571 if err != nil { 572 return err 573 } 574 ok, build, err := ctx.isCrashRelevant(crash) 575 if err != nil { 576 return err 577 } else if ok && (ctx.crash == nil || crash.ReportLen > ctx.crash.ReportLen) { 578 // Update the crash only if we found a better one. 
579 ctx.build = build 580 ctx.crash = crash 581 ctx.crashKey = crashKey 582 } 583 // Load the rest of the data. 584 if ctx.crash != nil { 585 var err error 586 ns := ctx.bug.Namespace 587 repoGraph, err := makeRepoGraph(getNsConfig(ctx.c, ns).Repos) 588 if err != nil { 589 return err 590 } 591 ctx.repoNode = repoGraph.nodeByRepo(ctx.build.KernelRepo, ctx.build.KernelBranch) 592 } 593 return nil 594 } 595 596 func (ctx *bugTreeContext) isCrashRelevant(crash *Crash) (bool, *Build, error) { 597 if crash.ReproIsRevoked { 598 // No sense in running the reproducer. 599 return false, nil, nil 600 } else if crash.ReproC == 0 && crash.ReproSyz == 0 { 601 // Let's wait for the repro. 602 return false, nil, nil 603 } 604 newManager, _ := activeManager(ctx.cGlobal, crash.Manager, ctx.bug.Namespace) 605 if newManager != crash.Manager { 606 // The manager was deprecated since the crash. 607 // Let's just ignore such bugs for now. 608 return false, nil, nil 609 } 610 build, err := loadBuild(ctx.cGlobal, ctx.bug.Namespace, crash.BuildID) 611 if err != nil { 612 return false, nil, err 613 } 614 mgrBuild, err := lastManagerBuild(ctx.cGlobal, build.Namespace, newManager) 615 if err != nil { 616 return false, build, err 617 } 618 // It does happen that we sometimes update the tested tree. 619 // It's not frequent at all, but it will make all results very confusing. 620 return build.KernelRepo == mgrBuild.KernelRepo && 621 build.KernelBranch == mgrBuild.KernelBranch, build, nil 622 } 623 624 func (test *BugTreeTest) applyPending(c context.Context) error { 625 if test.Pending == "" { 626 return nil 627 } 628 job, _, err := fetchJob(c, test.Pending) 629 if err != nil { 630 return err 631 } 632 if job.Finished.IsZero() { 633 // Not yet ready. 
634 return nil 635 } 636 pendingKey := test.Pending 637 test.Pending = "" 638 if job.Error != 0 { 639 test.Error = pendingKey 640 return nil 641 } 642 test.Last = pendingKey 643 if test.First == "" { 644 test.First = pendingKey 645 } 646 if test.FirstOK == "" && job.CrashTitle == "" { 647 test.FirstOK = pendingKey 648 } else if test.FirstCrash == "" && job.CrashTitle != "" { 649 test.FirstCrash = pendingKey 650 } 651 return nil 652 } 653 654 // treeTestJobs fetches relevant tree testing results. 655 func treeTestJobs(c context.Context, bug *Bug) ([]*dashapi.JobInfo, error) { 656 g, _ := errgroup.WithContext(context.Background()) 657 jobIDs := make(chan string) 658 659 var ret []*dashapi.JobInfo 660 var mu sync.Mutex 661 662 // The underlying code makes a number of queries, so let's do it in parallel to speed up processing. 663 const threads = 3 664 for i := 0; i < threads; i++ { 665 g.Go(func() error { 666 for id := range jobIDs { 667 job, jobKey, err := fetchJob(c, id) 668 if err != nil { 669 return err 670 } 671 build, err := loadBuild(c, job.Namespace, job.BuildID) 672 if err != nil { 673 return err 674 } 675 crashKey := db.NewKey(c, "Crash", "", job.CrashID, bug.key(c)) 676 crash := new(Crash) 677 if err := db.Get(c, crashKey, crash); err != nil { 678 return fmt.Errorf("failed to get crash: %w", err) 679 } 680 info := makeJobInfo(c, job, jobKey, bug, build, crash) 681 mu.Lock() 682 ret = append(ret, info) 683 mu.Unlock() 684 } 685 return nil 686 }) 687 } 688 for _, info := range bug.TreeTests.List { 689 if info.FirstOK != "" { 690 jobIDs <- info.FirstOK 691 } 692 if info.FirstCrash != "" { 693 jobIDs <- info.FirstCrash 694 } 695 if info.Error != "" { 696 jobIDs <- info.Error 697 } 698 } 699 // Wait until we have all information. 700 close(jobIDs) 701 err := g.Wait() 702 if err != nil { 703 return nil, err 704 } 705 // Sort structures to keep output consistent. 
706 sort.Slice(ret, func(i, j int) bool { 707 if ret[i].KernelAlias != ret[j].KernelAlias { 708 return ret[i].KernelAlias < ret[j].KernelAlias 709 } 710 return ret[i].Finished.Before(ret[j].Finished) 711 }) 712 return ret, nil 713 } 714 715 // Create a cross-tree bisection job (if needed). 716 // Returns: 717 // a) Job object and its key -- in case of success. 718 // b) Whether the lookup was expensive (it can help optimize crossTreeBisection calls). 719 func crossTreeBisection(c context.Context, bug *Bug, 720 managers map[string]dashapi.ManagerJobs) (*Job, *db.Key, bool, error) { 721 repoGraph, err := makeRepoGraph(getNsConfig(c, bug.Namespace).Repos) 722 if err != nil { 723 return nil, nil, false, err 724 } 725 bugJobs := &lazyJobList{ 726 c: c, 727 bug: bug, 728 jobType: JobBisectFix, 729 } 730 var job *Job 731 var jobKey *db.Key 732 expensive := false 733 err = repoGraph.forEachEdge(func(from, to *repoNode, info KernelRepoLink) error { 734 if jobKey != nil { 735 return nil 736 } 737 if !info.BisectFixes { 738 return nil 739 } 740 expensive = true 741 log.Infof(c, "%s: considering cross-tree bisection %s/%s", 742 bug.displayTitle(), from.repo.Alias, to.repo.Alias) 743 _, crashJob := bug.findResult(c, to.repo, wantNewAny{}, runOnHEAD{}) 744 if crashJob == nil { 745 // No patch testing was performed yet. 746 return nil 747 } 748 if crashJob.CrashTitle == "" { 749 // The bug is already fixed on the target tree. 750 return nil 751 } 752 crashBuild, err := loadBuild(c, bug.Namespace, crashJob.BuildID) 753 if err != nil { 754 return err 755 } 756 manager, _ := activeManager(c, crashJob.Manager, crashJob.Namespace) 757 if !managers[manager].BisectFix { 758 return nil 759 } 760 _, successJob := bug.findResult(c, from.repo, wantNewAny{}, runOnHEAD{}) 761 if successJob == nil { 762 // The jobs is not done yet. 763 return nil 764 } 765 if successJob.CrashTitle != "" { 766 // The kernel tree is still crashed by the repro. 
767 return nil 768 } 769 newJob := &Job{ 770 Type: JobBisectFix, 771 Created: timeNow(c), 772 Namespace: bug.Namespace, 773 Manager: crashJob.Manager, 774 BisectFrom: crashBuild.KernelCommit, 775 KernelRepo: from.repo.URL, 776 KernelBranch: from.repo.Branch, 777 MergeBaseRepo: to.repo.URL, 778 MergeBaseBranch: to.repo.Branch, 779 BugTitle: bug.displayTitle(), 780 CrashID: crashJob.CrashID, 781 } 782 // It's expected that crossTreeBisection is not concurrently called with the same 783 // manager list. 784 prevJob, err := bugJobs.lastMatch(newJob) 785 if err != nil { 786 return err 787 } 788 const repeatPeriod = time.Hour * 24 * 30 789 if prevJob != nil && (prevJob.Error == 0 || 790 prevJob.Finished.After(timeNow(c).Add(-repeatPeriod))) { 791 // The job is already pending or failed recently. Skip. 792 return nil 793 } 794 job = newJob 795 jobKey, err = saveJob(c, newJob, bug.key(c)) 796 return err 797 }) 798 return job, jobKey, expensive, err 799 } 800 801 type lazyJobList struct { 802 c context.Context 803 bug *Bug 804 jobType JobType 805 jobs *bugJobs 806 } 807 808 func (list *lazyJobList) lastMatch(job *Job) (*Job, error) { 809 if list.jobs == nil { 810 var err error 811 list.jobs, err = queryBugJobs(list.c, list.bug, list.jobType) 812 if err != nil { 813 return nil, err 814 } 815 } 816 var best *Job 817 for _, item := range list.jobs.all() { 818 otherJob := item.job 819 same := otherJob.Manager == job.Manager && 820 otherJob.KernelRepo == job.KernelRepo && 821 otherJob.KernelBranch == job.KernelBranch && 822 otherJob.CrashID == job.CrashID && 823 otherJob.MergeBaseRepo == job.MergeBaseRepo && 824 otherJob.MergeBaseBranch == job.MergeBaseBranch 825 if !same { 826 continue 827 } 828 if best == nil || best.Created.Before(otherJob.Created) { 829 best = otherJob 830 } 831 } 832 return best, nil 833 } 834 835 func doneCrossTreeBisection(c context.Context, jobKey *db.Key, job *Job) error { 836 if job.Type != JobBisectFix || job.MergeBaseRepo == "" { 837 // Not a cross 
tree bisection. 838 return nil 839 } 840 if job.Error != 0 || job.isUnreliableBisect() || len(job.Commits) != 1 { 841 // The result is not interesting. 842 return nil 843 } 844 return updateSingleBug(c, jobKey.Parent(), func(bug *Bug) error { 845 bug.FixCandidateJob = jobKey.Encode() 846 return nil 847 }) 848 } 849 850 type repoNode struct { 851 repo KernelRepo 852 edges []repoEdge 853 } 854 855 type repoEdge struct { 856 in bool 857 info KernelRepoLink 858 other *repoNode 859 } 860 861 type repoGraph struct { 862 nodes map[string]*repoNode 863 } 864 865 func makeRepoGraph(repos []KernelRepo) (*repoGraph, error) { 866 g := &repoGraph{ 867 nodes: map[string]*repoNode{}, 868 } 869 for _, repo := range repos { 870 if repo.Alias == "" { 871 return nil, fmt.Errorf("one of the repos has an empty alias") 872 } 873 g.nodes[repo.Alias] = &repoNode{repo: repo} 874 } 875 for _, repo := range repos { 876 for _, link := range repo.CommitInflow { 877 if g.nodes[link.Alias] == nil { 878 return nil, fmt.Errorf("no repo with alias %q", link.Alias) 879 } 880 g.nodes[repo.Alias].addEdge(true, link, g.nodes[link.Alias]) 881 g.nodes[link.Alias].addEdge(false, link, g.nodes[repo.Alias]) 882 } 883 } 884 for alias, node := range g.nodes { 885 reachable := node.reachable(true) 886 if _, ok := reachable[node]; ok { 887 return nil, fmt.Errorf("%q lies on a cycle", alias) 888 } 889 } 890 return g, nil 891 } 892 893 func (g *repoGraph) nodeByRepo(url, branch string) *repoNode { 894 for _, node := range g.nodes { 895 if node.repo.URL == url && node.repo.Branch == branch { 896 return node 897 } 898 } 899 return nil 900 } 901 902 func (g *repoGraph) nodeByAlias(alias string) *repoNode { 903 for _, node := range g.nodes { 904 if node.repo.Alias == alias { 905 return node 906 } 907 } 908 return nil 909 } 910 911 func (g *repoGraph) forEachEdge(cb func(from, to *repoNode, info KernelRepoLink) error) error { 912 for _, node := range g.nodes { 913 for _, e := range node.edges { 914 if !e.in { 915 
continue 916 } 917 err := cb(e.other, node, e.info) 918 if err != nil { 919 return err 920 } 921 } 922 } 923 return nil 924 } 925 926 // reachable returns a map *repoNode -> bool (whether commits are merged). 927 func (n *repoNode) reachable(in bool) map[*repoNode]bool { 928 ret := map[*repoNode]bool{} 929 // First collect nodes only reachable via merge=true links. 930 n.reachableMerged(in, true, ret) 931 n.reachableMerged(in, false, ret) 932 return ret 933 } 934 935 func (n *repoNode) reachableMerged(in, onlyMerge bool, ret map[*repoNode]bool) { 936 var dfs func(*repoNode, bool) 937 dfs = func(node *repoNode, merge bool) { 938 for _, edge := range node.edges { 939 if edge.in != in || onlyMerge && !edge.info.Merge { 940 continue 941 } 942 if _, ok := ret[edge.other]; ok { 943 continue 944 } 945 ret[edge.other] = merge && edge.info.Merge 946 dfs(edge.other, merge && edge.info.Merge) 947 } 948 } 949 dfs(n, true) 950 } 951 952 func (n *repoNode) allReachable() map[*repoNode]bool { 953 ret := n.reachable(true) 954 for node, merge := range n.reachable(false) { 955 ret[node] = merge 956 } 957 return ret 958 } 959 960 func (n *repoNode) addEdge(in bool, info KernelRepoLink, other *repoNode) { 961 n.edges = append(n.edges, repoEdge{ 962 in: in, 963 info: info, 964 other: other, 965 }) 966 }