github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/dashboard/app/jobs.go (about) 1 // Copyright 2017 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package main 5 6 import ( 7 "context" 8 "encoding/json" 9 "fmt" 10 "math/rand" 11 "sort" 12 "strconv" 13 "strings" 14 "time" 15 16 "github.com/google/syzkaller/dashboard/dashapi" 17 "github.com/google/syzkaller/pkg/email" 18 "github.com/google/syzkaller/pkg/vcs" 19 db "google.golang.org/appengine/v2/datastore" 20 "google.golang.org/appengine/v2/log" 21 "google.golang.org/appengine/v2/user" 22 ) 23 24 type testReqArgs struct { 25 bug *Bug 26 bugKey *db.Key 27 bugReporting *BugReporting 28 user string 29 extID string 30 link string 31 patch []byte 32 repo string 33 branch string 34 jobCC []string 35 mergeBaseRepo string 36 mergeBaseBranch string 37 } 38 39 // handleTestRequest added new job to db. 40 // Returns nil if job added successfully. 41 // If the arguments are invalid, the error is of type *BadTestRequest. 42 // If the request was denied, the error is of type *TestRequestDenied. 43 // All other errors correspond to internal processing problems. 44 func handleTestRequest(c context.Context, args *testReqArgs) error { 45 log.Infof(c, "test request: bug=%s user=%q extID=%q patch=%v, repo=%q branch=%q", 46 args.bug.Title, args.user, args.extID, len(args.patch), args.repo, args.branch) 47 for _, blocked := range getConfig(c).EmailBlocklist { 48 if args.user == blocked { 49 return &TestRequestDeniedError{ 50 fmt.Sprintf("test request from blocked user: %v", args.user), 51 } 52 } 53 } 54 crash, crashKey, err := findCrashForBug(c, args.bug) 55 if err != nil { 56 return fmt.Errorf("failed to find a crash: %w", err) 57 } 58 _, _, err = addTestJob(c, &testJobArgs{ 59 testReqArgs: *args, 60 crash: crash, crashKey: crashKey, 61 }) 62 if err != nil { 63 return err 64 } 65 // Update bug CC and last activity time. 66 tx := func(c context.Context) error { 67 bug := new(Bug) 68 if err := db.Get(c, args.bugKey, bug); err != nil { 69 return err 70 } 71 bug.LastActivity = timeNow(c) 72 bugReporting := args.bugReporting 73 bugReporting = bugReportingByName(bug, bugReporting.Name) 74 bugCC := strings.Split(bugReporting.CC, "|") 75 merged := email.MergeEmailLists(bugCC, args.jobCC) 76 bugReporting.CC = strings.Join(merged, "|") 77 if _, err := db.Put(c, args.bugKey, bug); err != nil { 78 return fmt.Errorf("failed to put bug: %w", err) 79 } 80 return nil 81 } 82 if err := db.RunInTransaction(c, tx, nil); err != nil { 83 // We've already stored the job, so just log the error. 84 log.Errorf(c, "failed to update bug: %v", err) 85 } 86 return nil 87 } 88 89 type testJobArgs struct { 90 crash *Crash 91 crashKey *db.Key 92 configRef int64 93 configAppend string 94 treeOrigin bool 95 inTransaction bool 96 testReqArgs 97 } 98 99 func addTestJob(c context.Context, args *testJobArgs) (*Job, *db.Key, error) { 100 now := timeNow(c) 101 if err := patchTestJobArgs(c, args); err != nil { 102 return nil, nil, err 103 } 104 if reason := checkTestJob(args); reason != "" { 105 return nil, nil, &BadTestRequestError{reason} 106 } 107 manager, mgrConfig := activeManager(c, args.crash.Manager, args.bug.Namespace) 108 if mgrConfig != nil && mgrConfig.RestrictedTestingRepo != "" && 109 args.repo != mgrConfig.RestrictedTestingRepo { 110 return nil, nil, &BadTestRequestError{mgrConfig.RestrictedTestingReason} 111 } 112 patchID, err := putText(c, args.bug.Namespace, textPatch, args.patch, false) 113 if err != nil { 114 return nil, nil, err 115 } 116 configRef := args.configRef 117 if args.configAppend != "" { 118 kernelConfig, _, err := getText(c, textKernelConfig, configRef) 119 if err != nil { 120 return nil, nil, err 121 } 122 configRef, err = putText(c, args.bug.Namespace, textKernelConfig, 123 append(kernelConfig, []byte(args.configAppend)...), true) 124 if err != nil { 125 return nil, nil, err 126 } 127 } 128 reportingName := "" 129 if args.bugReporting != nil { 130 reportingName = args.bugReporting.Name 131 } 132 job := &Job{ 133 Type: JobTestPatch, 134 Created: now, 135 User: args.user, 136 CC: args.jobCC, 137 Reporting: reportingName, 138 ExtID: args.extID, 139 Link: args.link, 140 Namespace: args.bug.Namespace, 141 Manager: manager, 142 BugTitle: args.bug.displayTitle(), 143 CrashID: args.crashKey.IntID(), 144 KernelRepo: args.repo, 145 KernelBranch: args.branch, 146 MergeBaseRepo: args.mergeBaseRepo, 147 MergeBaseBranch: args.mergeBaseBranch, 148 Patch: patchID, 149 KernelConfig: configRef, 150 TreeOrigin: args.treeOrigin, 151 } 152 153 var jobKey *db.Key 154 deletePatch := false 155 tx := func(c context.Context) error { 156 deletePatch = false 157 // We can get 2 emails for the same request: one direct and one from a mailing list. 158 // Filter out such duplicates (for dup we only need link update). 159 var jobs []*Job 160 var keys []*db.Key 161 var err error 162 if args.extID != "" { 163 keys, err = db.NewQuery("Job"). 164 Ancestor(args.bugKey). 165 Filter("ExtID=", args.extID). 166 GetAll(c, &jobs) 167 if len(jobs) > 1 || err != nil { 168 return fmt.Errorf("failed to query jobs: jobs=%v err=%w", len(jobs), err) 169 } 170 } 171 if len(jobs) != 0 { 172 // The job is already present, update link. 173 deletePatch = true 174 job, jobKey = jobs[0], keys[0] 175 if job.Link != "" || args.link == "" { 176 return nil 177 } 178 job.Link = args.link 179 if jobKey, err = db.Put(c, jobKey, job); err != nil { 180 return fmt.Errorf("failed to put job: %w", err) 181 } 182 return nil 183 } 184 jobKey, err = saveJob(c, job, args.bugKey) 185 return err 186 } 187 if args.inTransaction { 188 err = tx(c) 189 } else { 190 err = db.RunInTransaction(c, tx, &db.TransactionOptions{XG: true, Attempts: 30}) 191 } 192 if patchID != 0 && (deletePatch || err != nil) { 193 if err := db.Delete(c, db.NewKey(c, textPatch, "", patchID, nil)); err != nil { 194 log.Errorf(c, "failed to delete patch for dup job: %v", err) 195 } 196 } 197 if err != nil { 198 return nil, nil, fmt.Errorf("job tx failed: %w", err) 199 } 200 return job, jobKey, nil 201 } 202 203 func saveJob(c context.Context, job *Job, bugKey *db.Key) (*db.Key, error) { 204 jobKey := db.NewIncompleteKey(c, "Job", bugKey) 205 var err error 206 if jobKey, err = db.Put(c, jobKey, job); err != nil { 207 return nil, fmt.Errorf("failed to put job: %w", err) 208 } 209 return jobKey, addCrashReference(c, job.CrashID, bugKey, 210 CrashReference{CrashReferenceJob, extJobID(jobKey), timeNow(c)}) 211 } 212 213 func patchTestJobArgs(c context.Context, args *testJobArgs) error { 214 if args.branch == "" && args.repo == "" { 215 // If no arguments were passed, we need to auto-guess them. 216 build, err := loadBuild(c, args.bug.Namespace, args.crash.BuildID) 217 if err != nil { 218 return fmt.Errorf("failed to find the bug reporting object: %w", err) 219 } 220 args.branch = build.KernelBranch 221 args.repo = build.KernelRepo 222 } 223 // Let trees be also identified by their alias names. 224 for _, repo := range getNsConfig(c, args.bug.Namespace).Repos { 225 if repo.Alias != "" && repo.Alias == args.repo { 226 args.repo = repo.URL 227 break 228 } 229 } 230 return nil 231 } 232 233 func crashNeedsRepro(title string) bool { 234 return !strings.Contains(title, "boot error:") && 235 !strings.Contains(title, "test error:") && 236 !strings.Contains(title, "build error") 237 } 238 239 func checkTestJob(args *testJobArgs) string { 240 crash, bug := args.crash, args.bug 241 needRepro := crashNeedsRepro(crash.Title) 242 switch { 243 case needRepro && crash.ReproC == 0 && crash.ReproSyz == 0: 244 return "This crash does not have a reproducer. I cannot test it." 245 case !vcs.CheckRepoAddress(args.repo): 246 return fmt.Sprintf("%q does not look like a valid git repo address.", args.repo) 247 case !vcs.CheckBranch(args.branch) && !vcs.CheckCommitHash(args.branch): 248 return fmt.Sprintf("%q does not look like a valid git branch or commit.", args.branch) 249 case bug.Status == BugStatusFixed: 250 return "This bug is already marked as fixed. No point in testing." 251 case bug.Status == BugStatusInvalid: 252 return "This bug is already marked as invalid. No point in testing." 253 // TODO(dvyukov): for BugStatusDup check status of the canonical bug. 254 case args.bugReporting != nil && !args.bugReporting.Closed.IsZero(): 255 return "This bug is already upstreamed. Please test upstream." 256 } 257 return "" 258 } 259 260 // Mark bisection job as invalid and, if restart=true, reset bisection state of the related bug. 261 func invalidateBisection(c context.Context, jobKey *db.Key, restart bool) error { 262 u := user.Current(c) 263 tx := func(c context.Context) error { 264 job := new(Job) 265 if err := db.Get(c, jobKey, job); err != nil { 266 return fmt.Errorf("failed to get job: %w", err) 267 } 268 269 if job.Type != JobBisectCause && job.Type != JobBisectFix { 270 return fmt.Errorf("can only invalidate bisection jobs") 271 } 272 273 // Update the job. 274 job.InvalidatedBy = u.Email 275 if _, err := db.Put(c, jobKey, job); err != nil { 276 return fmt.Errorf("failed to put job: %w", err) 277 } 278 279 if restart { 280 // Update the bug. 281 bug := new(Bug) 282 bugKey := jobKey.Parent() 283 if err := db.Get(c, bugKey, bug); err != nil { 284 return fmt.Errorf("failed to get bug: %w", err) 285 } 286 if job.Type == JobBisectCause { 287 bug.BisectCause = BisectNot 288 } else if job.IsCrossTree() { 289 bug.FixCandidateJob = "" 290 } else if job.Type == JobBisectFix { 291 bug.BisectFix = BisectNot 292 } 293 if _, err := db.Put(c, bugKey, bug); err != nil { 294 return fmt.Errorf("failed to put bug: %w", err) 295 } 296 } 297 return nil 298 } 299 if err := db.RunInTransaction(c, tx, &db.TransactionOptions{XG: true, Attempts: 10}); err != nil { 300 return fmt.Errorf("update failed: %w", err) 301 } 302 303 return nil 304 } 305 306 type BadTestRequestError struct { 307 message string 308 } 309 310 func (e *BadTestRequestError) Error() string { 311 return e.message 312 } 313 314 type TestRequestDeniedError struct { 315 message string 316 } 317 318 func (e *TestRequestDeniedError) Error() string { 319 return e.message 320 } 321 322 // pollPendingJobs returns the next job to execute for the provided list of managers. 323 func pollPendingJobs(c context.Context, managers map[string]dashapi.ManagerJobs) ( 324 *dashapi.JobPollResp, error) { 325 retry: 326 job, jobKey, err := getNextJob(c, managers) 327 if job == nil || err != nil { 328 return nil, err 329 } 330 resp, stale, err := createJobResp(c, job, jobKey) 331 if err != nil { 332 return nil, err 333 } 334 if stale { 335 goto retry 336 } 337 return resp, nil 338 } 339 340 func getNextJob(c context.Context, managers map[string]dashapi.ManagerJobs) (*Job, *db.Key, error) { 341 job, jobKey, err := loadPendingJob(c, managers) 342 if job != nil || err != nil { 343 return job, jobKey, err 344 } 345 // Each syz-ci polls dashboard every 10 seconds. At the times when there are no 346 // matching jobs, it just doesn't make much sense to execute heavy algorithms that 347 // try to generate them too often. 348 // Note that it won't affect user-created jobs as they are not auto-generated. 349 if err := throttleJobGeneration(c, managers); err != nil { 350 return nil, nil, err 351 } 352 var handlers []func(context.Context, map[string]dashapi.ManagerJobs) (*Job, *db.Key, error) 353 // Let's alternate handlers, so that neither patch tests nor bisections overrun one another. 354 if timeNow(c).UnixMilli()%2 == 0 { 355 handlers = append(handlers, jobFromBugSample, createBisectJob) 356 } else { 357 handlers = append(handlers, createBisectJob, jobFromBugSample) 358 } 359 for _, f := range handlers { 360 job, jobKey, err := f(c, managers) 361 if job != nil || err != nil { 362 return job, jobKey, err 363 } 364 } 365 return nil, nil, nil 366 } 367 368 const jobGenerationPeriod = time.Minute 369 370 func throttleJobGeneration(c context.Context, managers map[string]dashapi.ManagerJobs) error { 371 drop := map[string]struct{}{} 372 for name := range managers { 373 // Technically the key is Namespace+Manager, so it's not guaranteed 374 // that there'll be only one. 375 // But for throttling purposes any single entity will do. 376 // Also note that we do the query outside of the transaction as 377 // datastore prohibits non-ancestor queries. 378 keys, err := db.NewQuery("Manager"). 379 Filter("Name=", name). 380 Limit(1). 381 KeysOnly(). 382 GetAll(c, nil) 383 if err != nil { 384 return err 385 } 386 if len(keys) == 0 { 387 drop[name] = struct{}{} 388 continue 389 } 390 tx := func(c context.Context) error { 391 manager := new(Manager) 392 if err := db.Get(c, keys[0], manager); err != nil { 393 return fmt.Errorf("failed to get %v: %w", keys[0], err) 394 } 395 if timeNow(c).Sub(manager.LastGeneratedJob) < jobGenerationPeriod { 396 drop[name] = struct{}{} 397 return nil 398 } 399 manager.LastGeneratedJob = timeNow(c) 400 if _, err = db.Put(c, keys[0], manager); err != nil { 401 return fmt.Errorf("failed to put Manager: %w", err) 402 } 403 return nil 404 } 405 if err := db.RunInTransaction(c, tx, &db.TransactionOptions{}); err != nil { 406 return fmt.Errorf("failed to throttle: %w", err) 407 } 408 } 409 for name := range drop { 410 delete(managers, name) 411 } 412 return nil 413 } 414 415 // Randomly sample a subset of open bugs with reproducers and try to generate 416 // a job for them. 417 // Suitable for cases when we must look deeper than just into Bug fields. 418 // Sampling allows to evenly spread the load over time. 419 func jobFromBugSample(c context.Context, managers map[string]dashapi.ManagerJobs) (*Job, 420 *db.Key, error) { 421 var managersList []string 422 for name, jobs := range managers { 423 if !jobs.Any() { 424 continue 425 } 426 managersList = append(managersList, name) 427 managersList = append(managersList, decommissionedInto(c, name)...) 428 } 429 managersList = unique(managersList) 430 431 var allBugs []*Bug 432 var allBugKeys []*db.Key 433 for _, mgrName := range managersList { 434 bugs, bugKeys, err := loadAllBugs(c, func(query *db.Query) *db.Query { 435 return query.Filter("Status=", BugStatusOpen). 436 Filter("HappenedOn=", mgrName). 437 Filter("HeadReproLevel>", 0) 438 }) 439 if err != nil { 440 return nil, nil, err 441 } 442 bugs, bugKeys = filterBugs(bugs, bugKeys, func(bug *Bug) bool { 443 if len(bug.Commits) > 0 { 444 // Let's save resources -- there's no point in doing analysis for bugs 445 // for which we were already given fixing commits. 446 return false 447 } 448 if getNsConfig(c, bug.Namespace).Decommissioned { 449 return false 450 } 451 return true 452 }) 453 allBugs = append(allBugs, bugs...) 454 allBugKeys = append(allBugKeys, bugKeys...) 455 } 456 r := rand.New(rand.NewSource(timeNow(c).UnixNano())) 457 // Bugs often happen on multiple instances, so let's filter out duplicates. 458 allBugs, allBugKeys = uniqueBugs(c, allBugs, allBugKeys) 459 r.Shuffle(len(allBugs), func(i, j int) { 460 allBugs[i], allBugs[j] = allBugs[j], allBugs[i] 461 allBugKeys[i], allBugKeys[j] = allBugKeys[j], allBugKeys[i] 462 }) 463 // Also shuffle the creator functions. 464 funcs := []func(context.Context, []*Bug, []*db.Key, 465 map[string]dashapi.ManagerJobs) (*Job, *db.Key, error){ 466 createPatchRetestingJobs, 467 createTreeTestJobs, 468 createTreeBisectionJobs, 469 } 470 r.Shuffle(len(funcs), func(i, j int) { funcs[i], funcs[j] = funcs[j], funcs[i] }) 471 for _, f := range funcs { 472 job, jobKey, err := f(c, allBugs, allBugKeys, managers) 473 if job != nil || err != nil { 474 return job, jobKey, err 475 } 476 } 477 return nil, nil, nil 478 } 479 480 func createTreeBisectionJobs(c context.Context, bugs []*Bug, bugKeys []*db.Key, 481 managers map[string]dashapi.ManagerJobs) (*Job, *db.Key, error) { 482 log.Infof(c, "createTreeBisectionJobs is called for %d bugs", len(bugs)) 483 const maxProcess = 5 484 processed := 0 485 for _, bug := range bugs { 486 if bug.FixCandidateJob != "" { 487 continue 488 } 489 if processed >= maxProcess { 490 break 491 } 492 any := false 493 for _, mgr := range bug.HappenedOn { 494 newMgr, _ := activeManager(c, mgr, bug.Namespace) 495 any = any || managers[newMgr].BisectFix 496 } 497 if !any { 498 continue 499 } 500 job, key, expensive, err := crossTreeBisection(c, bug, managers) 501 if job != nil || err != nil { 502 return job, key, err 503 } 504 if expensive { 505 // Only count expensive lookups. 506 // If we didn't have to query anything from the DB, it's not a problem to 507 // examine more bugs. 508 processed++ 509 } 510 } 511 return nil, nil, nil 512 } 513 514 func createTreeTestJobs(c context.Context, bugs []*Bug, bugKeys []*db.Key, 515 managers map[string]dashapi.ManagerJobs) (*Job, *db.Key, error) { 516 takeBugs := 5 517 prio, next := []int{}, []int{} 518 for i, bug := range bugs { 519 if !getNsConfig(c, bug.Namespace).FindBugOriginTrees { 520 continue 521 } 522 if timeNow(c).Before(bug.TreeTests.NextPoll) { 523 continue 524 } 525 if bug.TreeTests.NeedPoll { 526 prio = append(prio, i) 527 } else { 528 next = append(next, i) 529 } 530 if len(prio) >= takeBugs { 531 prio = prio[:takeBugs] 532 break 533 } else if len(prio)+len(next) > takeBugs { 534 next = next[:takeBugs-len(prio)] 535 } 536 } 537 for _, i := range append(prio, next...) { 538 job, jobKey, err := generateTreeOriginJobs(c, bugKeys[i], managers) 539 if err != nil { 540 return nil, nil, fmt.Errorf("bug %v job creation failed: %w", bugKeys[i], err) 541 } else if job != nil { 542 return job, jobKey, nil 543 } 544 } 545 return nil, nil, nil 546 } 547 548 func createPatchRetestingJobs(c context.Context, bugs []*Bug, bugKeys []*db.Key, 549 managers map[string]dashapi.ManagerJobs) (*Job, *db.Key, error) { 550 takeBugs := 5 551 for i, bug := range bugs { 552 if !getNsConfig(c, bug.Namespace).RetestRepros { 553 // Repro retesting is disabled for the namespace. 554 continue 555 } 556 if getConfig(c).Obsoleting.ReproRetestPeriod == 0 || 557 timeNow(c).Sub(bug.LastTime) < getConfig(c).Obsoleting.ReproRetestStart { 558 // Don't retest reproducers if crashes are still happening. 559 continue 560 } 561 takeBugs-- 562 if takeBugs == 0 { 563 break 564 } 565 job, jobKey, err := handleRetestForBug(c, bug, bugKeys[i], managers) 566 if err != nil { 567 return nil, nil, fmt.Errorf("bug %v repro retesting failed: %w", bugKeys[i], err) 568 } else if job != nil { 569 return job, jobKey, nil 570 } 571 } 572 return nil, nil, nil 573 } 574 575 func decommissionedInto(c context.Context, jobMgr string) []string { 576 var ret []string 577 for _, nsConfig := range getConfig(c).Namespaces { 578 for name, mgr := range nsConfig.Managers { 579 if mgr.DelegatedTo == jobMgr { 580 ret = append(ret, name) 581 } 582 } 583 } 584 return ret 585 } 586 587 // There are bugs with dozens of reproducer. 588 // Let's spread the load more evenly by limiting the number of jobs created at a time. 589 const maxRetestJobsAtOnce = 5 590 591 func handleRetestForBug(c context.Context, bug *Bug, bugKey *db.Key, 592 managers map[string]dashapi.ManagerJobs) (*Job, *db.Key, error) { 593 crashes, crashKeys, err := queryCrashesForBug(c, bugKey, maxCrashes()) 594 if err != nil { 595 return nil, nil, err 596 } 597 var job *Job 598 var jobKey *db.Key 599 now := timeNow(c) 600 jobsLeft := maxRetestJobsAtOnce 601 for crashID, crash := range crashes { 602 if crash.ReproSyz == 0 && crash.ReproC == 0 { 603 continue 604 } 605 if now.Sub(crash.LastReproRetest) < getConfig(c).Obsoleting.ReproRetestPeriod { 606 continue 607 } 608 if crash.ReproIsRevoked { 609 // No sense in retesting the already revoked repro. 610 continue 611 } 612 // We could have decommissioned the original manager since then. 613 manager, _ := activeManager(c, crash.Manager, bug.Namespace) 614 if manager == "" || !managers[manager].TestPatches { 615 continue 616 } 617 if jobsLeft == 0 { 618 break 619 } 620 jobsLeft-- 621 // Take the last successful build -- the build on which this crash happened 622 // might contain already obsolete repro and branch values. 623 build, err := lastManagerBuild(c, bug.Namespace, manager) 624 if err != nil { 625 return nil, nil, err 626 } 627 job, jobKey, err = addTestJob(c, &testJobArgs{ 628 crash: crash, 629 crashKey: crashKeys[crashID], 630 configRef: build.KernelConfig, 631 testReqArgs: testReqArgs{ 632 bug: bug, 633 bugKey: bugKey, 634 repo: build.KernelRepo, 635 branch: build.KernelBranch, 636 }, 637 }) 638 if err != nil { 639 return nil, nil, fmt.Errorf("failed to add job: %w", err) 640 } 641 } 642 return job, jobKey, nil 643 } 644 645 func createBisectJob(c context.Context, managers map[string]dashapi.ManagerJobs) (*Job, *db.Key, error) { 646 // We need both C and syz repros, but the crazy datastore query restrictions 647 // do not allow to use ReproLevel>ReproLevelNone in the query. So we do 2 separate queries. 648 // C repros tend to be of higher reliability so maybe it's not bad. 649 job, jobKey, err := createBisectJobRepro(c, managers, ReproLevelC) 650 if job != nil || err != nil { 651 return job, jobKey, err 652 } 653 return createBisectJobRepro(c, managers, ReproLevelSyz) 654 } 655 656 func createBisectJobRepro(c context.Context, managers map[string]dashapi.ManagerJobs, 657 reproLevel dashapi.ReproLevel) (*Job, *db.Key, error) { 658 causeManagers := make(map[string]bool) 659 fixManagers := make(map[string]bool) 660 for mgr, jobs := range managers { 661 if jobs.BisectCause { 662 causeManagers[mgr] = true 663 } 664 if jobs.BisectFix { 665 fixManagers[mgr] = true 666 } 667 } 668 job, jobKey, err := findBugsForBisection(c, causeManagers, reproLevel, JobBisectCause) 669 if job != nil || err != nil { 670 return job, jobKey, err 671 } 672 return findBugsForBisection(c, fixManagers, reproLevel, JobBisectFix) 673 } 674 675 func findBugsForBisection(c context.Context, managers map[string]bool, 676 reproLevel dashapi.ReproLevel, jobType JobType) (*Job, *db.Key, error) { 677 if len(managers) == 0 { 678 return nil, nil, nil 679 } 680 // Note: we could also include len(Commits)==0 but datastore does not work this way. 681 // So we would need an additional HasCommits field or something. 682 // Note: For JobBisectCause, order the bugs from newest to oldest. For JobBisectFix, 683 // order the bugs from oldest to newest. 684 // Sort property should be the same as property used in the inequality filter. 685 // We only need 1 job, but we skip some because the query is not precise. 686 bugs, keys, err := loadAllBugs(c, func(query *db.Query) *db.Query { 687 query = query.Filter("Status=", BugStatusOpen) 688 if jobType == JobBisectCause { 689 query = query.Filter("FirstTime>", time.Time{}). 690 Filter("ReproLevel=", reproLevel). 691 Filter("BisectCause=", BisectNot). 692 Order("-FirstTime") 693 } else { 694 query = query.Filter("LastTime>", time.Time{}). 695 Filter("ReproLevel=", reproLevel). 696 Filter("BisectFix=", BisectNot). 697 Order("LastTime") 698 } 699 return query 700 }) 701 if err != nil { 702 return nil, nil, fmt.Errorf("failed to query bugs: %w", err) 703 } 704 for bi, bug := range bugs { 705 if !shouldBisectBug(c, bug, managers, jobType) { 706 continue 707 } 708 crash, crashKey, err := bisectCrashForBug(c, bug, keys[bi], managers, jobType) 709 if err != nil { 710 return nil, nil, err 711 } 712 if crash == nil { 713 continue 714 } 715 return createBisectJobForBug(c, bug, crash, keys[bi], crashKey, jobType) 716 } 717 return nil, nil, nil 718 } 719 720 func shouldBisectBug(c context.Context, bug *Bug, managers map[string]bool, jobType JobType) bool { 721 // We already have a fixing commit, no need to bisect. 722 if len(bug.Commits) != 0 { 723 return false 724 } 725 726 if getNsConfig(c, bug.Namespace).Decommissioned { 727 return false 728 } 729 730 // There likely is no fix yet, as the bug recently reproduced. 731 const fixJobRepeat = 24 * 30 * time.Hour 732 if jobType == JobBisectFix && timeSince(c, bug.LastTime) < fixJobRepeat { 733 return false 734 } 735 // Likely to find the same (invalid) result without admin intervention, don't try too often. 736 const causeJobRepeat = 24 * 7 * time.Hour 737 if jobType == JobBisectCause && timeSince(c, bug.LastCauseBisect) < causeJobRepeat { 738 return false 739 } 740 741 // Ensure one of the managers the bug reproduced on is taking bisection jobs. 742 for _, mgr := range bug.HappenedOn { 743 if managers[mgr] { 744 return true 745 } 746 } 747 return false 748 } 749 750 func bisectCrashForBug(c context.Context, bug *Bug, bugKey *db.Key, managers map[string]bool, jobType JobType) ( 751 *Crash, *db.Key, error) { 752 crashes, crashKeys, err := queryCrashesForBug(c, bugKey, maxCrashes()) 753 if err != nil { 754 return nil, nil, err 755 } 756 for ci, crash := range crashes { 757 if crash.ReproSyz == 0 || !managers[crash.Manager] { 758 continue 759 } 760 if jobType == JobBisectFix && 761 getNsConfig(c, bug.Namespace).Managers[crash.Manager].FixBisectionDisabled { 762 continue 763 } 764 return crash, crashKeys[ci], nil 765 } 766 return nil, nil, nil 767 } 768 769 func createBisectJobForBug(c context.Context, bug0 *Bug, crash *Crash, bugKey, crashKey *db.Key, jobType JobType) ( 770 *Job, *db.Key, error) { 771 build, err := loadBuild(c, bug0.Namespace, crash.BuildID) 772 if err != nil { 773 return nil, nil, err 774 } 775 now := timeNow(c) 776 job := &Job{ 777 Type: jobType, 778 Created: now, 779 Namespace: bug0.Namespace, 780 Manager: crash.Manager, 781 KernelRepo: build.KernelRepo, 782 KernelBranch: build.KernelBranch, 783 BugTitle: bug0.displayTitle(), 784 CrashID: crashKey.IntID(), 785 } 786 var jobKey *db.Key 787 tx := func(c context.Context) error { 788 jobKey = nil 789 bug := new(Bug) 790 if err := db.Get(c, bugKey, bug); err != nil { 791 return fmt.Errorf("failed to get bug %v: %w", bugKey.StringID(), err) 792 } 793 if jobType == JobBisectFix && bug.BisectFix != BisectNot || 794 jobType == JobBisectCause && bug.BisectCause != BisectNot { 795 // Race, we could do a more complex retry, but we just rely on the next poll. 796 job = nil 797 return nil 798 } 799 if jobType == JobBisectCause { 800 bug.BisectCause = BisectPending 801 } else { 802 bug.BisectFix = BisectPending 803 } 804 if _, err := db.Put(c, bugKey, bug); err != nil { 805 return fmt.Errorf("failed to put bug: %w", err) 806 } 807 jobKey, err = saveJob(c, job, bugKey) 808 return err 809 } 810 if err := db.RunInTransaction(c, tx, &db.TransactionOptions{ 811 // We're accessing two different kinds in addCrashReference. 812 XG: true, 813 }); err != nil { 814 return nil, nil, fmt.Errorf("create bisect job tx failed: %w", err) 815 } 816 return job, jobKey, nil 817 } 818 819 func createJobResp(c context.Context, job *Job, jobKey *db.Key) (*dashapi.JobPollResp, bool, error) { 820 jobID := extJobID(jobKey) 821 patch, _, err := getText(c, textPatch, job.Patch) 822 if err != nil { 823 return nil, false, err 824 } 825 bugKey := jobKey.Parent() 826 crashKey := db.NewKey(c, "Crash", "", job.CrashID, bugKey) 827 crash := new(Crash) 828 if err := db.Get(c, crashKey, crash); err != nil { 829 return nil, false, fmt.Errorf("job %v: failed to get crash: %w", jobID, err) 830 } 831 832 build, err := loadBuild(c, job.Namespace, crash.BuildID) 833 if err != nil { 834 return nil, false, err 835 } 836 837 configRef := job.KernelConfig 838 if configRef == 0 { 839 configRef = build.KernelConfig 840 } 841 kernelConfig, _, err := getText(c, textKernelConfig, configRef) 842 if err != nil { 843 return nil, false, err 844 } 845 846 reproC, _, err := getText(c, textReproC, crash.ReproC) 847 if err != nil { 848 return nil, false, err 849 } 850 reproSyz, err := loadReproSyz(c, crash) 851 if err != nil { 852 return nil, false, err 853 } 854 855 now := timeNow(c) 856 stale := false 857 tx := func(c context.Context) error { 858 stale = false 859 job = new(Job) 860 if err := db.Get(c, jobKey, job); err != nil { 861 return fmt.Errorf("job %v: failed to get in tx: %w", jobID, err) 862 } 863 if !job.Finished.IsZero() { 864 // This happens sometimes due to inconsistent db. 865 stale = true 866 return nil 867 } 868 job.Attempts++ 869 job.IsRunning = true 870 job.LastStarted = now 871 if _, err := db.Put(c, jobKey, job); err != nil { 872 return fmt.Errorf("job %v: failed to put: %w", jobID, err) 873 } 874 return nil 875 } 876 if err := db.RunInTransaction(c, tx, nil); err != nil { 877 return nil, false, err 878 } 879 if stale { 880 return nil, true, nil 881 } 882 resp := &dashapi.JobPollResp{ 883 ID: jobID, 884 Manager: job.Manager, 885 KernelRepo: job.KernelRepo, 886 KernelBranch: job.KernelBranch, 887 MergeBaseRepo: job.MergeBaseRepo, 888 MergeBaseBranch: job.MergeBaseBranch, 889 KernelCommit: job.BisectFrom, 890 KernelConfig: kernelConfig, 891 SyzkallerCommit: build.SyzkallerCommit, 892 Patch: patch, 893 ReproOpts: crash.ReproOpts, 894 ReproSyz: reproSyz, 895 ReproC: reproC, 896 } 897 if resp.KernelCommit == "" { 898 resp.KernelCommit = build.KernelCommit 899 resp.KernelCommitTitle = build.KernelCommitTitle 900 } 901 switch job.Type { 902 case JobTestPatch: 903 resp.Type = dashapi.JobTestPatch 904 case JobBisectCause: 905 resp.Type = dashapi.JobBisectCause 906 case JobBisectFix: 907 resp.Type = dashapi.JobBisectFix 908 default: 909 return nil, false, fmt.Errorf("bad job type %v", job.Type) 910 } 911 return resp, false, nil 912 } 913 914 // It would be easier to just check if the User field is empty, but let's also not 915 // miss the situation when some actual user sends a patch testing request without 916 // patch. 917 func isRetestReproJob(job *Job, build *Build) bool { 918 return (job.Type == JobTestPatch || job.Type == JobBisectFix) && 919 job.Patch == 0 && 920 job.KernelRepo == build.KernelRepo && 921 job.KernelBranch == build.KernelBranch 922 } 923 924 func handleRetestedRepro(c context.Context, now time.Time, job *Job, jobKey *db.Key, 925 bug *Bug, lastBuild *Build, req *dashapi.JobDoneReq) (*Bug, error) { 926 bugKey := jobKey.Parent() 927 if bug == nil { 928 bug = new(Bug) 929 if err := db.Get(c, bugKey, bug); err != nil { 930 return nil, fmt.Errorf("failed to get bug: %v", bugKey) 931 } 932 } 933 crashKey := db.NewKey(c, "Crash", "", job.CrashID, bugKey) 934 crash := new(Crash) 935 if err := db.Get(c, crashKey, crash); err != nil { 936 return nil, fmt.Errorf("failed to get crash: %v", crashKey) 937 } 938 allTitles := gatherCrashTitles(req) 939 // Update the crash. 940 crash.LastReproRetest = now 941 if req.Error == nil && !crash.ReproIsRevoked { 942 // If repro testing itself failed, it might be just a temporary issue. 943 if job.Type == JobTestPatch { 944 // If there was any crash at all, the repro is still not worth discarding. 945 crash.ReproIsRevoked = len(allTitles) == 0 946 } else if job.Type == JobBisectFix { 947 // More than one commit is suspected => repro stopped working at some point. 948 crash.ReproIsRevoked = len(req.Commits) > 0 949 } 950 } 951 crash.UpdateReportingPriority(c, lastBuild, bug) 952 if _, err := db.Put(c, crashKey, crash); err != nil { 953 return nil, fmt.Errorf("failed to put crash: %w", err) 954 } 955 reproCrashes, crashKeys, err := queryCrashesForBug(c, bugKey, 2) 956 if err != nil { 957 return nil, fmt.Errorf("failed to fetch crashes with repro: %w", err) 958 } 959 // Now we can update the bug. 960 bug.HeadReproLevel = ReproLevelNone 961 for id, bestCrash := range reproCrashes { 962 if crashKeys[id].Equal(crashKey) { 963 // In Datastore, we don't see previous writes in a transaction... 964 bestCrash = crash 965 } 966 if bestCrash.ReproIsRevoked { 967 continue 968 } 969 if bestCrash.ReproC > 0 { 970 bug.HeadReproLevel = ReproLevelC 971 } else if bug.HeadReproLevel != ReproLevelC && bestCrash.ReproSyz > 0 { 972 bug.HeadReproLevel = ReproLevelSyz 973 } 974 } 975 if stringInList(allTitles, bug.Title) || stringListsIntersect(bug.AltTitles, allTitles) { 976 // We don't want to confuse users, so only update LastTime if the generated crash 977 // really relates to the existing bug. 978 bug.LastTime = now 979 } 980 return bug, nil 981 } 982 983 func gatherCrashTitles(req *dashapi.JobDoneReq) []string { 984 ret := append([]string{}, req.CrashAltTitles...) 985 if req.CrashTitle != "" { 986 ret = append(ret, req.CrashTitle) 987 } 988 return ret 989 } 990 991 // resetJobs is called to indicate that, for the specified managers, all started jobs are no longer 992 // in progress. 993 func resetJobs(c context.Context, req *dashapi.JobResetReq) error { 994 var jobs []*Job 995 keys, err := db.NewQuery("Job"). 996 Filter("Finished=", time.Time{}). 997 Filter("IsRunning=", true). 998 GetAll(c, &jobs) 999 if err != nil { 1000 return err 1001 } 1002 managerMap := map[string]bool{} 1003 for _, name := range req.Managers { 1004 managerMap[name] = true 1005 } 1006 for idx, job := range jobs { 1007 if !managerMap[job.Manager] { 1008 continue 1009 } 1010 jobKey := keys[idx] 1011 tx := func(c context.Context) error { 1012 job = new(Job) 1013 if err := db.Get(c, jobKey, job); err != nil { 1014 return fmt.Errorf("job %v: failed to get in tx: %w", jobKey, err) 1015 } 1016 if job.IsFinished() { 1017 // Just in case. 1018 return nil 1019 } 1020 job.IsRunning = false 1021 if _, err := db.Put(c, jobKey, job); err != nil { 1022 return fmt.Errorf("job %v: failed to put: %w", jobKey, err) 1023 } 1024 return nil 1025 } 1026 if err := db.RunInTransaction(c, tx, nil); err != nil { 1027 return err 1028 } 1029 } 1030 return nil 1031 } 1032 1033 // doneJob is called by syz-ci to mark completion of a job. 1034 // nolint: gocyclo 1035 func doneJob(c context.Context, req *dashapi.JobDoneReq) error { 1036 jobID := req.ID 1037 jobKey, err := jobID2Key(c, req.ID) 1038 if err != nil { 1039 return err 1040 } 1041 // Datastore prohibits cross-group queries even inside XG transactions. 1042 // So we have to query last build for the manager before the transaction. 1043 job := new(Job) 1044 if err := db.Get(c, jobKey, job); err != nil { 1045 return fmt.Errorf("job %v: failed to get job: %w", jobID, err) 1046 } 1047 lastBuild, err := lastManagerBuild(c, job.Namespace, job.Manager) 1048 if err != nil { 1049 return err 1050 } 1051 now := timeNow(c) 1052 tx := func(c context.Context) error { 1053 job = new(Job) 1054 if err := db.Get(c, jobKey, job); err != nil { 1055 return fmt.Errorf("job %v: failed to get job: %w", jobID, err) 1056 } 1057 if !job.Finished.IsZero() { 1058 return fmt.Errorf("job %v: already finished", jobID) 1059 } 1060 var bug *Bug 1061 if isRetestReproJob(job, lastBuild) { 1062 var err error 1063 bug, err = handleRetestedRepro(c, now, job, jobKey, bug, lastBuild, req) 1064 if err != nil { 1065 return fmt.Errorf("job %v: failed to handle retested repro, %w", jobID, err) 1066 } 1067 } 1068 ns := job.Namespace 1069 if req.Build.ID != "" { 1070 if _, isNewBuild, err := uploadBuild(c, now, ns, &req.Build, BuildJob); err != nil { 1071 return err 1072 } else if !isNewBuild { 1073 log.Errorf(c, "job %v: duplicate build %v", jobID, req.Build.ID) 1074 } 1075 } 1076 if job.Log, err = putText(c, ns, textLog, req.Log, false); err != nil { 1077 return err 1078 } 1079 if job.Error, err = putText(c, ns, textError, req.Error, false); err != nil { 1080 return err 1081 } 1082 if job.CrashLog, err = putText(c, ns, textCrashLog, req.CrashLog, false); err != nil { 1083 return err 1084 } 1085 if job.CrashReport, err = putText(c, ns, textCrashReport, req.CrashReport, false); err != nil { 1086 return err 1087 } 1088 for _, com := range req.Commits { 1089 cc := email.MergeEmailLists(com.CC, 1090 GetEmails(com.Recipients, dashapi.To), 1091 GetEmails(com.Recipients, dashapi.Cc)) 1092 job.Commits = append(job.Commits, Commit{ 1093 Hash: com.Hash, 1094 Title: com.Title, 1095 Author: com.Author, 1096 AuthorName: com.AuthorName, 1097 CC: strings.Join(sanitizeCC(c, cc), "|"), 1098 Date: com.Date, 1099 }) 1100 } 1101 job.BuildID = req.Build.ID 1102 job.CrashTitle = req.CrashTitle 1103 job.Finished = now 1104 job.IsRunning = false 1105 job.Flags = req.Flags 1106 if job.Type == JobBisectCause || job.Type == JobBisectFix { 1107 // Update bug.BisectCause/Fix status and also remember current bug reporting to send results. 1108 var err error 1109 bug, err = updateBugBisection(c, job, jobKey, req, bug, now) 1110 if err != nil { 1111 return err 1112 } 1113 } 1114 if jobKey, err = db.Put(c, jobKey, job); err != nil { 1115 return fmt.Errorf("failed to put job: %w", err) 1116 } 1117 if bug != nil { 1118 if _, err := db.Put(c, jobKey.Parent(), bug); err != nil { 1119 return fmt.Errorf("failed to put bug: %w", err) 1120 } 1121 } 1122 log.Infof(c, "DONE JOB %v: reported=%v reporting=%v", jobID, job.Reported, job.Reporting) 1123 return nil 1124 } 1125 err = db.RunInTransaction(c, tx, &db.TransactionOptions{XG: true, Attempts: 30}) 1126 if err != nil { 1127 return err 1128 } 1129 return postJob(c, jobKey, job) 1130 } 1131 1132 func postJob(c context.Context, jobKey *db.Key, job *Job) error { 1133 if job.TreeOrigin { 1134 err := treeOriginJobDone(c, jobKey, job) 1135 if err != nil { 1136 return fmt.Errorf("job %v: failed to execute tree origin handlers: %w", jobKey, err) 1137 } 1138 } 1139 err := doneCrossTreeBisection(c, jobKey, job) 1140 if err != nil { 1141 return fmt.Errorf("job %s: cross tree bisection handlers failed: %w", jobKey, err) 1142 } 1143 return nil 1144 } 1145 1146 func updateBugBisection(c context.Context, job *Job, jobKey *db.Key, req *dashapi.JobDoneReq, 1147 bug *Bug, now time.Time) (*Bug, error) { 1148 if bug == nil { 1149 bug = new(Bug) 1150 if err := db.Get(c, jobKey.Parent(), bug); err != nil { 1151 return nil, fmt.Errorf("failed to get bug: %v", jobKey.Parent()) 1152 } 1153 } 1154 result := BisectYes 1155 if len(req.Error) != 0 { 1156 result = BisectError 1157 } else if len(req.Commits) > 1 { 1158 result = BisectInconclusive 1159 } else if len(req.Commits) == 0 { 1160 result = BisectHorizont 1161 } else if job.isUnreliableBisect() { 1162 result = BisectUnreliable 1163 } 1164 if job.Type == JobBisectCause { 1165 bug.BisectCause = result 1166 bug.LastCauseBisect = now 1167 } else { 1168 bug.BisectFix = result 1169 } 1170 infraError := (req.Flags & dashapi.BisectResultInfraError) == dashapi.BisectResultInfraError 1171 if infraError { 1172 log.Errorf(c, "bisection of %q failed due to infra errors", job.BugTitle) 1173 } 1174 // If the crash still occurs on HEAD, update the bug's LastTime so that it will be 1175 // retried after 30 days. 1176 if job.Type == JobBisectFix && (result != BisectError || infraError) && 1177 len(req.Commits) == 0 && len(req.CrashLog) != 0 { 1178 bug.BisectFix = BisectNot 1179 bug.LastTime = now 1180 } 1181 // If the cause bisection failed due to infrastructure problems, also repeat it. 1182 if job.Type == JobBisectCause && infraError { 1183 bug.BisectCause = BisectNot 1184 } 1185 _, bugReporting, _, _, _ := currentReporting(c, bug) 1186 // The bug is either already closed or not yet reported in the current reporting, 1187 // either way we don't need to report it. If it wasn't reported, it will be reported 1188 // with the bisection results. 1189 if bugReporting == nil || bugReporting.Reported.IsZero() || 1190 // Don't report errors for non-user-initiated jobs. 1191 job.Error != 0 || 1192 // Don't report unreliable/wrong bisections. 1193 job.isUnreliableBisect() { 1194 job.Reported = true 1195 } else { 1196 job.Reporting = bugReporting.Name 1197 } 1198 return bug, nil 1199 } 1200 1201 // TODO: this is temporal for gradual bisection rollout. 1202 // Notify only about successful cause bisection for now. 1203 // For now we only enable this in tests. 1204 var notifyAboutUnsuccessfulBisections = false 1205 1206 // There's really no reason to query all our completed jobs every time. 1207 // If we did not report a finished job within a month, let it stay unreported. 1208 const maxReportedJobAge = time.Hour * 24 * 30 1209 1210 func pollCompletedJobs(c context.Context, typ string) ([]*dashapi.BugReport, error) { 1211 var jobs []*Job 1212 keys, err := db.NewQuery("Job"). 1213 Filter("Finished>", timeNow(c).Add(-maxReportedJobAge)). 1214 Filter("Reported=", false). 1215 GetAll(c, &jobs) 1216 if err != nil { 1217 return nil, fmt.Errorf("failed to query jobs: %w", err) 1218 } 1219 var reports []*dashapi.BugReport 1220 for i, job := range jobs { 1221 if job.Reporting == "" { 1222 if job.User != "" { 1223 log.Criticalf(c, "no reporting for job %v", extJobID(keys[i])) 1224 } 1225 // In some cases (e.g. repro retesting), it's ok not to have a reporting. 1226 continue 1227 } 1228 reporting := getNsConfig(c, job.Namespace).ReportingByName(job.Reporting) 1229 if reporting.Config.Type() != typ { 1230 continue 1231 } 1232 if job.Type == JobBisectCause && !notifyAboutUnsuccessfulBisections && len(job.Commits) != 1 { 1233 continue 1234 } 1235 // If BisectFix results in a crash on HEAD, no notification is sent out. 1236 if job.Type == JobBisectFix && len(job.Commits) != 1 { 1237 continue 1238 } 1239 // If the bug is already known to be fixed, invalid or duplicate, do not report the bisection results. 1240 if job.Type == JobBisectCause || job.Type == JobBisectFix { 1241 bug := new(Bug) 1242 bugKey := keys[i].Parent() 1243 if err := db.Get(c, bugKey, bug); err != nil { 1244 return nil, fmt.Errorf("job %v: failed to get bug: %w", extJobID(keys[i]), err) 1245 } 1246 if len(bug.Commits) != 0 || bug.Status != BugStatusOpen { 1247 jobReported(c, extJobID(keys[i])) 1248 continue 1249 } 1250 } 1251 rep, err := createBugReportForJob(c, job, keys[i], reporting.Config) 1252 if err != nil { 1253 log.Errorf(c, "failed to create report for job: %v", err) 1254 continue 1255 } 1256 reports = append(reports, rep) 1257 } 1258 return reports, nil 1259 } 1260 1261 func createBugReportForJob(c context.Context, job *Job, jobKey *db.Key, config interface{}) ( 1262 *dashapi.BugReport, error) { 1263 reportingConfig, err := json.Marshal(config) 1264 if err != nil { 1265 return nil, err 1266 } 1267 crashLog, _, err := getText(c, textCrashLog, job.CrashLog) 1268 if err != nil { 1269 return nil, err 1270 } 1271 report, _, err := getText(c, textCrashReport, job.CrashReport) 1272 if err != nil { 1273 return nil, err 1274 } 1275 if len(report) > maxMailReportLen { 1276 report = report[:maxMailReportLen] 1277 } 1278 jobError, _, err := getText(c, textError, job.Error) 1279 if err != nil { 1280 return nil, err 1281 } 1282 build, err := loadBuild(c, job.Namespace, job.BuildID) 1283 if err != nil { 1284 return nil, err 1285 } 1286 bugKey := jobKey.Parent() 1287 crashKey := db.NewKey(c, "Crash", "", job.CrashID, bugKey) 1288 crash := new(Crash) 1289 if err := db.Get(c, crashKey, crash); err != nil { 1290 return nil, fmt.Errorf("failed to get crash: %w", err) 1291 } 1292 bug := new(Bug) 1293 if err := db.Get(c, bugKey, bug); err != nil { 1294 return nil, fmt.Errorf("failed to load job parent bug: %w", err) 1295 } 1296 bugReporting := bugReportingByName(bug, job.Reporting) 1297 if bugReporting == nil { 1298 return nil, fmt.Errorf("job bug has no reporting %q", job.Reporting) 1299 } 1300 kernelRepo := kernelRepoInfo(c, build) 1301 rep := &dashapi.BugReport{ 1302 Type: job.Type.toDashapiReportType(), 1303 Config: reportingConfig, 1304 JobID: extJobID(jobKey), 1305 ExtID: job.ExtID, 1306 CC: append(job.CC, kernelRepo.CC.Always...), 1307 Log: crashLog, 1308 LogLink: externalLink(c, textCrashLog, job.CrashLog), 1309 Report: report, 1310 ReportLink: externalLink(c, textCrashReport, job.CrashReport), 1311 ReproCLink: externalLink(c, textReproC, crash.ReproC), 1312 ReproSyzLink: externalLink(c, textReproSyz, crash.ReproSyz), 1313 ReproOpts: crash.ReproOpts, 1314 MachineInfoLink: externalLink(c, textMachineInfo, crash.MachineInfo), 1315 CrashTitle: job.CrashTitle, 1316 Error: jobError, 1317 ErrorLink: externalLink(c, textError, job.Error), 1318 PatchLink: externalLink(c, textPatch, job.Patch), 1319 } 1320 if job.Type == JobBisectCause || job.Type == JobBisectFix { 1321 rep.Maintainers = append(crash.Maintainers, kernelRepo.CC.Maintainers...) 1322 rep.ExtID = bugReporting.ExtID 1323 if bugReporting.CC != "" { 1324 rep.CC = strings.Split(bugReporting.CC, "|") 1325 } 1326 var emails []string 1327 switch job.Type { 1328 case JobBisectCause: 1329 rep.BisectCause, emails = bisectFromJob(c, job) 1330 case JobBisectFix: 1331 rep.BisectFix, emails = bisectFromJob(c, job) 1332 } 1333 rep.Maintainers = append(rep.Maintainers, emails...) 1334 } 1335 if mgr := bug.managerConfig(c); mgr != nil { 1336 rep.CC = append(rep.CC, mgr.CC.Always...) 1337 if job.Type == JobBisectCause || job.Type == JobBisectFix { 1338 rep.Maintainers = append(rep.Maintainers, mgr.CC.Maintainers...) 1339 } 1340 } 1341 // Build error output and failing VM boot log can be way too long to inline. 1342 if len(rep.Error) > maxInlineError { 1343 rep.Error = rep.Error[len(rep.Error)-maxInlineError:] 1344 rep.ErrorTruncated = true 1345 } 1346 if err := fillBugReport(c, rep, bug, bugReporting, build); err != nil { 1347 return nil, err 1348 } 1349 return rep, nil 1350 } 1351 1352 func bisectFromJob(c context.Context, job *Job) (*dashapi.BisectResult, []string) { 1353 bisect := &dashapi.BisectResult{ 1354 LogLink: externalLink(c, textLog, job.Log), 1355 CrashLogLink: externalLink(c, textCrashLog, job.CrashLog), 1356 CrashReportLink: externalLink(c, textCrashReport, job.CrashReport), 1357 Fix: job.Type == JobBisectFix, 1358 CrossTree: job.IsCrossTree(), 1359 } 1360 for _, com := range job.Commits { 1361 bisect.Commits = append(bisect.Commits, com.toDashapi()) 1362 } 1363 var newEmails []string 1364 if len(bisect.Commits) == 1 { 1365 bisect.Commit = bisect.Commits[0] 1366 bisect.Commits = nil 1367 com := job.Commits[0] 1368 newEmails = []string{com.Author} 1369 newEmails = append(newEmails, strings.Split(com.CC, "|")...) 1370 } 1371 if job.BackportedCommit.Title != "" { 1372 bisect.Backported = job.BackportedCommit.toDashapi() 1373 } 1374 return bisect, newEmails 1375 } 1376 1377 func jobReported(c context.Context, jobID string) error { 1378 jobKey, err := jobID2Key(c, jobID) 1379 if err != nil { 1380 return err 1381 } 1382 now := timeNow(c) 1383 tx := func(c context.Context) error { 1384 job := new(Job) 1385 if err := db.Get(c, jobKey, job); err != nil { 1386 return fmt.Errorf("job %v: failed to get job: %w", jobID, err) 1387 } 1388 job.Reported = true 1389 // Auto-mark the bug as fixed by the result of fix bisection, 1390 // if the setting is enabled for the namespace. 1391 if job.Type == JobBisectFix && 1392 getNsConfig(c, job.Namespace).FixBisectionAutoClose && 1393 !job.IsCrossTree() && 1394 len(job.Commits) == 1 { 1395 bug := new(Bug) 1396 bugKey := jobKey.Parent() 1397 if err := db.Get(c, bugKey, bug); err != nil { 1398 return fmt.Errorf("failed to get bug: %w", err) 1399 } 1400 if bug.Status == BugStatusOpen && len(bug.Commits) == 0 { 1401 bug.updateCommits([]string{job.Commits[0].Title}, now) 1402 if _, err := db.Put(c, bugKey, bug); err != nil { 1403 return fmt.Errorf("failed to put bug: %w", err) 1404 } 1405 } 1406 } 1407 if _, err := db.Put(c, jobKey, job); err != nil { 1408 return fmt.Errorf("failed to put job: %w", err) 1409 } 1410 return nil 1411 } 1412 return db.RunInTransaction(c, tx, nil) 1413 } 1414 1415 func handleExternalTestRequest(c context.Context, req *dashapi.TestPatchRequest) error { 1416 bug, bugKey, err := findBugByReportingID(c, req.BugID) 1417 if err != nil { 1418 return fmt.Errorf("failed to find the bug: %w", err) 1419 } 1420 bugReporting, _ := bugReportingByID(bug, req.BugID) 1421 if bugReporting == nil { 1422 return fmt.Errorf("failed to find the bug reporting object") 1423 } 1424 crash, crashKey, err := findCrashForBug(c, bug) 1425 if err != nil { 1426 return fmt.Errorf("failed to find a crash: %w", err) 1427 } 1428 _, _, err = addTestJob(c, &testJobArgs{ 1429 crash: crash, 1430 crashKey: crashKey, 1431 testReqArgs: testReqArgs{ 1432 bug: bug, 1433 bugKey: bugKey, 1434 bugReporting: bugReporting, 1435 repo: req.Repo, 1436 branch: req.Branch, 1437 user: req.User, 1438 link: req.Link, 1439 patch: req.Patch, 1440 }, 1441 }) 1442 return err 1443 } 1444 1445 type jobSorter struct { 1446 jobs []*Job 1447 keys []*db.Key 1448 } 1449 1450 func (sorter *jobSorter) Len() int { return len(sorter.jobs) } 1451 func (sorter *jobSorter) Less(i, j int) bool { 1452 // Give priority to user-initiated jobs to reduce the perceived processing time. 1453 return sorter.jobs[i].User != "" && sorter.jobs[j].User == "" 1454 } 1455 func (sorter *jobSorter) Swap(i, j int) { 1456 sorter.jobs[i], sorter.jobs[j] = sorter.jobs[j], sorter.jobs[i] 1457 sorter.keys[i], sorter.keys[j] = sorter.keys[j], sorter.keys[i] 1458 } 1459 1460 func loadPendingJob(c context.Context, managers map[string]dashapi.ManagerJobs) (*Job, *db.Key, error) { 1461 var jobs []*Job 1462 keys, err := db.NewQuery("Job"). 1463 Filter("Finished=", time.Time{}). 1464 Filter("IsRunning=", false). 1465 Order("Attempts"). 1466 Order("Created"). 1467 GetAll(c, &jobs) 1468 if err != nil { 1469 return nil, nil, fmt.Errorf("failed to query jobs: %w", err) 1470 } 1471 sort.Stable(&jobSorter{jobs: jobs, keys: keys}) 1472 for i, job := range jobs { 1473 switch job.Type { 1474 case JobTestPatch: 1475 if !managers[job.Manager].TestPatches { 1476 continue 1477 } 1478 case JobBisectCause, JobBisectFix: 1479 if job.Type == JobBisectCause && !managers[job.Manager].BisectCause || 1480 job.Type == JobBisectFix && !managers[job.Manager].BisectFix { 1481 continue 1482 } 1483 // Don't retry bisection jobs too often. 1484 // This allows to have several syz-ci's doing bisection 1485 // and protects from bisection job crashing syz-ci. 1486 const bisectRepeat = 3 * 24 * time.Hour 1487 if timeSince(c, job.Created) < bisectRepeat || 1488 timeSince(c, job.LastStarted) < bisectRepeat { 1489 continue 1490 } 1491 default: 1492 return nil, nil, fmt.Errorf("bad job type %v", job.Type) 1493 } 1494 return job, keys[i], nil 1495 } 1496 return nil, nil, nil 1497 } 1498 1499 // activeManager determines the manager currently responsible for all bugs found by 1500 // the specified manager. 1501 func activeManager(c context.Context, manager, ns string) (string, *ConfigManager) { 1502 nsConfig := getNsConfig(c, ns) 1503 if mgr, ok := nsConfig.Managers[manager]; ok { 1504 if mgr.Decommissioned { 1505 newMgr := nsConfig.Managers[mgr.DelegatedTo] 1506 return mgr.DelegatedTo, &newMgr 1507 } 1508 return manager, &mgr 1509 } 1510 // This manager is not mentioned in the configuration, therefore it was 1511 // definitely not decommissioned. 1512 return manager, nil 1513 } 1514 1515 func extJobID(jobKey *db.Key) string { 1516 return fmt.Sprintf("%v|%v", jobKey.Parent().StringID(), jobKey.IntID()) 1517 } 1518 1519 func jobID2Key(c context.Context, id string) (*db.Key, error) { 1520 keyStr := strings.Split(id, "|") 1521 if len(keyStr) != 2 { 1522 return nil, fmt.Errorf("bad job id %q", id) 1523 } 1524 jobKeyID, err := strconv.ParseInt(keyStr[1], 10, 64) 1525 if err != nil { 1526 return nil, fmt.Errorf("bad job id %q", id) 1527 } 1528 bugKey := db.NewKey(c, "Bug", keyStr[0], 0, nil) 1529 jobKey := db.NewKey(c, "Job", "", jobKeyID, bugKey) 1530 return jobKey, nil 1531 } 1532 1533 func fetchJob(c context.Context, key string) (*Job, *db.Key, error) { 1534 jobKey, err := db.DecodeKey(key) 1535 if err != nil { 1536 return nil, nil, err 1537 } 1538 job := new(Job) 1539 if err := db.Get(c, jobKey, job); err != nil { 1540 return nil, nil, fmt.Errorf("failed to get job: %w", err) 1541 } 1542 return job, jobKey, nil 1543 } 1544 1545 func makeJobInfo(c context.Context, job *Job, jobKey *db.Key, bug *Bug, build *Build, 1546 crash *Crash) *dashapi.JobInfo { 1547 kernelRepo, kernelCommit := job.KernelRepo, job.KernelBranch 1548 if build != nil { 1549 kernelCommit = build.KernelCommit 1550 } 1551 info := &dashapi.JobInfo{ 1552 JobKey: jobKey.Encode(), 1553 Type: dashapi.JobType(job.Type), 1554 Flags: job.Flags, 1555 Created: job.Created, 1556 BugLink: bugLink(jobKey.Parent().StringID()), 1557 ExternalLink: job.Link, 1558 User: job.User, 1559 Reporting: job.Reporting, 1560 Namespace: job.Namespace, 1561 Manager: job.Manager, 1562 BugTitle: job.BugTitle, 1563 KernelRepo: job.KernelRepo, 1564 KernelBranch: job.KernelBranch, 1565 KernelAlias: kernelRepoInfoRaw(c, job.Namespace, job.KernelRepo, job.KernelBranch).Alias, 1566 KernelLink: vcs.CommitLink(job.KernelRepo, job.KernelBranch), 1567 KernelCommit: kernelCommit, 1568 KernelCommitLink: vcs.CommitLink(kernelRepo, kernelCommit), 1569 PatchLink: textLink(textPatch, job.Patch), 1570 Attempts: job.Attempts, 1571 Started: job.LastStarted, 1572 Finished: job.Finished, 1573 CrashTitle: job.CrashTitle, 1574 CrashLogLink: externalLink(c, textCrashLog, job.CrashLog), 1575 CrashReportLink: externalLink(c, textCrashReport, job.CrashReport), 1576 LogLink: externalLink(c, textLog, job.Log), 1577 ErrorLink: externalLink(c, textError, job.Error), 1578 Reported: job.Reported, 1579 InvalidatedBy: job.InvalidatedBy, 1580 TreeOrigin: job.TreeOrigin, 1581 OnMergeBase: job.MergeBaseRepo != "", 1582 } 1583 if !job.Finished.IsZero() { 1584 info.Duration = job.Finished.Sub(job.LastStarted) 1585 } 1586 if job.Type == JobBisectCause || job.Type == JobBisectFix { 1587 // We don't report these yet (or at all), see pollCompletedJobs. 1588 if len(job.Commits) != 1 || 1589 bug != nil && (len(bug.Commits) != 0 || bug.Status != BugStatusOpen) { 1590 info.Reported = true 1591 } 1592 } 1593 for _, com := range job.Commits { 1594 info.Commits = append(info.Commits, &dashapi.Commit{ 1595 Hash: com.Hash, 1596 Title: com.Title, 1597 Author: fmt.Sprintf("%v <%v>", com.AuthorName, com.Author), 1598 CC: strings.Split(com.CC, "|"), 1599 Date: com.Date, 1600 Link: vcs.CommitLink(kernelRepo, com.Hash), 1601 }) 1602 } 1603 if len(info.Commits) == 1 { 1604 info.Commit = info.Commits[0] 1605 info.Commits = nil 1606 } 1607 if crash != nil { 1608 info.ReproCLink = externalLink(c, textReproC, crash.ReproC) 1609 info.ReproSyzLink = externalLink(c, textReproSyz, crash.ReproSyz) 1610 } 1611 return info 1612 } 1613 1614 func uniqueBugs(c context.Context, inBugs []*Bug, inKeys []*db.Key) ([]*Bug, []*db.Key) { 1615 var bugs []*Bug 1616 var keys []*db.Key 1617 1618 dups := map[string]bool{} 1619 for i, bug := range inBugs { 1620 hash := bug.keyHash(c) 1621 if dups[hash] { 1622 continue 1623 } 1624 dups[hash] = true 1625 bugs = append(bugs, bug) 1626 keys = append(keys, inKeys[i]) 1627 } 1628 return bugs, keys 1629 } 1630 1631 func relevantBackportJobs(c context.Context) ( 1632 bugs []*Bug, jobs []*Job, jobKeys []*db.Key, err error) { 1633 allBugs, _, bugsErr := loadAllBugs(c, func(query *db.Query) *db.Query { 1634 return query.Filter("FixCandidateJob>", "").Filter("Status=", BugStatusOpen) 1635 }) 1636 if bugsErr != nil { 1637 err = bugsErr 1638 return 1639 } 1640 var allJobKeys []*db.Key 1641 for _, bug := range allBugs { 1642 jobKey, decodeErr := db.DecodeKey(bug.FixCandidateJob) 1643 if decodeErr != nil { 1644 err = decodeErr 1645 return 1646 } 1647 allJobKeys = append(allJobKeys, jobKey) 1648 } 1649 allJobs := make([]*Job, len(allJobKeys)) 1650 err = db.GetMulti(c, allJobKeys, allJobs) 1651 if err != nil { 1652 return 1653 } 1654 for i, job := range allJobs { 1655 // Some assertions just in case. 1656 jobKey := allJobKeys[i] 1657 if !job.IsCrossTree() { 1658 err = fmt.Errorf("job %s: expected to be cross-tree", jobKey) 1659 return 1660 } 1661 if len(job.Commits) != 1 || job.InvalidatedBy != "" || 1662 job.BackportedCommit.Title != "" { 1663 continue 1664 } 1665 bugs = append(bugs, allBugs[i]) 1666 jobs = append(jobs, job) 1667 jobKeys = append(jobKeys, jobKey) 1668 } 1669 return 1670 } 1671 1672 func updateBackportCommits(c context.Context, ns string, commits []dashapi.Commit) error { 1673 if len(commits) == 0 { 1674 return nil 1675 } 1676 perTitle := map[string]dashapi.Commit{} 1677 for _, commit := range commits { 1678 perTitle[commit.Title] = commit 1679 } 1680 bugs, jobs, jobKeys, err := relevantBackportJobs(c) 1681 if err != nil { 1682 return fmt.Errorf("failed to query backport jobs: %w", err) 1683 } 1684 for i, job := range jobs { 1685 rawCommit, ok := perTitle[job.Commits[0].Title] 1686 if !ok { 1687 continue 1688 } 1689 if bugs[i].Namespace != ns { 1690 continue 1691 } 1692 commit := Commit{ 1693 Hash: rawCommit.Hash, 1694 Title: rawCommit.Title, 1695 Author: rawCommit.Author, 1696 AuthorName: rawCommit.AuthorName, 1697 Date: rawCommit.Date, 1698 } 1699 err := commitBackported(c, jobKeys[i], commit) 1700 if err != nil { 1701 return fmt.Errorf("failed to update backport job: %w", err) 1702 } 1703 } 1704 return nil 1705 } 1706 1707 func commitBackported(c context.Context, jobKey *db.Key, commit Commit) error { 1708 tx := func(c context.Context) error { 1709 job := new(Job) 1710 if err := db.Get(c, jobKey, job); err != nil { 1711 return fmt.Errorf("failed to get job: %w", err) 1712 } 1713 if job.BackportedCommit.Title != "" { 1714 // Nothing to update. 1715 return nil 1716 } 1717 job.BackportedCommit = commit 1718 job.Reported = false 1719 if _, err := db.Put(c, jobKey, job); err != nil { 1720 return fmt.Errorf("failed to put job: %w", err) 1721 } 1722 return nil 1723 } 1724 return db.RunInTransaction(c, tx, &db.TransactionOptions{Attempts: 5}) 1725 } 1726 1727 type bugJobs struct { 1728 list []*bugJob 1729 } 1730 1731 type bugJob struct { 1732 bug *Bug 1733 job *Job 1734 key *db.Key 1735 crash *Crash 1736 crashKey *db.Key 1737 build *Build 1738 } 1739 1740 func queryBugJobs(c context.Context, bug *Bug, jobType JobType) (*bugJobs, error) { 1741 // Just in case. 1742 const limitJobs = 25 1743 var jobs []*Job 1744 jobKeys, err := db.NewQuery("Job"). 1745 Ancestor(bug.key(c)). 1746 Filter("Type=", jobType). 1747 Order("-Finished"). 1748 Limit(limitJobs). 1749 GetAll(c, &jobs) 1750 if err != nil { 1751 return nil, fmt.Errorf("failed to fetch bug jobs: %w", err) 1752 } 1753 bugKey := bug.key(c) 1754 ret := &bugJobs{} 1755 for i := range jobs { 1756 job := jobs[i] 1757 var crashKey *db.Key 1758 if job.CrashID != 0 { 1759 crashKey = db.NewKey(c, "Crash", "", job.CrashID, bugKey) 1760 } 1761 ret.list = append(ret.list, &bugJob{ 1762 bug: bug, 1763 job: job, 1764 key: jobKeys[i], 1765 crashKey: crashKey, 1766 }) 1767 } 1768 return ret, nil 1769 } 1770 1771 func queryBestBisection(c context.Context, bug *Bug, jobType JobType) (*bugJob, error) { 1772 jobs, err := queryBugJobs(c, bug, jobType) 1773 if err != nil { 1774 return nil, err 1775 } 1776 return jobs.bestBisection(), nil 1777 } 1778 1779 // Find the most representative bisection result. 1780 func (b *bugJobs) bestBisection() *bugJob { 1781 // Let's take the most recent finished one. 1782 for _, j := range b.list { 1783 if !j.job.IsFinished() { 1784 continue 1785 } 1786 if j.job.InvalidatedBy != "" { 1787 continue 1788 } 1789 if j.job.MergeBaseRepo != "" { 1790 // It was a cross-tree bisection. 1791 continue 1792 } 1793 return j 1794 } 1795 return nil 1796 } 1797 1798 // Find the most representative fix candidate bisection result. 1799 func (b *bugJobs) bestFixCandidate() *bugJob { 1800 // Let's take the most recent finished one. 1801 for _, j := range b.list { 1802 if !j.job.IsFinished() { 1803 continue 1804 } 1805 if j.job.InvalidatedBy != "" { 1806 continue 1807 } 1808 if !j.job.IsCrossTree() { 1809 continue 1810 } 1811 return j 1812 } 1813 return nil 1814 } 1815 1816 func (b *bugJobs) all() []*bugJob { 1817 return b.list 1818 } 1819 1820 func (j *bugJob) load(c context.Context) error { 1821 err := j.loadCrash(c) 1822 if err != nil { 1823 return fmt.Errorf("failed to load crash: %w", err) 1824 } 1825 return j.loadBuild(c) 1826 } 1827 1828 func (j *bugJob) loadCrash(c context.Context) error { 1829 if j.crash != nil { 1830 return nil 1831 } 1832 j.crash = new(Crash) 1833 return db.Get(c, j.crashKey, j.crash) 1834 } 1835 1836 func (j *bugJob) loadBuild(c context.Context) error { 1837 if j.build != nil { 1838 return nil 1839 } 1840 err := j.loadCrash(c) 1841 if err != nil { 1842 return fmt.Errorf("failed to load crash: %w", err) 1843 } 1844 j.build, err = loadBuild(c, j.bug.Namespace, j.crash.BuildID) 1845 if err != nil { 1846 return err 1847 } 1848 return nil 1849 }