// Copyright 2017 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package main

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/google/syzkaller/dashboard/dashapi"
	"github.com/google/syzkaller/pkg/bisect"
	"github.com/google/syzkaller/pkg/build"
	"github.com/google/syzkaller/pkg/debugtracer"
	"github.com/google/syzkaller/pkg/instance"
	"github.com/google/syzkaller/pkg/log"
	"github.com/google/syzkaller/pkg/mgrconfig"
	"github.com/google/syzkaller/pkg/osutil"
	"github.com/google/syzkaller/pkg/report"
	"github.com/google/syzkaller/pkg/vcs"
	"github.com/google/syzkaller/vm"
)

// JobManager owns the connection to the dashboard and spawns one or two
// JobProcessor instances that poll for and execute jobs (patch testing,
// cause/fix bisection) and commit-poll requests.
type JobManager struct {
	cfg      *Config
	dash     *dashapi.Dashboard
	managers []*Manager
	// parallelJobFilter selects which job types the secondary (parallel)
	// processor is allowed to take; currently only patch testing.
	parallelJobFilter *ManagerJobs
	// shutdownPending is closed when syz-ci is about to shut down;
	// used to suppress spurious job errors caused by killed children.
	shutdownPending <-chan struct{}
}

// JobProcessor runs one job loop. The "main" processor additionally polls
// kernel repos for fix commits; the optional parallel processor handles
// only the job types allowed by jobFilter.
type JobProcessor struct {
	*JobManager
	name           string // used as prefix in logs and dashboard error reports
	instanceSuffix string // appended to manager instance names ("-job" or "-job-parallel")
	// knownCommits caches commit titles already uploaded to the dashboard;
	// set only for the main processor, which is the one doing commit polling.
	knownCommits map[string]bool
	baseDir      string       // root directory for this processor's checkouts and workdirs
	jobFilter    *ManagerJobs // if non-nil, restricts which job types to poll for
	jobTicker    <-chan time.Time
	commitTicker <-chan time.Time // nil for the parallel processor
}

// newJobManager creates a JobManager talking to the dashboard configured in cfg.
// It returns an error only if the dashboard client cannot be constructed.
func newJobManager(cfg *Config, managers []*Manager, shutdownPending chan struct{}) (*JobManager, error) {
	dash, err := dashapi.New(cfg.DashboardClient, cfg.DashboardAddr, cfg.DashboardKey)
	if err != nil {
		return nil, err
	}
	return &JobManager{
		cfg:             cfg,
		dash:            dash,
		managers:        managers,
		shutdownPending: shutdownPending,
		// For now let's only parallelize patch testing requests.
		parallelJobFilter: &ManagerJobs{TestPatches: true},
	}, nil
}

// startLoop starts a job loop in parallel.
66 func (jm *JobManager) startLoop(ctx context.Context, wg *sync.WaitGroup) { 67 wg.Add(1) 68 go func() { 69 defer wg.Done() 70 jm.loop(ctx) 71 }() 72 } 73 74 func (jm *JobManager) loop(ctx context.Context) { 75 if err := jm.resetJobs(); err != nil { 76 if jm.dash != nil { 77 jm.dash.LogError("syz-ci", "reset jobs failed: %v", err) 78 } 79 return 80 } 81 commitTicker := time.NewTicker(time.Duration(jm.cfg.CommitPollPeriod) * time.Second) 82 defer commitTicker.Stop() 83 jobTicker := time.NewTicker(time.Duration(jm.cfg.JobPollPeriod) * time.Second) 84 defer jobTicker.Stop() 85 var wg sync.WaitGroup 86 for main := true; ; main = false { 87 jp := &JobProcessor{ 88 JobManager: jm, 89 jobTicker: jobTicker.C, 90 } 91 if main { 92 jp.instanceSuffix = "-job" 93 jp.baseDir = osutil.Abs("jobs") 94 jp.commitTicker = commitTicker.C 95 jp.knownCommits = make(map[string]bool) 96 } else { 97 jp.instanceSuffix = "-job-parallel" 98 jp.baseDir = osutil.Abs("jobs-2") 99 jp.jobFilter = jm.parallelJobFilter 100 } 101 jp.name = fmt.Sprintf("%v%v", jm.cfg.Name, jp.instanceSuffix) 102 wg.Add(1) 103 go func() { 104 defer wg.Done() 105 jp.loop(ctx) 106 }() 107 if !main || !jm.needParallelProcessor() { 108 break 109 } 110 } 111 wg.Wait() 112 } 113 114 func (jm *JobManager) needParallelProcessor() bool { 115 if !jm.cfg.ParallelJobs { 116 return false 117 } 118 for _, mgr := range jm.managers { 119 if mgr.mgrcfg.Jobs.Filter(jm.parallelJobFilter).AnyEnabled() { 120 return true 121 } 122 } 123 return false 124 } 125 126 func (jm *JobManager) resetJobs() error { 127 managerNames := []string{} 128 for _, mgr := range jm.managers { 129 if mgr.mgrcfg.Jobs.AnyEnabled() { 130 managerNames = append(managerNames, mgr.name) 131 } 132 } 133 if len(managerNames) > 0 { 134 return jm.dash.JobReset(&dashapi.JobResetReq{Managers: managerNames}) 135 } 136 return nil 137 } 138 139 func (jp *JobProcessor) loop(ctx context.Context) { 140 jp.Logf(0, "job loop started") 141 loop: 142 for { 143 // Check jp.stop 
separately first, otherwise if stop signal arrives during a job execution, 144 // we can still grab the next job with 50% probability. 145 select { 146 case <-ctx.Done(): 147 break loop 148 default: 149 } 150 // Similar for commit polling: if we grab 2-3 bisect jobs in a row, 151 // it can delay commit polling by days. 152 select { 153 case <-jp.commitTicker: 154 jp.pollCommits() 155 default: 156 } 157 select { 158 case <-jp.jobTicker: 159 jp.pollJobs() 160 case <-jp.commitTicker: 161 jp.pollCommits() 162 case <-ctx.Done(): 163 break loop 164 } 165 } 166 jp.Logf(0, "job loop stopped") 167 } 168 169 func (jp *JobProcessor) pollCommits() { 170 for _, mgr := range jp.managers { 171 if !mgr.mgrcfg.Jobs.PollCommits { 172 continue 173 } 174 if err := jp.pollManagerCommits(mgr); err != nil { 175 jp.Errorf("failed to poll commits on %v: %v", mgr.name, err) 176 } 177 } 178 } 179 180 func brokenRepo(url string) bool { 181 // TODO(dvyukov): mmots contains weird squashed commits titled "linux-next" or "origin", 182 // which contain hundreds of other commits. This makes fix attribution totally broken. 
183 return strings.Contains(url, "git.cmpxchg.org/linux-mmots") 184 } 185 186 func (jp *JobProcessor) pollManagerCommits(mgr *Manager) error { 187 resp, err := mgr.dash.CommitPoll() 188 if err != nil { 189 return err 190 } 191 jp.Logf(0, "polling commits for %v: repos %v, commits %v", mgr.name, len(resp.Repos), len(resp.Commits)) 192 if len(resp.Repos) == 0 { 193 return fmt.Errorf("no repos") 194 } 195 commits := make(map[string]*vcs.Commit) 196 for i, repo := range resp.Repos { 197 if brokenRepo(repo.URL) { 198 continue 199 } 200 if resp.ReportEmail != "" { 201 commits1, err := jp.pollRepo(mgr, repo.URL, repo.Branch, resp.ReportEmail) 202 if err != nil { 203 jp.Errorf("failed to poll %v %v: %v", repo.URL, repo.Branch, err) 204 continue 205 } 206 jp.Logf(1, "got %v commits from %v/%v repo", len(commits1), repo.URL, repo.Branch) 207 for _, com := range commits1 { 208 // Only the "main" repo is the source of true hashes. 209 if i != 0 { 210 com.Hash = "" 211 } 212 // Not overwrite existing commits, in particular commit from the main repo with hash. 213 if _, ok := commits[com.Title]; !ok && !jp.knownCommits[com.Title] && len(commits) < 100 { 214 commits[com.Title] = com 215 jp.knownCommits[com.Title] = true 216 } 217 } 218 } 219 if i == 0 && len(resp.Commits) != 0 { 220 commits1, err := jp.getCommitInfo(mgr, repo.URL, repo.Branch, resp.Commits) 221 if err != nil { 222 jp.Errorf("failed to poll %v %v: %v", repo.URL, repo.Branch, err) 223 continue 224 } 225 jp.Logf(1, "got %v commit infos from %v/%v repo", len(commits1), repo.URL, repo.Branch) 226 for _, com := range commits1 { 227 // GetCommitByTitle does not accept ReportEmail and does not return tags, 228 // so don't replace the existing commit. 
229 if _, ok := commits[com.Title]; !ok { 230 commits[com.Title] = com 231 } 232 } 233 } 234 } 235 results := make([]dashapi.Commit, 0, len(commits)) 236 for _, com := range commits { 237 results = append(results, dashapi.Commit{ 238 Hash: com.Hash, 239 Title: com.Title, 240 Author: com.Author, 241 BugIDs: com.Tags, 242 Date: com.Date, 243 }) 244 } 245 return mgr.dash.UploadCommits(results) 246 } 247 248 func (jp *JobProcessor) pollRepo(mgr *Manager, URL, branch, reportEmail string) ([]*vcs.Commit, error) { 249 dir := filepath.Join(jp.baseDir, mgr.managercfg.TargetOS, "kernel") 250 repo, err := vcs.NewRepo(mgr.managercfg.TargetOS, mgr.managercfg.Type, dir) 251 if err != nil { 252 return nil, fmt.Errorf("failed to create kernel repo: %w", err) 253 } 254 if _, err = repo.CheckoutBranch(URL, branch); err != nil { 255 return nil, fmt.Errorf("failed to checkout kernel repo %v/%v: %w", URL, branch, err) 256 } 257 return repo.ExtractFixTagsFromCommits("HEAD", reportEmail) 258 } 259 260 func (jp *JobProcessor) getCommitInfo(mgr *Manager, URL, branch string, commits []string) ([]*vcs.Commit, error) { 261 dir := filepath.Join(jp.baseDir, mgr.managercfg.TargetOS, "kernel") 262 repo, err := vcs.NewRepo(mgr.managercfg.TargetOS, mgr.managercfg.Type, dir) 263 if err != nil { 264 return nil, fmt.Errorf("failed to create kernel repo: %w", err) 265 } 266 if _, err = repo.CheckoutBranch(URL, branch); err != nil { 267 return nil, fmt.Errorf("failed to checkout kernel repo %v/%v: %w", URL, branch, err) 268 } 269 results, missing, err := repo.GetCommitsByTitles(commits) 270 if err != nil { 271 return nil, err 272 } 273 for _, title := range missing { 274 jp.Logf(0, "did not find commit %q in kernel repo %v/%v", title, URL, branch) 275 } 276 return results, nil 277 } 278 279 func (jp *JobProcessor) pollJobs() { 280 poll := &dashapi.JobPollReq{ 281 Managers: make(map[string]dashapi.ManagerJobs), 282 } 283 for _, mgr := range jp.managers { 284 jobs := &mgr.mgrcfg.Jobs 285 if jp.jobFilter 
!= nil { 286 jobs = jobs.Filter(jp.jobFilter) 287 } 288 apiJobs := dashapi.ManagerJobs{ 289 TestPatches: jobs.TestPatches, 290 BisectCause: jobs.BisectCause, 291 BisectFix: jobs.BisectFix, 292 } 293 if apiJobs.Any() { 294 poll.Managers[mgr.name] = apiJobs 295 } 296 } 297 if len(poll.Managers) == 0 { 298 return 299 } 300 req, err := jp.dash.JobPoll(poll) 301 if err != nil { 302 jp.Errorf("failed to poll jobs: %v", err) 303 return 304 } 305 if req.ID == "" { 306 return 307 } 308 var mgr *Manager 309 for _, m := range jp.managers { 310 if m.name == req.Manager { 311 mgr = m 312 break 313 } 314 } 315 if mgr == nil { 316 jp.Errorf("got job for unknown manager: %v", req.Manager) 317 return 318 } 319 job := &Job{ 320 req: req, 321 mgr: mgr, 322 } 323 jp.processJob(job) 324 } 325 326 func (jp *JobProcessor) processJob(job *Job) { 327 req := job.req 328 jp.Logf(0, "starting job %v type %v for manager %v on %v/%v", 329 req.ID, req.Type, req.Manager, req.KernelRepo, req.KernelBranch) 330 resp := jp.process(job) 331 jp.Logf(0, "done job %v: commit %v, crash %q, error: %s", 332 resp.ID, resp.Build.KernelCommit, resp.CrashTitle, resp.Error) 333 select { 334 case <-jp.shutdownPending: 335 if len(resp.Error) != 0 { 336 // Ctrl+C can kill a child process which will cause an error. 
337 jp.Logf(0, "ignoring error: shutdown pending") 338 return 339 } 340 default: 341 } 342 if err := jp.dash.JobDone(resp); err != nil { 343 jp.Errorf("failed to mark job as done: %v", err) 344 return 345 } 346 } 347 348 type Job struct { 349 req *dashapi.JobPollResp 350 resp *dashapi.JobDoneReq 351 mgr *Manager 352 } 353 354 func (jp *JobProcessor) process(job *Job) *dashapi.JobDoneReq { 355 req, mgr := job.req, job.mgr 356 357 dir := filepath.Join(jp.baseDir, mgr.managercfg.TargetOS) 358 mgrcfg := new(mgrconfig.Config) 359 *mgrcfg = *mgr.managercfg 360 mgrcfg.Workdir = filepath.Join(dir, "workdir") 361 mgrcfg.KernelSrc = filepath.Join(dir, "kernel", mgr.mgrcfg.KernelSrcSuffix) 362 mgrcfg.Syzkaller = filepath.Join(dir, "gopath", "src", "github.com", "google", "syzkaller") 363 os.RemoveAll(mgrcfg.Workdir) 364 defer os.RemoveAll(mgrcfg.Workdir) 365 366 resp := &dashapi.JobDoneReq{ 367 ID: req.ID, 368 Build: dashapi.Build{ 369 Manager: mgr.name, 370 ID: req.ID, 371 OS: mgr.managercfg.TargetOS, 372 Arch: mgr.managercfg.TargetArch, 373 VMArch: mgr.managercfg.TargetVMArch, 374 SyzkallerCommit: req.SyzkallerCommit, 375 }, 376 } 377 job.resp = resp 378 resp.Build.KernelRepo = req.KernelRepo 379 resp.Build.KernelBranch = req.KernelBranch 380 resp.Build.KernelConfig = req.KernelConfig 381 switch req.Type { 382 case dashapi.JobTestPatch: 383 resp.Build.KernelCommit = "[unknown]" 384 mgrcfg.Name += "-test" + jp.instanceSuffix 385 case dashapi.JobBisectCause, dashapi.JobBisectFix: 386 resp.Build.KernelCommit = req.KernelCommit 387 resp.Build.KernelCommitTitle = req.KernelCommitTitle 388 mgrcfg.Name += "-bisect" + jp.instanceSuffix 389 default: 390 err := fmt.Errorf("bad job type %v", req.Type) 391 job.resp.Error = []byte(err.Error()) 392 jp.Errorf("%s", err) 393 return job.resp 394 } 395 if req.KernelRepo == "" { 396 req.KernelRepo = mgr.mgrcfg.Repo 397 req.KernelBranch = mgr.mgrcfg.Branch 398 } 399 required := []struct { 400 name string 401 ok bool 402 }{ 403 {"kernel 
repository", req.KernelRepo != "" || req.Type != dashapi.JobTestPatch}, 404 {"kernel branch", req.KernelBranch != "" || req.Type != dashapi.JobTestPatch}, 405 {"kernel config", len(req.KernelConfig) != 0}, 406 {"syzkaller commit", req.SyzkallerCommit != ""}, 407 // We either want a normal repro (with options and syz repro text) 408 // or it's a boot time bug, in which case both are empty. 409 {"reproducer consistency", (len(req.ReproOpts) != 0 && len(req.ReproSyz) != 0) || 410 (len(req.ReproOpts) == 0 && len(req.ReproSyz) == 0)}, 411 } 412 for _, req := range required { 413 if !req.ok { 414 job.resp.Error = []byte(req.name + " is invalid") 415 jp.Errorf("%s (job id=%q, type=%v)", job.resp.Error, job.req.ID, job.req.Type) 416 return job.resp 417 } 418 } 419 if typ := mgr.managercfg.Type; !vm.AllowsOvercommit(typ) { 420 job.resp.Error = []byte(fmt.Sprintf("testing is not yet supported for %v machine type.", typ)) 421 jp.Errorf("%s", job.resp.Error) 422 return job.resp 423 } 424 425 var err error 426 switch req.Type { 427 case dashapi.JobTestPatch: 428 err = jp.testPatch(job, mgrcfg) 429 case dashapi.JobBisectCause, dashapi.JobBisectFix: 430 err = jp.bisect(job, mgrcfg) 431 } 432 if err != nil { 433 job.resp.Error = []byte(err.Error()) 434 } 435 return job.resp 436 } 437 438 func (jp *JobProcessor) bisect(job *Job, mgrcfg *mgrconfig.Config) error { 439 req, resp, mgr := job.req, job.resp, job.mgr 440 441 // Hack: if the manager has only, say, 5 VMs, but bisect wants 10, try to override number of VMs to 10. 442 // OverrideVMCount is opportunistic and should do it only if it's safe. 443 if err := instance.OverrideVMCount(mgrcfg, bisect.MaxNumTests); err != nil { 444 return err 445 } 446 447 var baseline []byte 448 // Read possible baseline for config minimization. 
449 if mgr.mgrcfg.KernelBaselineConfig != "" { 450 var err error 451 baseline, err = os.ReadFile(mgr.mgrcfg.KernelBaselineConfig) 452 if err != nil { 453 return fmt.Errorf("failed to read baseline config: %w", err) 454 } 455 } 456 err := jp.prepareBisectionRepo(mgrcfg, req) 457 if err != nil { 458 return err 459 } 460 trace := new(bytes.Buffer) 461 cfg := &bisect.Config{ 462 Trace: &debugtracer.GenericTracer{ 463 TraceWriter: io.MultiWriter(trace, log.VerboseWriter(3)), 464 OutDir: osutil.Abs(filepath.Join("jobs", "debug", strings.ReplaceAll(req.ID, "|", "_"))), 465 }, 466 // Out of 1049 cause bisections that we have now: 467 // - 891 finished under 6h (84.9%) 468 // - 957 finished under 8h (91.2%) 469 // - 980 finished under 10h (93.4%) 470 // - 989 finished under 12h (94.3%) 471 // - 1011 finished under 18h (96.3%) 472 // - 1025 finished under 24h (97.7%) 473 // There is also a significant increase in errors/inconclusive bisections after ~8h. 474 // Out of 4075 fix bisections: 475 // - 4015 finished under 6h (98.5%) 476 // - 4020 finished under 8h (98.7%) 477 // - 4026 finished under 10h (98.8%) 478 // - 4032 finished under 12h (98.9%) 479 // Significant increase in errors starts after ~12h. 480 // Bisection jobs are now executed in parallel to patch testing, so it doesn't destroy user experience. 481 // Let's set the timeout to 12h. 
482 Timeout: 12 * time.Hour, 483 Fix: req.Type == dashapi.JobBisectFix, 484 DefaultCompiler: mgr.mgrcfg.Compiler, 485 CompilerType: mgr.mgrcfg.CompilerType, 486 BinDir: jp.cfg.BisectBinDir, 487 Linker: mgr.mgrcfg.Linker, 488 Ccache: jp.cfg.Ccache, 489 BuildCPUs: jp.cfg.BuildCPUs, 490 Kernel: bisect.KernelConfig{ 491 Repo: req.KernelRepo, 492 Branch: req.KernelBranch, 493 Commit: req.KernelCommit, 494 CommitTitle: req.KernelCommitTitle, 495 Cmdline: mgr.mgrcfg.KernelCmdline, 496 Sysctl: mgr.mgrcfg.KernelSysctl, 497 Config: req.KernelConfig, 498 BaselineConfig: baseline, 499 Userspace: mgr.mgrcfg.Userspace, 500 Backports: mgr.backportCommits(), 501 }, 502 Syzkaller: bisect.SyzkallerConfig{ 503 Repo: jp.cfg.SyzkallerRepo, 504 Commit: req.SyzkallerCommit, 505 }, 506 Repro: bisect.ReproConfig{ 507 Opts: req.ReproOpts, 508 Syz: req.ReproSyz, 509 C: req.ReproC, 510 }, 511 CrossTree: req.MergeBaseRepo != "", 512 Manager: mgrcfg, 513 BuildSemaphore: buildSem, 514 TestSemaphore: testSem, 515 } 516 517 res, err := bisect.Run(cfg) 518 resp.Log = trace.Bytes() 519 if err != nil { 520 var infraErr *build.InfraError 521 if errors.As(err, &infraErr) { 522 resp.Flags |= dashapi.BisectResultInfraError 523 } 524 return err 525 } 526 for _, com := range res.Commits { 527 resp.Commits = append(resp.Commits, dashapi.Commit{ 528 Hash: com.Hash, 529 Title: com.Title, 530 Author: com.Author, 531 AuthorName: com.AuthorName, 532 Recipients: com.Recipients.ToDash(), 533 Date: com.Date, 534 }) 535 } 536 if len(res.Commits) == 1 { 537 if len(res.Commits[0].Parents) > 1 { 538 resp.Flags |= dashapi.BisectResultMerge 539 } 540 if res.NoopChange { 541 resp.Flags |= dashapi.BisectResultNoop 542 } 543 if res.IsRelease { 544 resp.Flags |= dashapi.BisectResultRelease 545 } 546 const confidenceCutOff = 0.66 547 if res.Confidence < confidenceCutOff { 548 resp.Flags |= dashapi.BisectResultIgnore 549 } 550 if jp.ignoreBisectCommit(res.Commits[0]) { 551 resp.Flags |= dashapi.BisectResultIgnore 552 } 553 } 
554 if res.Report != nil { 555 resp.CrashTitle = res.Report.Title 556 resp.CrashAltTitles = res.Report.AltTitles 557 resp.CrashReport = res.Report.Report 558 resp.CrashLog = res.Report.Output 559 if len(resp.Commits) != 0 { 560 resp.Commits[0].Recipients = append(resp.Commits[0].Recipients, res.Report.Recipients.ToDash()...) 561 } else { 562 // If there is a report and there is no commit, it means a crash 563 // occurred on HEAD(for BisectFix) and oldest tested release(for BisectCause). 564 resp.Build.KernelCommit = res.Commit.Hash 565 resp.Build.KernelCommitDate = res.Commit.CommitDate 566 resp.Build.KernelCommitTitle = res.Commit.Title 567 } 568 } 569 return nil 570 } 571 572 var ignoredCommits = []string{ 573 // Commit "usb: gadget: add raw-gadget interface" adds a kernel interface for 574 // triggering USB bugs, which ends up being the guilty commit during bisection 575 // for USB bugs introduced before it. 576 "f2c2e717642c66f7fe7e5dd69b2e8ff5849f4d10", 577 // Commit "devlink: bump the instance index directly when iterating" has likely 578 // fixed some frequent task hung, which skews fix bisection results. 579 // TODO: consider backporting it during bisection itself. 580 "d772781964415c63759572b917e21c4f7ec08d9f", 581 } 582 583 func (jp *JobProcessor) ignoreBisectCommit(commit *vcs.Commit) bool { 584 // First look at the always ignored values. 
585 for _, hash := range ignoredCommits { 586 if commit.Hash == hash { 587 return true 588 } 589 } 590 _, ok := jp.cfg.BisectIgnore[commit.Hash] 591 return ok 592 } 593 594 func (jp *JobProcessor) testPatch(job *Job, mgrcfg *mgrconfig.Config) error { 595 req, resp, mgr := job.req, job.resp, job.mgr 596 env, err := instance.NewEnv(mgrcfg, buildSem, testSem) 597 if err != nil { 598 return err 599 } 600 jp.Logf(0, "building syzkaller on %v...", req.SyzkallerCommit) 601 syzBuildLog, syzBuildErr := env.BuildSyzkaller(jp.cfg.SyzkallerRepo, req.SyzkallerCommit) 602 if syzBuildErr != nil { 603 return syzBuildErr 604 } 605 jp.Logf(0, "fetching kernel...") 606 repo, err := vcs.NewRepo(mgrcfg.TargetOS, mgrcfg.Type, mgrcfg.KernelSrc) 607 if err != nil { 608 return fmt.Errorf("failed to create kernel repo: %w", err) 609 } 610 kernelCommit, err := jp.checkoutJobCommit(job, repo) 611 if err != nil { 612 return err 613 } 614 resp.Build.KernelCommit = kernelCommit.Hash 615 resp.Build.KernelCommitTitle = kernelCommit.Title 616 resp.Build.KernelCommitDate = kernelCommit.CommitDate 617 618 buildCfg := &instance.BuildKernelConfig{ 619 CompilerBin: mgr.mgrcfg.Compiler, 620 MakeBin: mgr.mgrcfg.Make, 621 LinkerBin: mgr.mgrcfg.Linker, 622 CcacheBin: mgr.mgrcfg.Ccache, 623 UserspaceDir: mgr.mgrcfg.Userspace, 624 CmdlineFile: mgr.mgrcfg.KernelCmdline, 625 SysctlFile: mgr.mgrcfg.KernelSysctl, 626 KernelConfig: req.KernelConfig, 627 } 628 if err := env.CleanKernel(buildCfg); err != nil { 629 return fmt.Errorf("kernel clean failed: %w", err) 630 } 631 if len(req.Patch) != 0 { 632 if err := vcs.Patch(mgrcfg.KernelSrc, req.Patch); err != nil { 633 return err 634 } 635 } 636 637 // Disable CONFIG_DEBUG_INFO_BTF in the config. 638 // DEBUG_INFO_BTF requires a very new pahole binary, which we don't have on syzbot instances. 
639 // Currently we don't enable DEBUG_INFO_BTF, but we have some old bugs with DEBUG_INFO_BTF enabled 640 // (at the time requirements for pahole binary were lower, or maybe the config silently disabled itself). 641 // Testing of patches for these bugs fail now because of the config, so we disable it as a work-around. 642 // Ideally we have a new pahole and then we can remove this hack. That's issue #2096. 643 // pkg/vcs/linux.go also disables it for the bisection process. 644 req.KernelConfig = bytes.ReplaceAll(req.KernelConfig, 645 []byte("CONFIG_DEBUG_INFO_BTF=y"), 646 []byte("# CONFIG_DEBUG_INFO_BTF is not set")) 647 648 log.Logf(0, "job: building kernel...") 649 kernelConfig, details, err := env.BuildKernel(buildCfg) 650 resp.Build.CompilerID = details.CompilerID 651 if err != nil { 652 return err 653 } 654 if kernelConfig != "" { 655 resp.Build.KernelConfig, err = os.ReadFile(kernelConfig) 656 if err != nil { 657 return fmt.Errorf("failed to read config file: %w", err) 658 } 659 } 660 jp.Logf(0, "job: testing...") 661 results, err := env.Test(3, req.ReproSyz, req.ReproOpts, req.ReproC) 662 if err != nil { 663 return fmt.Errorf("%w\n\nsyzkaller build log:\n%s", err, syzBuildLog) 664 } 665 ret, err := aggregateTestResults(results) 666 if err != nil { 667 return fmt.Errorf("%w\n\nsyzkaller build log:\n%s", err, syzBuildLog) 668 } 669 rep := ret.report 670 if rep != nil { 671 resp.CrashTitle = rep.Title 672 resp.CrashAltTitles = rep.AltTitles 673 resp.CrashReport = rep.Report 674 } 675 resp.CrashLog = ret.rawOutput 676 return nil 677 } 678 679 func (jp *JobProcessor) prepareBisectionRepo(mgrcfg *mgrconfig.Config, req *dashapi.JobPollResp) error { 680 if req.MergeBaseRepo == "" { 681 // No need to. 
682 return nil 683 } 684 repo, err := vcs.NewRepo(mgrcfg.TargetOS, mgrcfg.Type, mgrcfg.KernelSrc) 685 if err != nil { 686 return fmt.Errorf("failed to create kernel repo: %w", err) 687 } 688 _, err = checkoutKernelOrCommit(repo, req.MergeBaseRepo, req.MergeBaseBranch) 689 if err != nil { 690 return fmt.Errorf("failed to checkout the merge base repo %v on %v: %w", 691 req.MergeBaseRepo, req.MergeBaseBranch, err) 692 } 693 return nil 694 } 695 696 func (jp *JobProcessor) checkoutJobCommit(job *Job, repo vcs.Repo) (*vcs.Commit, error) { 697 req, resp := job.req, job.resp 698 var kernelCommit *vcs.Commit 699 if req.MergeBaseRepo != "" { 700 jp.Logf(1, "checking out the base kernel...") 701 firstCommit, err := checkoutKernelOrCommit(repo, req.KernelRepo, req.KernelBranch) 702 if err != nil { 703 return nil, fmt.Errorf("failed to checkout first kernel repo %v on %v: %w", 704 req.KernelRepo, req.KernelBranch, err) 705 } 706 secondCommit, err := checkoutKernelOrCommit(repo, req.MergeBaseRepo, req.MergeBaseBranch) 707 if err != nil { 708 return nil, fmt.Errorf("failed to checkout second kernel repo %v on %v: %w", 709 req.MergeBaseRepo, req.MergeBaseBranch, err) 710 } 711 bases, err := repo.MergeBases(firstCommit.Hash, secondCommit.Hash) 712 if err != nil { 713 return nil, fmt.Errorf("failed to calculate merge bases between %v and %v: %w", 714 firstCommit.Hash, secondCommit.Hash, err) 715 } 716 if len(bases) != 1 { 717 return nil, fmt.Errorf("expected one merge base between %v and %v, got %d", 718 firstCommit.Hash, secondCommit.Hash, len(bases)) 719 } 720 kernelCommit, err = repo.CheckoutCommit(req.KernelRepo, bases[0].Hash) 721 if err != nil { 722 return nil, fmt.Errorf("failed to checkout kernel repo %v on merge base %v: %w", 723 req.KernelRepo, bases[0].Hash, err) 724 } 725 resp.Build.KernelBranch = "" 726 } else if vcs.CheckCommitHash(req.KernelBranch) { 727 var err error 728 kernelCommit, err = repo.CheckoutCommit(req.KernelRepo, req.KernelBranch) 729 if err != nil { 
730 return nil, fmt.Errorf("failed to checkout kernel repo %v on commit %v: %w", 731 req.KernelRepo, req.KernelBranch, err) 732 } 733 resp.Build.KernelBranch = "" 734 } else { 735 var err error 736 kernelCommit, err = repo.CheckoutBranch(req.KernelRepo, req.KernelBranch) 737 if err != nil { 738 return nil, fmt.Errorf("failed to checkout kernel repo %v/%v: %w", 739 req.KernelRepo, req.KernelBranch, err) 740 } 741 } 742 return kernelCommit, nil 743 } 744 745 func checkoutKernelOrCommit(repo vcs.Repo, url, branch string) (*vcs.Commit, error) { 746 if vcs.CheckCommitHash(branch) { 747 return repo.CheckoutCommit(url, branch) 748 } 749 return repo.CheckoutBranch(url, branch) 750 } 751 752 type patchTestResult struct { 753 report *report.Report 754 rawOutput []byte 755 } 756 757 func aggregateTestResults(results []instance.EnvTestResult) (*patchTestResult, error) { 758 // We can have transient errors and other errors of different types. 759 // We need to avoid reporting transient "failed to boot" or "failed to copy binary" errors. 760 // If any of the instances crash during testing, we report this with the highest priority. 761 // Then if any of the runs succeed, we report that (to avoid transient errors). 762 // If all instances failed to boot, then we report one of these errors. 763 var anyErr, testErr error 764 var resReport, resSuccess *patchTestResult 765 anyErr = fmt.Errorf("no env test runs") 766 for _, res := range results { 767 if res.Error == nil { 768 resSuccess = &patchTestResult{rawOutput: res.RawOutput} 769 continue 770 } 771 anyErr = res.Error 772 var testError *instance.TestError 773 var crashError *instance.CrashError 774 switch { 775 case errors.As(res.Error, &testError): 776 // We should not put rep into resp.CrashTitle/CrashReport, 777 // because that will be treated as patch not fixing the bug. 
778 if rep := testError.Report; rep != nil { 779 testErr = fmt.Errorf("%v\n\n%s\n\n%s", rep.Title, rep.Report, rep.Output) 780 } else { 781 testErr = fmt.Errorf("%v\n\n%s", testError.Title, testError.Output) 782 } 783 case errors.As(res.Error, &crashError): 784 if resReport == nil || (len(resReport.report.Report) == 0 && len(crashError.Report.Report) != 0) { 785 resReport = &patchTestResult{report: crashError.Report, rawOutput: res.RawOutput} 786 } 787 } 788 } 789 if resReport != nil { 790 return resReport, nil 791 } 792 if resSuccess != nil { 793 return resSuccess, nil 794 } 795 if testErr != nil { 796 return nil, testErr 797 } 798 return nil, anyErr 799 } 800 801 func (jp *JobProcessor) Logf(level int, msg string, args ...interface{}) { 802 log.Logf(level, "%s: "+msg, append([]interface{}{jp.name}, args...)...) 803 } 804 805 // Errorf logs non-fatal error and sends it to dashboard. 806 func (jp *JobProcessor) Errorf(msg string, args ...interface{}) { 807 log.Errorf("job: "+msg, args...) 808 if jp.dash != nil { 809 jp.dash.LogError(jp.name, msg, args...) 810 } 811 }