github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/bisect/bisect.go (about) 1 // Copyright 2018 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package bisect 5 6 import ( 7 "errors" 8 "fmt" 9 "math" 10 "os" 11 "sort" 12 "time" 13 14 "github.com/google/syzkaller/pkg/build" 15 "github.com/google/syzkaller/pkg/debugtracer" 16 "github.com/google/syzkaller/pkg/hash" 17 "github.com/google/syzkaller/pkg/instance" 18 "github.com/google/syzkaller/pkg/mgrconfig" 19 "github.com/google/syzkaller/pkg/osutil" 20 "github.com/google/syzkaller/pkg/report" 21 "github.com/google/syzkaller/pkg/report/crash" 22 "github.com/google/syzkaller/pkg/vcs" 23 ) 24 25 type Config struct { 26 Trace debugtracer.DebugTracer 27 Fix bool 28 DefaultCompiler string 29 CompilerType string 30 Linker string 31 BinDir string 32 Ccache string 33 Timeout time.Duration 34 Kernel KernelConfig 35 Syzkaller SyzkallerConfig 36 Repro ReproConfig 37 Manager *mgrconfig.Config 38 BuildSemaphore *instance.Semaphore 39 TestSemaphore *instance.Semaphore 40 // CrossTree specifies whether a cross tree bisection is to take place, i.e. 41 // Kernel.Commit is not reachable from Kernel.Branch. 42 // In this case, bisection starts from their merge base. 43 CrossTree bool 44 } 45 46 type KernelConfig struct { 47 Repo string 48 Branch string 49 Commit string 50 CommitTitle string 51 Cmdline string 52 Sysctl string 53 Config []byte 54 // Baseline configuration is used in commit bisection. If the crash doesn't reproduce 55 // with baseline configuratopm config bisection is run. When triggering configuration 56 // option is found provided baseline configuration is modified according the bisection 57 // results. This new configuration is tested once more with current head. If crash 58 // reproduces with the generated configuration original configuation is replaced with 59 // this minimized one. 60 BaselineConfig []byte 61 Userspace string 62 // Extra commits to cherry pick to older kernel revisions. 63 Backports []vcs.BackportCommit 64 } 65 66 type SyzkallerConfig struct { 67 Repo string 68 Commit string 69 Descriptions string 70 } 71 72 type ReproConfig struct { 73 Opts []byte 74 Syz []byte 75 C []byte 76 } 77 78 type env struct { 79 cfg *Config 80 repo vcs.Repo 81 bisecter vcs.Bisecter 82 minimizer vcs.ConfigMinimizer 83 commit *vcs.Commit 84 head *vcs.Commit 85 kernelConfig []byte 86 inst instance.Env 87 numTests int 88 startTime time.Time 89 buildTime time.Duration 90 testTime time.Duration 91 reportTypes []crash.Type 92 // The current estimate of the reproducer's kernel crashing probability. 93 reproChance float64 94 // The product of our confidence in every bisection step result. 95 confidence float64 96 // Whether we should do 2x more execution runs for every test step. 97 // We could have inferred this data from reproChance, but we want to be 98 // able to react faster to sudden drops of reproducibility than an estimate 99 // can allows us to. 100 flaky bool 101 // A cache of already performed revision tests. 102 results map[string]*testResult 103 } 104 105 const MaxNumTests = 20 // number of tests we do per commit 106 107 // Result describes bisection result: 108 // 1. if bisection is conclusive, the single cause/fix commit in Commits 109 // - for cause bisection report is the crash on the cause commit 110 // - for fix bisection report is nil 111 // - Commit is nil 112 // - NoopChange is set if the commit did not cause any change in the kernel binary 113 // (bisection result it most likely wrong) 114 // 115 // 2. Bisected to a release commit 116 // - if bisection is inconclusive, range of potential cause/fix commits in Commits 117 // - report is nil in such case 118 // 119 // 3. Commit is nil 120 // - if the crash still happens on the oldest release/HEAD (for cause/fix bisection correspondingly) 121 // - no commits in Commits 122 // - the crash report on the oldest release/HEAD; 123 // - Commit points to the oldest/latest commit where crash happens. 124 // 125 // 4. Config contains kernel config used for bisection. 126 type Result struct { 127 Commits []*vcs.Commit 128 Report *report.Report 129 Commit *vcs.Commit 130 Config []byte 131 NoopChange bool 132 IsRelease bool 133 Confidence float64 134 } 135 136 type InfraError struct { 137 Title string 138 } 139 140 func (e InfraError) Error() string { 141 return e.Title 142 } 143 144 // Run does the bisection and returns either the Result, 145 // or, if the crash is not reproduced on the start commit, an error. 146 func Run(cfg *Config) (*Result, error) { 147 if err := checkConfig(cfg); err != nil { 148 return nil, err 149 } 150 cfg.Manager.Cover = false // it's not supported somewhere back in time 151 repo, err := vcs.NewRepo(cfg.Manager.TargetOS, cfg.Manager.Type, cfg.Manager.KernelSrc) 152 if err != nil { 153 return nil, err 154 } 155 inst, err := instance.NewEnv(cfg.Manager, cfg.BuildSemaphore, cfg.TestSemaphore) 156 if err != nil { 157 return nil, err 158 } 159 if _, err = repo.CheckoutBranch(cfg.Kernel.Repo, cfg.Kernel.Branch); err != nil { 160 return nil, &InfraError{Title: fmt.Sprintf("%v", err)} 161 } 162 return runImpl(cfg, repo, inst) 163 } 164 165 func runImpl(cfg *Config, repo vcs.Repo, inst instance.Env) (*Result, error) { 166 bisecter, ok := repo.(vcs.Bisecter) 167 if !ok { 168 return nil, fmt.Errorf("bisection is not implemented for %v", cfg.Manager.TargetOS) 169 } 170 minimizer, ok := repo.(vcs.ConfigMinimizer) 171 if !ok && len(cfg.Kernel.BaselineConfig) != 0 { 172 return nil, fmt.Errorf("config minimization is not implemented for %v", cfg.Manager.TargetOS) 173 } 174 env := &env{ 175 cfg: cfg, 176 repo: repo, 177 bisecter: bisecter, 178 minimizer: minimizer, 179 inst: inst, 180 startTime: time.Now(), 181 confidence: 1.0, 182 } 183 head, err := repo.HeadCommit() 184 if err != nil { 185 return nil, err 186 } 187 defer env.repo.SwitchCommit(head.Hash) 188 env.head = head 189 hostname, err := os.Hostname() 190 if err != nil { 191 hostname = "unnamed host" 192 } 193 env.log("%s starts bisection %s", hostname, env.startTime.String()) 194 if cfg.Fix { 195 env.log("bisecting fixing commit since %v", cfg.Kernel.Commit) 196 } else { 197 env.log("bisecting cause commit starting from %v", cfg.Kernel.Commit) 198 } 199 start := time.Now() 200 res, err := env.bisect() 201 if env.flaky { 202 env.log("reproducer is flaky (%.2f repro chance estimate)", env.reproChance) 203 } 204 env.log("revisions tested: %v, total time: %v (build: %v, test: %v)", 205 env.numTests, time.Since(start), env.buildTime, env.testTime) 206 if err != nil { 207 env.log("error: %v", err) 208 return nil, err 209 } 210 if len(res.Commits) == 0 { 211 if cfg.Fix { 212 env.log("crash still not fixed or there were kernel test errors") 213 } else { 214 env.log("oldest tested release already had the bug or it had kernel test errors") 215 } 216 217 env.log("commit msg: %v", res.Commit.Title) 218 if res.Report != nil { 219 env.log("crash: %v\n%s", res.Report.Title, res.Report.Report) 220 } 221 return res, nil 222 } 223 what := "bad" 224 if cfg.Fix { 225 what = "good" 226 } 227 if len(res.Commits) > 1 { 228 env.log("bisection is inconclusive, the first %v commit could be any of:", what) 229 for _, com := range res.Commits { 230 env.log("%v", com.Hash) 231 } 232 return res, nil 233 } 234 com := res.Commits[0] 235 env.log("first %v commit: %v %v", what, com.Hash, com.Title) 236 env.log("recipients (to): %q", com.Recipients.GetEmails(vcs.To)) 237 env.log("recipients (cc): %q", com.Recipients.GetEmails(vcs.Cc)) 238 if res.Report != nil { 239 env.log("crash: %v\n%s", res.Report.Title, res.Report.Report) 240 } 241 return res, nil 242 } 243 244 func (env *env) bisect() (*Result, error) { 245 err := env.bisecter.PrepareBisect() 246 if err != nil { 247 return nil, err 248 } 249 250 cfg := env.cfg 251 if err := build.Clean(cfg.Manager.TargetOS, cfg.Manager.TargetVMArch, 252 cfg.Manager.Type, cfg.Manager.KernelSrc); err != nil { 253 return nil, fmt.Errorf("kernel clean failed: %w", err) 254 } 255 env.log("building syzkaller on %v", cfg.Syzkaller.Commit) 256 if _, err := env.inst.BuildSyzkaller(cfg.Syzkaller.Repo, cfg.Syzkaller.Commit); err != nil { 257 return nil, err 258 } 259 260 cfg.Kernel.Commit, err = env.identifyRewrittenCommit() 261 if err != nil { 262 return nil, err 263 } 264 com, err := env.repo.SwitchCommit(cfg.Kernel.Commit) 265 if err != nil { 266 return nil, err 267 } 268 269 env.log("ensuring issue is reproducible on original commit %v\n", cfg.Kernel.Commit) 270 env.commit = com 271 env.kernelConfig = cfg.Kernel.Config 272 testRes, err := env.test() 273 if err != nil { 274 return nil, err 275 } else if testRes.verdict != vcs.BisectBad { 276 return nil, fmt.Errorf("the crash wasn't reproduced on the original commit") 277 } 278 env.reportTypes = testRes.types 279 env.reproChance = testRes.badRatio 280 281 testRes1, err := env.minimizeConfig() 282 if err != nil { 283 return nil, fmt.Errorf("config minimization failed: %w", err) 284 } 285 if testRes1 != nil { 286 // If config minimization even partially succeeds, minimizeConfig() 287 // would return a non-nil value of a new report. 288 testRes = testRes1 289 // Overwrite bug's reproducibility - it may be different after config minimization. 290 env.reproChance = testRes.badRatio 291 } 292 293 bad, good, results1, fatalResult, err := env.commitRange() 294 if fatalResult != nil || err != nil { 295 return fatalResult, err 296 } 297 if env.cfg.Fix { 298 env.commit = good 299 } else { 300 env.commit = bad 301 } 302 env.results = map[string]*testResult{cfg.Kernel.Commit: testRes} 303 for _, res := range results1 { 304 env.results[res.com.Hash] = res 305 } 306 commits, err := env.bisecter.Bisect(bad.Hash, good.Hash, cfg.Trace, env.testPredicate) 307 if err != nil { 308 return nil, err 309 } 310 env.log("accumulated error probability: %0.2f", 1.0-env.confidence) 311 res := &Result{ 312 Commits: commits, 313 Config: env.kernelConfig, 314 Confidence: env.confidence, 315 } 316 if len(commits) == 1 { 317 com := commits[0] 318 testRes := env.results[com.Hash] 319 if testRes == nil { 320 return nil, fmt.Errorf("no result for culprit commit") 321 } 322 res.Report = testRes.rep 323 isRelease, err := env.bisecter.IsRelease(com.Hash) 324 if err != nil { 325 env.log("failed to detect release: %v", err) 326 } 327 res.IsRelease = isRelease 328 noopChange, err := env.detectNoopChange(com) 329 if err != nil { 330 env.log("failed to detect noop change: %v", err) 331 } 332 res.NoopChange = noopChange 333 } 334 return res, nil 335 } 336 337 func (env *env) identifyRewrittenCommit() (string, error) { 338 cfg := env.cfg 339 if cfg.Kernel.Commit != "" && cfg.CrossTree { 340 // If the failing commit is on another tree, just take it as is. 341 return cfg.Kernel.Commit, nil 342 } 343 _, err := env.repo.CheckoutBranch(cfg.Kernel.Repo, cfg.Kernel.Branch) 344 if err != nil { 345 return cfg.Kernel.Commit, err 346 } 347 contained, err := env.repo.Contains(cfg.Kernel.Commit) 348 if err != nil || contained { 349 return cfg.Kernel.Commit, err 350 } 351 352 if !cfg.Fix { 353 // If we're doing a cause bisection, we don't really need the commit to be 354 // reachable from cfg.Kernel.Branch. 355 // So let's try to force tag fetch and check if the commit is present in the 356 // repository. 357 env.log("fetch other tags and check if the commit is present") 358 commit, err := env.repo.CheckoutCommit(cfg.Kernel.Repo, cfg.Kernel.Commit) 359 if err != nil { 360 // Ignore the error because the command will fail if the commit is really not 361 // present in the tree. 362 env.log("fetch failed with %s", err) 363 } else if commit != nil { 364 return commit.Hash, nil 365 } 366 } 367 368 // We record the tested kernel commit when syzkaller triggers a crash. These commits can become 369 // unreachable after the crash was found, when the history of the tested kernel branch was 370 // rewritten. The commit might have been completely deleted from the branch or just changed in 371 // some way. Some branches like linux-next are often and heavily rewritten (aka rebased). 372 // This can also happen when changing the branch you fuzz in an existing syz-manager config. 373 // This makes sense when a downstream kernel fork rebased on top of a new upstream version and 374 // you don't want syzkaller to report all your old bugs again. 375 if cfg.Kernel.CommitTitle == "" { 376 // This can happen during a manual bisection, when only a hash is given. 377 return cfg.Kernel.Commit, fmt.Errorf( 378 "commit %v not reachable in branch '%v' and no commit title available", 379 cfg.Kernel.Commit, cfg.Kernel.Branch) 380 } 381 commit, err := env.repo.GetCommitByTitle(cfg.Kernel.CommitTitle) 382 if err != nil { 383 return cfg.Kernel.Commit, err 384 } 385 if commit == nil { 386 return cfg.Kernel.Commit, fmt.Errorf( 387 "commit %v not reachable in branch '%v'", cfg.Kernel.Commit, cfg.Kernel.Branch) 388 } 389 env.log("rewritten commit %v reidentified by title '%v'\n", commit.Hash, cfg.Kernel.CommitTitle) 390 return commit.Hash, nil 391 } 392 393 func (env *env) minimizeConfig() (*testResult, error) { 394 // Find minimal configuration based on baseline to reproduce the crash. 395 testResults := make(map[hash.Sig]*testResult) 396 predMinimize := func(test []byte) (vcs.BisectResult, error) { 397 env.kernelConfig = test 398 testRes, err := env.test() 399 if err != nil { 400 return 0, err 401 } 402 // We want either a > 33% repro probability or at least it should not be 403 // worse than for the non-minimized config. 404 const badRatioThreshold = 1.0 / 3.0 405 if testRes.verdict == vcs.BisectBad && 406 testRes.badRatio < badRatioThreshold && 407 testRes.badRatio < env.reproChance { 408 return vcs.BisectSkip, nil 409 } 410 if testRes.verdict == vcs.BisectBad { 411 // Only remember crashes. 412 testResults[hash.Hash(test)] = testRes 413 } 414 return testRes.verdict, err 415 } 416 minConfig, err := env.minimizer.Minimize(env.cfg.Manager.SysTarget, env.cfg.Kernel.Config, 417 env.cfg.Kernel.BaselineConfig, env.reportTypes, env.cfg.Trace, predMinimize) 418 if err != nil { 419 if errors.Is(err, vcs.ErrBadKconfig) { 420 env.log("config minimization failed due to bad Kconfig %v\nproceeding with the original config", err) 421 } else { 422 return nil, err 423 } 424 } 425 env.kernelConfig = minConfig 426 return testResults[hash.Hash(minConfig)], nil 427 } 428 429 func (env *env) detectNoopChange(com *vcs.Commit) (bool, error) { 430 testRes := env.results[com.Hash] 431 if testRes.kernelSign == "" || len(com.Parents) != 1 { 432 return false, nil 433 } 434 parent := com.Parents[0] 435 parentRes := env.results[parent] 436 if parentRes == nil { 437 env.log("parent commit %v wasn't tested", parent) 438 // We could not test the parent commit if it is not based on the previous release 439 // (instead based on an older release, i.e. a very old non-rebased commit 440 // merged into the current release). 441 // TODO: we can use a differnet compiler for this old commit 442 // since effectively it's in the older release, in that case we may not 443 // detect noop change anyway. 444 if _, err := env.repo.SwitchCommit(parent); err != nil { 445 return false, err 446 } 447 _, kernelSign, err := env.build() 448 if err != nil { 449 return false, err 450 } 451 parentRes = &testResult{kernelSign: kernelSign} 452 } 453 env.log("culprit signature: %v", testRes.kernelSign) 454 env.log("parent signature: %v", parentRes.kernelSign) 455 return testRes.kernelSign == parentRes.kernelSign, nil 456 } 457 458 func (env *env) commitRange() (*vcs.Commit, *vcs.Commit, []*testResult, *Result, error) { 459 rangeFunc := env.commitRangeForCause 460 if env.cfg.Fix { 461 rangeFunc = env.commitRangeForFix 462 } 463 464 bad, good, results1, err := rangeFunc() 465 if err != nil { 466 return bad, good, results1, nil, err 467 } 468 469 fatalResult, err := env.validateCommitRange(bad, good, results1) 470 return bad, good, results1, fatalResult, err 471 } 472 473 func (env *env) commitRangeForFix() (*vcs.Commit, *vcs.Commit, []*testResult, error) { 474 var results []*testResult 475 startCommit := env.commit 476 if env.cfg.CrossTree { 477 env.log("determining the merge base between %v and %v", 478 env.commit.Hash, env.head.Hash) 479 bases, err := env.repo.MergeBases(env.commit.Hash, env.head.Hash) 480 if err != nil { 481 return nil, nil, nil, err 482 } 483 if len(bases) != 1 { 484 env.log("expected 1 merge base, got %d", len(bases)) 485 return nil, nil, nil, fmt.Errorf("expected 1 merge base, got %d", len(bases)) 486 } 487 env.log("%s/%s is a merge base, check if it has the bug", bases[0].Hash, bases[0].Title) 488 startCommit = bases[0] 489 if _, err := env.repo.SwitchCommit(startCommit.Hash); err != nil { 490 return nil, nil, nil, err 491 } 492 res, err := env.test() 493 if err != nil { 494 return nil, nil, nil, err 495 } 496 results = append(results, res) 497 if res.verdict != vcs.BisectBad { 498 return nil, startCommit, results, nil 499 } 500 } 501 env.log("testing current HEAD %v", env.head.Hash) 502 if _, err := env.repo.SwitchCommit(env.head.Hash); err != nil { 503 return nil, nil, nil, err 504 } 505 res, err := env.test() 506 if err != nil { 507 return nil, nil, nil, err 508 } 509 results = append(results, res) 510 if res.verdict != vcs.BisectGood { 511 return env.head, nil, results, nil 512 } 513 return env.head, startCommit, results, nil 514 } 515 516 func (env *env) commitRangeForCause() (*vcs.Commit, *vcs.Commit, []*testResult, error) { 517 cfg := env.cfg 518 tags, err := env.bisecter.PreviousReleaseTags(cfg.Kernel.Commit, cfg.CompilerType) 519 if err != nil { 520 return nil, nil, nil, err 521 } 522 if len(tags) == 0 { 523 return nil, nil, nil, fmt.Errorf("no release tags before this commit") 524 } 525 pickedTags := pickReleaseTags(tags) 526 env.log("picked %v out of %d release tags", pickedTags, len(tags)) 527 528 lastBad := env.commit 529 var results []*testResult 530 for _, tag := range pickedTags { 531 env.log("testing release %v", tag) 532 com, err := env.repo.SwitchCommit(tag) 533 if err != nil { 534 return nil, nil, nil, err 535 } 536 res, err := env.test() 537 if err != nil { 538 return nil, nil, nil, err 539 } 540 results = append(results, res) 541 if res.verdict == vcs.BisectGood { 542 return lastBad, com, results, nil 543 } 544 if res.verdict == vcs.BisectBad { 545 lastBad = com 546 } 547 } 548 // All tags were vcs.BisectBad or vcs.BisectSkip. 549 return lastBad, nil, results, nil 550 } 551 552 func (env *env) validateCommitRange(bad, good *vcs.Commit, results []*testResult) (*Result, error) { 553 if len(results) < 1 { 554 return nil, fmt.Errorf("commitRange returned no results") 555 } 556 557 if env.cfg.Fix && env.cfg.CrossTree && len(results) < 2 { 558 // For cross-tree bisections, it can be the case that the bug was introduced 559 // after the merge base, so there's no sense to continue the fix bisection. 560 env.log("reproducer does not crash the merge base, so there's no known bad commit") 561 return &Result{Commit: good, Config: env.kernelConfig}, nil 562 } 563 564 finalResult := results[len(results)-1] // HEAD test for fix, oldest tested test for cause bisection 565 if finalResult.verdict == vcs.BisectBad { 566 // For cause bisection: Oldest tested release already had the bug. Giving up. 567 // For fix bisection: Crash still not fixed on HEAD. Leaving Result.Commits empty causes 568 // syzbot to retry this bisection later. 569 env.log("crash still not fixed/happens on the oldest tested release") 570 return &Result{Report: finalResult.rep, Commit: bad, Config: env.kernelConfig}, nil 571 } 572 if finalResult.verdict == vcs.BisectSkip { 573 if env.cfg.Fix { 574 // HEAD is moving target. Sometimes changes break syzkaller fuzzing. 575 // Leaving Result.Commits empty so syzbot retries this bisection again later. 576 env.log("HEAD had kernel build, boot or test errors") 577 return &Result{Report: finalResult.rep, Commit: bad, Config: env.kernelConfig}, nil 578 } 579 // The oldest tested release usually doesn't change. Retrying would give us the same result, 580 // unless we change the syz-ci setup (e.g. new rootfs, new compilers). 581 return nil, fmt.Errorf("oldest tested release had kernel build, boot or test errors") 582 } 583 584 return nil, nil 585 } 586 587 type testResult struct { 588 verdict vcs.BisectResult 589 com *vcs.Commit 590 rep *report.Report 591 types []crash.Type 592 kernelSign string 593 // The ratio of bad/(good+bad) results. 594 badRatio float64 595 // An estimate how much we can trust the result. 596 confidence float64 597 } 598 599 func (env *env) build() (*vcs.Commit, string, error) { 600 current, err := env.repo.HeadCommit() 601 if err != nil { 602 return nil, "", err 603 } 604 605 bisectEnv, err := env.bisecter.EnvForCommit( 606 env.cfg.DefaultCompiler, env.cfg.CompilerType, 607 env.cfg.BinDir, current.Hash, env.kernelConfig, 608 env.cfg.Kernel.Backports, 609 ) 610 if err != nil { 611 return current, "", err 612 } 613 env.log("testing commit %v %v", current.Hash, env.cfg.CompilerType) 614 buildStart := time.Now() 615 mgr := env.cfg.Manager 616 if err := build.Clean(mgr.TargetOS, mgr.TargetVMArch, mgr.Type, mgr.KernelSrc); err != nil { 617 return current, "", fmt.Errorf("kernel clean failed: %w", err) 618 } 619 kern := &env.cfg.Kernel 620 _, imageDetails, err := env.inst.BuildKernel(&instance.BuildKernelConfig{ 621 CompilerBin: bisectEnv.Compiler, 622 LinkerBin: env.cfg.Linker, 623 CcacheBin: env.cfg.Ccache, 624 UserspaceDir: kern.Userspace, 625 CmdlineFile: kern.Cmdline, 626 SysctlFile: kern.Sysctl, 627 KernelConfig: bisectEnv.KernelConfig, 628 }) 629 if imageDetails.CompilerID != "" { 630 env.log("compiler: %v", imageDetails.CompilerID) 631 } 632 if imageDetails.Signature != "" { 633 env.log("kernel signature: %v", imageDetails.Signature) 634 } 635 env.buildTime += time.Since(buildStart) 636 return current, imageDetails.Signature, err 637 } 638 639 // Note: When this function returns an error, the bisection it was called from is aborted. 640 // Hence recoverable errors must be handled and the callers must treat testResult with care. 641 // e.g. testResult.verdict will be vcs.BisectSkip for a broken build, but err will be nil. 642 func (env *env) test() (*testResult, error) { 643 cfg := env.cfg 644 if cfg.Timeout != 0 && time.Since(env.startTime) > cfg.Timeout { 645 return nil, fmt.Errorf("bisection is taking too long (>%v), aborting", cfg.Timeout) 646 } 647 current, kernelSign, err := env.build() 648 res := &testResult{ 649 verdict: vcs.BisectSkip, 650 com: current, 651 kernelSign: kernelSign, 652 confidence: 1.0, 653 } 654 if current == nil { 655 // This is not recoverable, as the caller must know which commit to skip. 656 return res, fmt.Errorf("couldn't get repo HEAD: %w", err) 657 } 658 if err != nil { 659 errInfo := fmt.Sprintf("failed building %v: ", current.Hash) 660 var verr *osutil.VerboseError 661 var kerr *build.KernelError 662 if errors.As(err, &verr) { 663 errInfo += verr.Title 664 env.saveDebugFile(current.Hash, 0, verr.Output) 665 } else if errors.As(err, &kerr) { 666 errInfo += string(kerr.Report) 667 env.saveDebugFile(current.Hash, 0, kerr.Output) 668 } else { 669 errInfo += err.Error() 670 env.log("%v", err) 671 } 672 673 env.log("%s", errInfo) 674 res.rep = &report.Report{Title: errInfo} 675 return res, nil 676 } 677 678 numTests := MaxNumTests / 2 679 if env.flaky || env.numTests == 0 { 680 // Use twice as many instances if the bug is flaky and during initial testing 681 // (as we don't know yet if it's flaky or not). 682 numTests *= 2 683 } 684 env.numTests++ 685 686 testStart := time.Now() 687 688 results, err := env.inst.Test(numTests, cfg.Repro.Syz, cfg.Repro.Opts, cfg.Repro.C) 689 env.testTime += time.Since(testStart) 690 if err != nil { 691 problem := fmt.Sprintf("repro testing failure: %v", err) 692 env.log(problem) 693 return res, &InfraError{Title: problem} 694 } 695 bad, good, infra, rep, types := env.processResults(current, results) 696 res.verdict, err = env.bisectionDecision(len(results), bad, good, infra) 697 if err != nil { 698 return nil, err 699 } 700 if bad+good > 0 { 701 res.badRatio = float64(bad) / float64(bad+good) 702 } 703 if res.verdict == vcs.BisectGood { 704 // The result could be a false negative. 705 res.confidence = 1.0 - math.Pow(1.0-env.reproChance, float64(good)) 706 env.log("false negative chance: %.3f", 1.0-res.confidence) 707 } 708 if res.verdict == vcs.BisectSkip { 709 res.rep = &report.Report{ 710 Title: fmt.Sprintf("failed testing reproducer on %v", current.Hash), 711 } 712 } else { 713 // Pick the most relevant as the main one. 714 res.rep = rep 715 } 716 res.types = types 717 env.updateFlaky(res) 718 // TODO: when we start supporting boot/test error bisection, we need to make 719 // processResults treat that verdit as "good". 720 return res, nil 721 } 722 723 // testPredicate() is meant to be invoked by bisecter.Bisect(). 724 func (env *env) testPredicate() (vcs.BisectResult, error) { 725 var testRes1 *testResult 726 if env.cfg.Fix { 727 // There's a chance we might test a revision that does not yet contain the bug. 728 // Perform extra checks (see #4117). 729 env.log("determine whether the revision contains the guilty commit") 730 hadBug, err := env.revisionHadBug() 731 if err == errUnknownBugPresence { 732 // Let's skip the revision just in case. 733 testRes1 = &testResult{verdict: vcs.BisectSkip} 734 } else if err != nil { 735 return 0, err 736 } 737 if !hadBug { 738 // For result consistency, pretend that the kernel crashed. 739 env.log("the bug was not introduced yet; pretend that kernel crashed") 740 testRes1 = &testResult{verdict: vcs.BisectBad} 741 } 742 } 743 if testRes1 == nil { 744 var err error 745 testRes1, err = env.test() 746 if err != nil { 747 return 0, err 748 } 749 env.postTestResult(testRes1) 750 env.results[testRes1.com.Hash] = testRes1 751 } 752 // For fix bisections, results are inverted. 753 if env.cfg.Fix { 754 if testRes1.verdict == vcs.BisectBad { 755 testRes1.verdict = vcs.BisectGood 756 } else if testRes1.verdict == vcs.BisectGood { 757 testRes1.verdict = vcs.BisectBad 758 } 759 } 760 return testRes1.verdict, nil 761 } 762 763 // If there's a merge from a branch that was based on a much older code revision, 764 // it's likely that the bug was not yet present at all. 765 var errUnknownBugPresence = errors.New("unable to determine whether there was a bug") 766 767 func (env *env) revisionHadBug() (bool, error) { 768 // Check if any already tested revision that is reachable from HEAD crashed. 769 for hash, res := range env.results { 770 if res.rep == nil { 771 continue 772 } 773 ok, err := env.repo.Contains(hash) 774 if err != nil { 775 return false, err 776 } 777 if ok { 778 env.log("revision %s crashed and is reachable", hash) 779 return true, nil 780 } 781 } 782 783 // TODO: it's also possible to extract useful information from non-crashed runs. 784 // But let's first see how many extra test() runs we get without it. 785 786 // We'll likely change the revision below. Ensure we get back to the original one. 787 curr, err := env.repo.HeadCommit() 788 if err != nil { 789 return false, err 790 } 791 defer env.repo.SwitchCommit(curr.Hash) 792 793 // Check all merge bases between the original bad commit (*) and the current HEAD revision. 794 // If at least one crashed, bug was definitely present. 795 // (*) Using the same bad commit hopefully helps us reuse many of the results. 796 bases, err := env.repo.MergeBases(curr.Hash, env.commit.Hash) 797 if err != nil { 798 return false, fmt.Errorf("failed to get the merge base between %s and %s: %w", 799 curr.Hash, env.commit.Hash, err) 800 } 801 anyResult := false 802 for _, base := range bases { 803 env.log("checking the merge base %s", base.Hash) 804 res := env.results[base.Hash] 805 if res == nil { 806 env.log("no existing result, test the revision") 807 env.repo.SwitchCommit(base.Hash) 808 res, err = env.test() 809 if err != nil { 810 return false, err 811 } 812 env.results[base.Hash] = res 813 } 814 if res.verdict == vcs.BisectSkip { 815 continue 816 } 817 anyResult = true 818 if res.rep != nil { 819 // No reason to test other bases. 820 return true, nil 821 } 822 } 823 if anyResult { 824 return false, nil 825 } 826 return false, errUnknownBugPresence 827 } 828 829 func (env *env) bisectionDecision(total, bad, good, infra int) (vcs.BisectResult, error) { 830 // Boot errors, image test errors, skipped crashes. 831 skip := total - bad - good - infra 832 833 wantGoodRuns := total / 2 834 wantTotalRuns := total / 2 835 if env.flaky { 836 // The reproducer works less than 50% of time, so we need really many good results. 837 wantGoodRuns = total * 3 / 4 838 } 839 if bad == 0 && good >= wantGoodRuns { 840 // We need a big enough number of good results, otherwise the chance of a false 841 // positive is too high. 842 return vcs.BisectGood, nil 843 } else if bad > 0 && (good+bad) >= wantTotalRuns { 844 // We need enough (good+bad) results to conclude that the kernel revision itself 845 // is not too broken. 846 return vcs.BisectBad, nil 847 } else if infra > skip { 848 // We have been unable to determine a verdict mostly because of infra errors. 849 // Abort the bisection. 850 return vcs.BisectSkip, 851 &InfraError{Title: "unable to determine the verdict because of infra errors"} 852 } 853 env.log("unable to determine the verdict: %d good runs (wanted %d), for bad wanted %d in total, got %d", 854 good, wantGoodRuns, wantTotalRuns, good+bad) 855 return vcs.BisectSkip, nil 856 } 857 858 func (env *env) processResults(current *vcs.Commit, results []instance.EnvTestResult) ( 859 bad, good, infra int, rep *report.Report, types []crash.Type) { 860 var verdicts []string 861 var reports []*report.Report 862 for i, res := range results { 863 if res.Error == nil { 864 good++ 865 verdicts = append(verdicts, "OK") 866 continue 867 } 868 var testError *instance.TestError 869 var crashError *instance.CrashError 870 switch { 871 case errors.As(res.Error, &testError): 872 if testError.Infra { 873 infra++ 874 verdicts = append(verdicts, fmt.Sprintf("infra problem: %v", testError)) 875 } else if testError.Boot { 876 verdicts = append(verdicts, fmt.Sprintf("boot failed: %v", testError)) 877 } else { 878 verdicts = append(verdicts, fmt.Sprintf("basic kernel testing failed: %v", testError)) 879 } 880 output := testError.Output 881 if testError.Report != nil { 882 output = testError.Report.Output 883 } 884 env.saveDebugFile(current.Hash, i, output) 885 case errors.As(res.Error, &crashError): 886 output := crashError.Report.Report 887 if len(output) == 0 { 888 output = crashError.Report.Output 889 } 890 env.saveDebugFile(current.Hash, i, output) 891 if env.isTransientError(crashError.Report) { 892 verdicts = append(verdicts, fmt.Sprintf("ignore: %v", crashError)) 893 break 894 } 895 bad++ 896 reports = append(reports, crashError.Report) 897 verdicts = append(verdicts, fmt.Sprintf("crashed: %v", crashError)) 898 default: 899 infra++ 900 verdicts = append(verdicts, fmt.Sprintf("failed: %v", res.Error)) 901 } 902 } 903 unique := make(map[string]bool) 904 for _, verdict := range verdicts { 905 unique[verdict] = true 906 } 907 if len(unique) == 1 { 908 env.log("all runs: %v", verdicts[0]) 909 } else { 910 for i, verdict := range verdicts { 911 env.log("run #%v: %v", i, verdict) 912 } 913 } 914 var others bool 915 rep, types, others = mostFrequentReports(reports) 916 if rep != nil || others { 917 // TODO: set flaky=true or in some other way indicate that the bug 918 // triggers multiple different crashes? 919 env.log("representative crash: %v, types: %v", rep.Title, types) 920 } 921 return 922 } 923 924 // postTestResult() is to be run after we have got the results of a test() call for a revision. 925 // It updates the estimates of reproducibility and the overall result confidence. 926 func (env *env) postTestResult(res *testResult) { 927 env.confidence *= res.confidence 928 if res.verdict == vcs.BisectBad { 929 // Let's be conservative and only decrease our reproduction likelihood estimate. 930 // As the estimate of each test() can also be flaky, only partially update the result. 931 avg := (env.reproChance + res.badRatio) / 2.0 932 if env.reproChance > avg { 933 env.reproChance = avg 934 } 935 } 936 } 937 938 // updateFlaky() updates the current flakiness estimate. 939 func (env *env) updateFlaky(res *testResult) { 940 // We require at least 5 good+bad runs for a verdict, so 941 // with a 50% reproducility there's a ~3% chance of a false negative result. 942 // If there are 10 "good" results, that's a ~36% accumulated error probability. 943 // That's already noticeable, so let's do 2x more runs from there. 944 const flakyThreshold = 0.5 945 if res.verdict == vcs.BisectBad && res.badRatio < flakyThreshold { 946 // Once flaky => always treat as flaky. 947 env.flaky = true 948 } 949 } 950 951 // mostFrequentReports() processes the list of run results and determines: 952 // 1) The most representative crash types. 953 // 2) The most representative crash report. 954 // The algorithm is described in code comments. 955 func mostFrequentReports(reports []*report.Report) (*report.Report, []crash.Type, bool) { 956 // First find most frequent report types. 957 type info struct { 958 t crash.Type 959 count int 960 report *report.Report 961 } 962 crashes := 0 963 perType := []*info{} 964 perTypeMap := map[crash.Type]*info{} 965 for _, rep := range reports { 966 if rep.Title == "" { 967 continue 968 } 969 crashes++ 970 if perTypeMap[rep.Type] == nil { 971 obj := &info{ 972 t: rep.Type, 973 report: rep, 974 } 975 perType = append(perType, obj) 976 perTypeMap[rep.Type] = obj 977 } 978 perTypeMap[rep.Type].count++ 979 } 980 sort.Slice(perType, func(i, j int) bool { 981 return perType[i].count > perType[j].count 982 }) 983 // Then pick those that are representative enough. 984 var bestTypes []crash.Type 985 var bestReport *report.Report 986 taken := 0 987 for _, info := range perType { 988 if info.t == crash.Hang && info.count*2 < crashes && len(perType) > 1 { 989 // To pick a Hang as a representative one, require >= 50% 990 // of all crashes to be of this type. 991 // Hang crashes can appear in various parts of the kernel, so 992 // we only want to take them into account only if we are actually 993 // bisecting this kind of a bug. 994 continue 995 } 996 // Take further crash types until we have considered 2/3 of all crashes, but 997 // no more than 3. 998 needTaken := (crashes + 2) * 2 / 3 999 if taken < needTaken && len(bestTypes) < 3 { 1000 if bestReport == nil { 1001 bestReport = info.report 1002 } 1003 bestTypes = append(bestTypes, info.t) 1004 taken += info.count 1005 } 1006 } 1007 return bestReport, bestTypes, len(bestTypes) != len(perType) 1008 } 1009 1010 func (env *env) isTransientError(rep *report.Report) bool { 1011 // If we're not chasing a SYZFATAL error, ignore them. 1012 // Otherwise it indicates some transient problem of the tested kernel revision. 1013 hadSyzFailure := false 1014 for _, t := range env.reportTypes { 1015 hadSyzFailure = hadSyzFailure || t == crash.SyzFailure 1016 } 1017 return rep.Type == crash.SyzFailure && 1018 len(env.reportTypes) > 0 && !hadSyzFailure 1019 } 1020 1021 func (env *env) saveDebugFile(hash string, idx int, data []byte) { 1022 env.cfg.Trace.SaveFile(fmt.Sprintf("%v.%v", hash, idx), data) 1023 } 1024 1025 func checkConfig(cfg *Config) error { 1026 if !osutil.IsExist(cfg.BinDir) { 1027 return fmt.Errorf("bin dir %v does not exist", cfg.BinDir) 1028 } 1029 if cfg.Kernel.Userspace != "" && !osutil.IsExist(cfg.Kernel.Userspace) { 1030 return fmt.Errorf("userspace dir %v does not exist", cfg.Kernel.Userspace) 1031 } 1032 if cfg.Kernel.Sysctl != "" && !osutil.IsExist(cfg.Kernel.Sysctl) { 1033 return fmt.Errorf("sysctl file %v does not exist", cfg.Kernel.Sysctl) 1034 } 1035 if cfg.Kernel.Cmdline != "" && !osutil.IsExist(cfg.Kernel.Cmdline) { 1036 return fmt.Errorf("cmdline file %v does not exist", cfg.Kernel.Cmdline) 1037 } 1038 return nil 1039 } 1040 1041 func (env *env) log(msg string, args ...interface{}) { 1042 if false { 1043 _ = fmt.Sprintf(msg, args...) // enable printf checker 1044 } 1045 env.cfg.Trace.Log(msg, args...) 1046 } 1047 1048 // pickReleaseTags() picks a subset of revisions to test. 1049 // `all` is an ordered list of tags (from newer to older). 1050 func pickReleaseTags(all []string) []string { 1051 if len(all) == 0 { 1052 return nil 1053 } 1054 // First split into x.y.z, x.y.z-1, ... and x.y, x.y-1, ... 1055 var subReleases, releases []string 1056 releaseBegin := false 1057 for _, tag := range all { 1058 v1, _, rc, v3 := vcs.ParseReleaseTag(tag) 1059 if v1 < 0 || rc < 0 && v3 < 0 { 1060 releaseBegin = true 1061 releases = append(releases, tag) 1062 } 1063 if !releaseBegin { 1064 subReleases = append(subReleases, tag) 1065 } 1066 } 1067 var ret []string 1068 // Take 2 latest sub releases. 1069 takeSubReleases := minInts(2, len(subReleases)) 1070 ret = append(ret, subReleases[:takeSubReleases]...) 1071 // If there are a lot of sub releases, also take the middle one. 1072 if len(subReleases) > 5 { 1073 ret = append(ret, subReleases[len(subReleases)/2]) 1074 } 1075 for i := 0; i < len(releases); i++ { 1076 // Gradually increase step. 1077 step := 1 1078 if i >= 3 { 1079 step = 2 1080 } 1081 if i >= 11 { 1082 step = 3 1083 } 1084 if i%step == 0 || i == len(releases)-1 { 1085 ret = append(ret, releases[i]) 1086 } 1087 } 1088 return ret 1089 } 1090 1091 func minInts(vals ...int) int { 1092 ret := vals[0] 1093 for i := 1; i < len(vals); i++ { 1094 if vals[i] < ret { 1095 ret = vals[i] 1096 } 1097 } 1098 return ret 1099 }