github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/pkg/bisect/bisect.go (about)

     1  // Copyright 2018 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package bisect
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"math"
    10  	"os"
    11  	"sort"
    12  	"time"
    13  
    14  	"github.com/google/syzkaller/pkg/build"
    15  	"github.com/google/syzkaller/pkg/debugtracer"
    16  	"github.com/google/syzkaller/pkg/hash"
    17  	"github.com/google/syzkaller/pkg/instance"
    18  	"github.com/google/syzkaller/pkg/mgrconfig"
    19  	"github.com/google/syzkaller/pkg/osutil"
    20  	"github.com/google/syzkaller/pkg/report"
    21  	"github.com/google/syzkaller/pkg/report/crash"
    22  	"github.com/google/syzkaller/pkg/vcs"
    23  )
    24  
// Config describes a single bisection job: what to bisect (Kernel, Repro), in which
// direction (Fix) and the tooling used to build and test revisions.
//
// DefaultCompiler, CompilerType and BinDir are passed to the bisecter when it picks the
// build environment for a commit; Linker and Ccache are passed to the kernel build.
// Timeout, when non-zero, bounds the total bisection time (it is checked before every test).
// Trace receives the bisection log. BuildSemaphore and TestSemaphore are handed to the
// instance environment created in Run.
type Config struct {
	Trace           debugtracer.DebugTracer
	Fix             bool
	DefaultCompiler string
	CompilerType    string
	Linker          string
	BinDir          string
	Ccache          string
	Timeout         time.Duration
	Kernel          KernelConfig
	Syzkaller       SyzkallerConfig
	Repro           ReproConfig
	Manager         *mgrconfig.Config
	BuildSemaphore  *instance.Semaphore
	TestSemaphore   *instance.Semaphore
	// CrossTree specifies whether a cross tree bisection is to take place, i.e.
	// Kernel.Commit is not reachable from Kernel.Branch.
	// In this case, bisection starts from their merge base.
	CrossTree bool
}
    45  
// KernelConfig describes the kernel under bisection: the repository/branch to check out,
// the commit (and its title) the crash was found on, and the inputs of the kernel build
// (Cmdline, Sysctl, Userspace and Config).
type KernelConfig struct {
	Repo        string
	Branch      string
	Commit      string
	CommitTitle string
	Cmdline     string
	Sysctl      string
	Config      []byte
	// Baseline configuration is used in commit bisection. If the crash doesn't reproduce
	// with the baseline configuration, config bisection is run. When the triggering
	// configuration option is found, the provided baseline configuration is modified
	// according to the bisection results. This new configuration is tested once more with
	// the current head. If the crash reproduces with the generated configuration, the
	// original configuration is replaced with this minimized one.
	BaselineConfig []byte
	Userspace      string
	// Extra commits to cherry pick to older kernel revisions.
	Backports []vcs.BackportCommit
}
    65  
// SyzkallerConfig selects the syzkaller revision used for testing: Repo and Commit are
// passed to the instance environment to build syzkaller before the bisection starts.
type SyzkallerConfig struct {
	Repo         string
	Commit       string
	Descriptions string
}
    71  
// ReproConfig holds the reproducer that is run against every tested revision:
// the syz program (Syz), its options (Opts) and the C program (C).
type ReproConfig struct {
	Opts []byte
	Syz  []byte
	C    []byte
}
    77  
// env is the mutable state of one bisection run: the repository and instance handles,
// the commit/config currently being worked on, timing statistics and the cache of
// already obtained test results.
type env struct {
	cfg          *Config
	repo         vcs.Repo
	bisecter     vcs.Bisecter
	minimizer    vcs.ConfigMinimizer
	commit       *vcs.Commit
	head         *vcs.Commit
	kernelConfig []byte
	inst         instance.Env
	numTests     int
	startTime    time.Time
	buildTime    time.Duration
	testTime     time.Duration
	reportTypes  []crash.Type
	// The current estimate of the reproducer's kernel crashing probability.
	reproChance float64
	// The product of our confidence in every bisection step result.
	confidence float64
	// Whether we should do 2x more execution runs for every test step.
	// We could have inferred this data from reproChance, but we want to be
	// able to react faster to sudden drops of reproducibility than an estimate
	// allows us to.
	flaky bool
	// A cache of already performed revision tests, keyed by commit hash.
	results map[string]*testResult
}
   104  
// MaxNumTests is the maximum number of reproducer runs we do per tested commit.
// Half of it is used by default; the full amount is used for the very first test
// and while the reproducer is considered flaky.
const MaxNumTests = 20
   106  
// Result describes the outcome of a bisection. Depending on how the bisection ended:
//
// 1. The bisection is conclusive: Commits holds the single cause/fix commit.
//   - for cause bisection Report is the crash on the cause commit
//   - for fix bisection Report is nil
//   - Commit is nil
//   - IsRelease is set if the bisecter reports the commit as a release commit
//   - NoopChange is set if the commit did not cause any change in the kernel binary
//     (the bisection result is most likely wrong)
//
// 2. The bisection is inconclusive: Commits holds the range of potential cause/fix commits.
//   - Report is nil in such case
//
// 3. The crash still happens on the oldest release/HEAD (for cause/fix bisection correspondingly):
//   - no commits in Commits
//   - Report is the crash report on the oldest release/HEAD
//   - Commit points to the oldest/latest commit where the crash happens.
//
// For a cross-tree fix bisection where the merge base does not crash, Commits is also
// empty and only Commit (the merge base) and Config are set.
//
// 4. Config contains the kernel config used for bisection.
//
// Confidence is the accumulated confidence in the bisection steps.
type Result struct {
	Commits    []*vcs.Commit
	Report     *report.Report
	Commit     *vcs.Commit
	Config     []byte
	NoopChange bool
	IsRelease  bool
	Confidence float64
}
   135  
   136  type InfraError struct {
   137  	Title string
   138  }
   139  
   140  func (e InfraError) Error() string {
   141  	return e.Title
   142  }
   143  
   144  // Run does the bisection and returns either the Result,
   145  // or, if the crash is not reproduced on the start commit, an error.
   146  func Run(cfg *Config) (*Result, error) {
   147  	if err := checkConfig(cfg); err != nil {
   148  		return nil, err
   149  	}
   150  	cfg.Manager.Cover = false // it's not supported somewhere back in time
   151  	repo, err := vcs.NewRepo(cfg.Manager.TargetOS, cfg.Manager.Type, cfg.Manager.KernelSrc)
   152  	if err != nil {
   153  		return nil, err
   154  	}
   155  	inst, err := instance.NewEnv(cfg.Manager, cfg.BuildSemaphore, cfg.TestSemaphore)
   156  	if err != nil {
   157  		return nil, err
   158  	}
   159  	if _, err = repo.CheckoutBranch(cfg.Kernel.Repo, cfg.Kernel.Branch); err != nil {
   160  		return nil, &InfraError{Title: fmt.Sprintf("%v", err)}
   161  	}
   162  	return runImpl(cfg, repo, inst)
   163  }
   164  
   165  func runImpl(cfg *Config, repo vcs.Repo, inst instance.Env) (*Result, error) {
   166  	bisecter, ok := repo.(vcs.Bisecter)
   167  	if !ok {
   168  		return nil, fmt.Errorf("bisection is not implemented for %v", cfg.Manager.TargetOS)
   169  	}
   170  	minimizer, ok := repo.(vcs.ConfigMinimizer)
   171  	if !ok && len(cfg.Kernel.BaselineConfig) != 0 {
   172  		return nil, fmt.Errorf("config minimization is not implemented for %v", cfg.Manager.TargetOS)
   173  	}
   174  	env := &env{
   175  		cfg:        cfg,
   176  		repo:       repo,
   177  		bisecter:   bisecter,
   178  		minimizer:  minimizer,
   179  		inst:       inst,
   180  		startTime:  time.Now(),
   181  		confidence: 1.0,
   182  	}
   183  	head, err := repo.HeadCommit()
   184  	if err != nil {
   185  		return nil, err
   186  	}
   187  	defer env.repo.SwitchCommit(head.Hash)
   188  	env.head = head
   189  	hostname, err := os.Hostname()
   190  	if err != nil {
   191  		hostname = "unnamed host"
   192  	}
   193  	env.log("%s starts bisection %s", hostname, env.startTime.String())
   194  	if cfg.Fix {
   195  		env.log("bisecting fixing commit since %v", cfg.Kernel.Commit)
   196  	} else {
   197  		env.log("bisecting cause commit starting from %v", cfg.Kernel.Commit)
   198  	}
   199  	start := time.Now()
   200  	res, err := env.bisect()
   201  	if env.flaky {
   202  		env.log("reproducer is flaky (%.2f repro chance estimate)", env.reproChance)
   203  	}
   204  	env.log("revisions tested: %v, total time: %v (build: %v, test: %v)",
   205  		env.numTests, time.Since(start), env.buildTime, env.testTime)
   206  	if err != nil {
   207  		env.log("error: %v", err)
   208  		return nil, err
   209  	}
   210  	if len(res.Commits) == 0 {
   211  		if cfg.Fix {
   212  			env.log("crash still not fixed or there were kernel test errors")
   213  		} else {
   214  			env.log("oldest tested release already had the bug or it had kernel test errors")
   215  		}
   216  
   217  		env.log("commit msg: %v", res.Commit.Title)
   218  		if res.Report != nil {
   219  			env.log("crash: %v\n%s", res.Report.Title, res.Report.Report)
   220  		}
   221  		return res, nil
   222  	}
   223  	what := "bad"
   224  	if cfg.Fix {
   225  		what = "good"
   226  	}
   227  	if len(res.Commits) > 1 {
   228  		env.log("bisection is inconclusive, the first %v commit could be any of:", what)
   229  		for _, com := range res.Commits {
   230  			env.log("%v", com.Hash)
   231  		}
   232  		return res, nil
   233  	}
   234  	com := res.Commits[0]
   235  	env.log("first %v commit: %v %v", what, com.Hash, com.Title)
   236  	env.log("recipients (to): %q", com.Recipients.GetEmails(vcs.To))
   237  	env.log("recipients (cc): %q", com.Recipients.GetEmails(vcs.Cc))
   238  	if res.Report != nil {
   239  		env.log("crash: %v\n%s", res.Report.Title, res.Report.Report)
   240  	}
   241  	return res, nil
   242  }
   243  
   244  func (env *env) bisect() (*Result, error) {
   245  	err := env.bisecter.PrepareBisect()
   246  	if err != nil {
   247  		return nil, err
   248  	}
   249  
   250  	cfg := env.cfg
   251  	if err := build.Clean(cfg.Manager.TargetOS, cfg.Manager.TargetVMArch,
   252  		cfg.Manager.Type, cfg.Manager.KernelSrc); err != nil {
   253  		return nil, fmt.Errorf("kernel clean failed: %w", err)
   254  	}
   255  	env.log("building syzkaller on %v", cfg.Syzkaller.Commit)
   256  	if _, err := env.inst.BuildSyzkaller(cfg.Syzkaller.Repo, cfg.Syzkaller.Commit); err != nil {
   257  		return nil, err
   258  	}
   259  
   260  	cfg.Kernel.Commit, err = env.identifyRewrittenCommit()
   261  	if err != nil {
   262  		return nil, err
   263  	}
   264  	com, err := env.repo.SwitchCommit(cfg.Kernel.Commit)
   265  	if err != nil {
   266  		return nil, err
   267  	}
   268  
   269  	env.log("ensuring issue is reproducible on original commit %v\n", cfg.Kernel.Commit)
   270  	env.commit = com
   271  	env.kernelConfig = cfg.Kernel.Config
   272  	testRes, err := env.test()
   273  	if err != nil {
   274  		return nil, err
   275  	} else if testRes.verdict != vcs.BisectBad {
   276  		return nil, fmt.Errorf("the crash wasn't reproduced on the original commit")
   277  	}
   278  	env.reportTypes = testRes.types
   279  	env.reproChance = testRes.badRatio
   280  
   281  	testRes1, err := env.minimizeConfig()
   282  	if err != nil {
   283  		return nil, fmt.Errorf("config minimization failed: %w", err)
   284  	}
   285  	if testRes1 != nil {
   286  		// If config minimization even partially succeeds, minimizeConfig()
   287  		// would return a non-nil value of a new report.
   288  		testRes = testRes1
   289  		// Overwrite bug's reproducibility - it may be different after config minimization.
   290  		env.reproChance = testRes.badRatio
   291  	}
   292  
   293  	bad, good, results1, fatalResult, err := env.commitRange()
   294  	if fatalResult != nil || err != nil {
   295  		return fatalResult, err
   296  	}
   297  	if env.cfg.Fix {
   298  		env.commit = good
   299  	} else {
   300  		env.commit = bad
   301  	}
   302  	env.results = map[string]*testResult{cfg.Kernel.Commit: testRes}
   303  	for _, res := range results1 {
   304  		env.results[res.com.Hash] = res
   305  	}
   306  	commits, err := env.bisecter.Bisect(bad.Hash, good.Hash, cfg.Trace, env.testPredicate)
   307  	if err != nil {
   308  		return nil, err
   309  	}
   310  	env.log("accumulated error probability: %0.2f", 1.0-env.confidence)
   311  	res := &Result{
   312  		Commits:    commits,
   313  		Config:     env.kernelConfig,
   314  		Confidence: env.confidence,
   315  	}
   316  	if len(commits) == 1 {
   317  		com := commits[0]
   318  		testRes := env.results[com.Hash]
   319  		if testRes == nil {
   320  			return nil, fmt.Errorf("no result for culprit commit")
   321  		}
   322  		res.Report = testRes.rep
   323  		isRelease, err := env.bisecter.IsRelease(com.Hash)
   324  		if err != nil {
   325  			env.log("failed to detect release: %v", err)
   326  		}
   327  		res.IsRelease = isRelease
   328  		noopChange, err := env.detectNoopChange(com)
   329  		if err != nil {
   330  			env.log("failed to detect noop change: %v", err)
   331  		}
   332  		res.NoopChange = noopChange
   333  	}
   334  	return res, nil
   335  }
   336  
   337  func (env *env) identifyRewrittenCommit() (string, error) {
   338  	cfg := env.cfg
   339  	if cfg.Kernel.Commit != "" && cfg.CrossTree {
   340  		// If the failing commit is on another tree, just take it as is.
   341  		return cfg.Kernel.Commit, nil
   342  	}
   343  	_, err := env.repo.CheckoutBranch(cfg.Kernel.Repo, cfg.Kernel.Branch)
   344  	if err != nil {
   345  		return cfg.Kernel.Commit, err
   346  	}
   347  	contained, err := env.repo.Contains(cfg.Kernel.Commit)
   348  	if err != nil || contained {
   349  		return cfg.Kernel.Commit, err
   350  	}
   351  
   352  	if !cfg.Fix {
   353  		// If we're doing a cause bisection, we don't really need the commit to be
   354  		// reachable from cfg.Kernel.Branch.
   355  		// So let's try to force tag fetch and check if the commit is present in the
   356  		// repository.
   357  		env.log("fetch other tags and check if the commit is present")
   358  		commit, err := env.repo.CheckoutCommit(cfg.Kernel.Repo, cfg.Kernel.Commit)
   359  		if err != nil {
   360  			// Ignore the error because the command will fail if the commit is really not
   361  			// present in the tree.
   362  			env.log("fetch failed with %s", err)
   363  		} else if commit != nil {
   364  			return commit.Hash, nil
   365  		}
   366  	}
   367  
   368  	// We record the tested kernel commit when syzkaller triggers a crash. These commits can become
   369  	// unreachable after the crash was found, when the history of the tested kernel branch was
   370  	// rewritten. The commit might have been completely deleted from the branch or just changed in
   371  	// some way. Some branches like linux-next are often and heavily rewritten (aka rebased).
   372  	// This can also happen when changing the branch you fuzz in an existing syz-manager config.
   373  	// This makes sense when a downstream kernel fork rebased on top of a new upstream version and
   374  	// you don't want syzkaller to report all your old bugs again.
   375  	if cfg.Kernel.CommitTitle == "" {
   376  		// This can happen during a manual bisection, when only a hash is given.
   377  		return cfg.Kernel.Commit, fmt.Errorf(
   378  			"commit %v not reachable in branch '%v' and no commit title available",
   379  			cfg.Kernel.Commit, cfg.Kernel.Branch)
   380  	}
   381  	commit, err := env.repo.GetCommitByTitle(cfg.Kernel.CommitTitle)
   382  	if err != nil {
   383  		return cfg.Kernel.Commit, err
   384  	}
   385  	if commit == nil {
   386  		return cfg.Kernel.Commit, fmt.Errorf(
   387  			"commit %v not reachable in branch '%v'", cfg.Kernel.Commit, cfg.Kernel.Branch)
   388  	}
   389  	env.log("rewritten commit %v reidentified by title '%v'\n", commit.Hash, cfg.Kernel.CommitTitle)
   390  	return commit.Hash, nil
   391  }
   392  
   393  func (env *env) minimizeConfig() (*testResult, error) {
   394  	// Find minimal configuration based on baseline to reproduce the crash.
   395  	testResults := make(map[hash.Sig]*testResult)
   396  	predMinimize := func(test []byte) (vcs.BisectResult, error) {
   397  		env.kernelConfig = test
   398  		testRes, err := env.test()
   399  		if err != nil {
   400  			return 0, err
   401  		}
   402  		// We want either a > 33% repro probability or at least it should not be
   403  		// worse than for the non-minimized config.
   404  		const badRatioThreshold = 1.0 / 3.0
   405  		if testRes.verdict == vcs.BisectBad &&
   406  			testRes.badRatio < badRatioThreshold &&
   407  			testRes.badRatio < env.reproChance {
   408  			return vcs.BisectSkip, nil
   409  		}
   410  		if testRes.verdict == vcs.BisectBad {
   411  			// Only remember crashes.
   412  			testResults[hash.Hash(test)] = testRes
   413  		}
   414  		return testRes.verdict, err
   415  	}
   416  	minConfig, err := env.minimizer.Minimize(env.cfg.Manager.SysTarget, env.cfg.Kernel.Config,
   417  		env.cfg.Kernel.BaselineConfig, env.reportTypes, env.cfg.Trace, predMinimize)
   418  	if err != nil {
   419  		if errors.Is(err, vcs.ErrBadKconfig) {
   420  			env.log("config minimization failed due to bad Kconfig %v\nproceeding with the original config", err)
   421  		} else {
   422  			return nil, err
   423  		}
   424  	}
   425  	env.kernelConfig = minConfig
   426  	return testResults[hash.Hash(minConfig)], nil
   427  }
   428  
   429  func (env *env) detectNoopChange(com *vcs.Commit) (bool, error) {
   430  	testRes := env.results[com.Hash]
   431  	if testRes.kernelSign == "" || len(com.Parents) != 1 {
   432  		return false, nil
   433  	}
   434  	parent := com.Parents[0]
   435  	parentRes := env.results[parent]
   436  	if parentRes == nil {
   437  		env.log("parent commit %v wasn't tested", parent)
   438  		// We could not test the parent commit if it is not based on the previous release
   439  		// (instead based on an older release, i.e. a very old non-rebased commit
   440  		// merged into the current release).
   441  		// TODO: we can use a differnet compiler for this old commit
   442  		// since effectively it's in the older release, in that case we may not
   443  		// detect noop change anyway.
   444  		if _, err := env.repo.SwitchCommit(parent); err != nil {
   445  			return false, err
   446  		}
   447  		_, kernelSign, err := env.build()
   448  		if err != nil {
   449  			return false, err
   450  		}
   451  		parentRes = &testResult{kernelSign: kernelSign}
   452  	}
   453  	env.log("culprit signature: %v", testRes.kernelSign)
   454  	env.log("parent  signature: %v", parentRes.kernelSign)
   455  	return testRes.kernelSign == parentRes.kernelSign, nil
   456  }
   457  
   458  func (env *env) commitRange() (*vcs.Commit, *vcs.Commit, []*testResult, *Result, error) {
   459  	rangeFunc := env.commitRangeForCause
   460  	if env.cfg.Fix {
   461  		rangeFunc = env.commitRangeForFix
   462  	}
   463  
   464  	bad, good, results1, err := rangeFunc()
   465  	if err != nil {
   466  		return bad, good, results1, nil, err
   467  	}
   468  
   469  	fatalResult, err := env.validateCommitRange(bad, good, results1)
   470  	return bad, good, results1, fatalResult, err
   471  }
   472  
   473  func (env *env) commitRangeForFix() (*vcs.Commit, *vcs.Commit, []*testResult, error) {
   474  	var results []*testResult
   475  	startCommit := env.commit
   476  	if env.cfg.CrossTree {
   477  		env.log("determining the merge base between %v and %v",
   478  			env.commit.Hash, env.head.Hash)
   479  		bases, err := env.repo.MergeBases(env.commit.Hash, env.head.Hash)
   480  		if err != nil {
   481  			return nil, nil, nil, err
   482  		}
   483  		if len(bases) != 1 {
   484  			env.log("expected 1 merge base, got %d", len(bases))
   485  			return nil, nil, nil, fmt.Errorf("expected 1 merge base, got %d", len(bases))
   486  		}
   487  		env.log("%s/%s is a merge base, check if it has the bug", bases[0].Hash, bases[0].Title)
   488  		startCommit = bases[0]
   489  		if _, err := env.repo.SwitchCommit(startCommit.Hash); err != nil {
   490  			return nil, nil, nil, err
   491  		}
   492  		res, err := env.test()
   493  		if err != nil {
   494  			return nil, nil, nil, err
   495  		}
   496  		results = append(results, res)
   497  		if res.verdict != vcs.BisectBad {
   498  			return nil, startCommit, results, nil
   499  		}
   500  	}
   501  	env.log("testing current HEAD %v", env.head.Hash)
   502  	if _, err := env.repo.SwitchCommit(env.head.Hash); err != nil {
   503  		return nil, nil, nil, err
   504  	}
   505  	res, err := env.test()
   506  	if err != nil {
   507  		return nil, nil, nil, err
   508  	}
   509  	results = append(results, res)
   510  	if res.verdict != vcs.BisectGood {
   511  		return env.head, nil, results, nil
   512  	}
   513  	return env.head, startCommit, results, nil
   514  }
   515  
   516  func (env *env) commitRangeForCause() (*vcs.Commit, *vcs.Commit, []*testResult, error) {
   517  	cfg := env.cfg
   518  	tags, err := env.bisecter.PreviousReleaseTags(cfg.Kernel.Commit, cfg.CompilerType)
   519  	if err != nil {
   520  		return nil, nil, nil, err
   521  	}
   522  	if len(tags) == 0 {
   523  		return nil, nil, nil, fmt.Errorf("no release tags before this commit")
   524  	}
   525  	pickedTags := pickReleaseTags(tags)
   526  	env.log("picked %v out of %d release tags", pickedTags, len(tags))
   527  
   528  	lastBad := env.commit
   529  	var results []*testResult
   530  	for _, tag := range pickedTags {
   531  		env.log("testing release %v", tag)
   532  		com, err := env.repo.SwitchCommit(tag)
   533  		if err != nil {
   534  			return nil, nil, nil, err
   535  		}
   536  		res, err := env.test()
   537  		if err != nil {
   538  			return nil, nil, nil, err
   539  		}
   540  		results = append(results, res)
   541  		if res.verdict == vcs.BisectGood {
   542  			return lastBad, com, results, nil
   543  		}
   544  		if res.verdict == vcs.BisectBad {
   545  			lastBad = com
   546  		}
   547  	}
   548  	// All tags were vcs.BisectBad or vcs.BisectSkip.
   549  	return lastBad, nil, results, nil
   550  }
   551  
   552  func (env *env) validateCommitRange(bad, good *vcs.Commit, results []*testResult) (*Result, error) {
   553  	if len(results) < 1 {
   554  		return nil, fmt.Errorf("commitRange returned no results")
   555  	}
   556  
   557  	if env.cfg.Fix && env.cfg.CrossTree && len(results) < 2 {
   558  		// For cross-tree bisections, it can be the case that the bug was introduced
   559  		// after the merge base, so there's no sense to continue the fix bisection.
   560  		env.log("reproducer does not crash the merge base, so there's no known bad commit")
   561  		return &Result{Commit: good, Config: env.kernelConfig}, nil
   562  	}
   563  
   564  	finalResult := results[len(results)-1] // HEAD test for fix, oldest tested test for cause bisection
   565  	if finalResult.verdict == vcs.BisectBad {
   566  		// For cause bisection: Oldest tested release already had the bug. Giving up.
   567  		// For fix bisection:   Crash still not fixed on HEAD. Leaving Result.Commits empty causes
   568  		//                      syzbot to retry this bisection later.
   569  		env.log("crash still not fixed/happens on the oldest tested release")
   570  		return &Result{Report: finalResult.rep, Commit: bad, Config: env.kernelConfig}, nil
   571  	}
   572  	if finalResult.verdict == vcs.BisectSkip {
   573  		if env.cfg.Fix {
   574  			// HEAD is moving target. Sometimes changes break syzkaller fuzzing.
   575  			// Leaving Result.Commits empty so syzbot retries this bisection again later.
   576  			env.log("HEAD had kernel build, boot or test errors")
   577  			return &Result{Report: finalResult.rep, Commit: bad, Config: env.kernelConfig}, nil
   578  		}
   579  		// The oldest tested release usually doesn't change. Retrying would give us the same result,
   580  		// unless we change the syz-ci setup (e.g. new rootfs, new compilers).
   581  		return nil, fmt.Errorf("oldest tested release had kernel build, boot or test errors")
   582  	}
   583  
   584  	return nil, nil
   585  }
   586  
// testResult is the outcome of building and testing a single revision:
// the bisection verdict, the commit that was checked out when the kernel was built,
// the report chosen for the revision (a synthetic one for build/test failures),
// the crash types that were seen and the signature of the built kernel image.
type testResult struct {
	verdict    vcs.BisectResult
	com        *vcs.Commit
	rep        *report.Report
	types      []crash.Type
	kernelSign string
	// The ratio of bad/(good+bad) results.
	badRatio float64
	// An estimate how much we can trust the result.
	confidence float64
}
   598  
   599  func (env *env) build() (*vcs.Commit, string, error) {
   600  	current, err := env.repo.HeadCommit()
   601  	if err != nil {
   602  		return nil, "", err
   603  	}
   604  
   605  	bisectEnv, err := env.bisecter.EnvForCommit(
   606  		env.cfg.DefaultCompiler, env.cfg.CompilerType,
   607  		env.cfg.BinDir, current.Hash, env.kernelConfig,
   608  		env.cfg.Kernel.Backports,
   609  	)
   610  	if err != nil {
   611  		return current, "", err
   612  	}
   613  	env.log("testing commit %v %v", current.Hash, env.cfg.CompilerType)
   614  	buildStart := time.Now()
   615  	mgr := env.cfg.Manager
   616  	if err := build.Clean(mgr.TargetOS, mgr.TargetVMArch, mgr.Type, mgr.KernelSrc); err != nil {
   617  		return current, "", fmt.Errorf("kernel clean failed: %w", err)
   618  	}
   619  	kern := &env.cfg.Kernel
   620  	_, imageDetails, err := env.inst.BuildKernel(&instance.BuildKernelConfig{
   621  		CompilerBin:  bisectEnv.Compiler,
   622  		LinkerBin:    env.cfg.Linker,
   623  		CcacheBin:    env.cfg.Ccache,
   624  		UserspaceDir: kern.Userspace,
   625  		CmdlineFile:  kern.Cmdline,
   626  		SysctlFile:   kern.Sysctl,
   627  		KernelConfig: bisectEnv.KernelConfig,
   628  	})
   629  	if imageDetails.CompilerID != "" {
   630  		env.log("compiler: %v", imageDetails.CompilerID)
   631  	}
   632  	if imageDetails.Signature != "" {
   633  		env.log("kernel signature: %v", imageDetails.Signature)
   634  	}
   635  	env.buildTime += time.Since(buildStart)
   636  	return current, imageDetails.Signature, err
   637  }
   638  
   639  // Note: When this function returns an error, the bisection it was called from is aborted.
   640  // Hence recoverable errors must be handled and the callers must treat testResult with care.
   641  // e.g. testResult.verdict will be vcs.BisectSkip for a broken build, but err will be nil.
   642  func (env *env) test() (*testResult, error) {
   643  	cfg := env.cfg
   644  	if cfg.Timeout != 0 && time.Since(env.startTime) > cfg.Timeout {
   645  		return nil, fmt.Errorf("bisection is taking too long (>%v), aborting", cfg.Timeout)
   646  	}
   647  	current, kernelSign, err := env.build()
   648  	res := &testResult{
   649  		verdict:    vcs.BisectSkip,
   650  		com:        current,
   651  		kernelSign: kernelSign,
   652  		confidence: 1.0,
   653  	}
   654  	if current == nil {
   655  		// This is not recoverable, as the caller must know which commit to skip.
   656  		return res, fmt.Errorf("couldn't get repo HEAD: %w", err)
   657  	}
   658  	if err != nil {
   659  		errInfo := fmt.Sprintf("failed building %v: ", current.Hash)
   660  		var verr *osutil.VerboseError
   661  		var kerr *build.KernelError
   662  		if errors.As(err, &verr) {
   663  			errInfo += verr.Title
   664  			env.saveDebugFile(current.Hash, 0, verr.Output)
   665  		} else if errors.As(err, &kerr) {
   666  			errInfo += string(kerr.Report)
   667  			env.saveDebugFile(current.Hash, 0, kerr.Output)
   668  		} else {
   669  			errInfo += err.Error()
   670  			env.log("%v", err)
   671  		}
   672  
   673  		env.log("%s", errInfo)
   674  		res.rep = &report.Report{Title: errInfo}
   675  		return res, nil
   676  	}
   677  
   678  	numTests := MaxNumTests / 2
   679  	if env.flaky || env.numTests == 0 {
   680  		// Use twice as many instances if the bug is flaky and during initial testing
   681  		// (as we don't know yet if it's flaky or not).
   682  		numTests *= 2
   683  	}
   684  	env.numTests++
   685  
   686  	testStart := time.Now()
   687  
   688  	results, err := env.inst.Test(numTests, cfg.Repro.Syz, cfg.Repro.Opts, cfg.Repro.C)
   689  	env.testTime += time.Since(testStart)
   690  	if err != nil {
   691  		problem := fmt.Sprintf("repro testing failure: %v", err)
   692  		env.log(problem)
   693  		return res, &InfraError{Title: problem}
   694  	}
   695  	bad, good, infra, rep, types := env.processResults(current, results)
   696  	res.verdict, err = env.bisectionDecision(len(results), bad, good, infra)
   697  	if err != nil {
   698  		return nil, err
   699  	}
   700  	if bad+good > 0 {
   701  		res.badRatio = float64(bad) / float64(bad+good)
   702  	}
   703  	if res.verdict == vcs.BisectGood {
   704  		// The result could be a false negative.
   705  		res.confidence = 1.0 - math.Pow(1.0-env.reproChance, float64(good))
   706  		env.log("false negative chance: %.3f", 1.0-res.confidence)
   707  	}
   708  	if res.verdict == vcs.BisectSkip {
   709  		res.rep = &report.Report{
   710  			Title: fmt.Sprintf("failed testing reproducer on %v", current.Hash),
   711  		}
   712  	} else {
   713  		// Pick the most relevant as the main one.
   714  		res.rep = rep
   715  	}
   716  	res.types = types
   717  	env.updateFlaky(res)
   718  	// TODO: when we start supporting boot/test error bisection, we need to make
   719  	// processResults treat that verdit as "good".
   720  	return res, nil
   721  }
   722  
   723  // testPredicate() is meant to be invoked by bisecter.Bisect().
   724  func (env *env) testPredicate() (vcs.BisectResult, error) {
   725  	var testRes1 *testResult
   726  	if env.cfg.Fix {
   727  		// There's a chance we might test a revision that does not yet contain the bug.
   728  		// Perform extra checks (see #4117).
   729  		env.log("determine whether the revision contains the guilty commit")
   730  		hadBug, err := env.revisionHadBug()
   731  		if err == errUnknownBugPresence {
   732  			// Let's skip the revision just in case.
   733  			testRes1 = &testResult{verdict: vcs.BisectSkip}
   734  		} else if err != nil {
   735  			return 0, err
   736  		}
   737  		if !hadBug {
   738  			// For result consistency, pretend that the kernel crashed.
   739  			env.log("the bug was not introduced yet; pretend that kernel crashed")
   740  			testRes1 = &testResult{verdict: vcs.BisectBad}
   741  		}
   742  	}
   743  	if testRes1 == nil {
   744  		var err error
   745  		testRes1, err = env.test()
   746  		if err != nil {
   747  			return 0, err
   748  		}
   749  		env.postTestResult(testRes1)
   750  		env.results[testRes1.com.Hash] = testRes1
   751  	}
   752  	// For fix bisections, results are inverted.
   753  	if env.cfg.Fix {
   754  		if testRes1.verdict == vcs.BisectBad {
   755  			testRes1.verdict = vcs.BisectGood
   756  		} else if testRes1.verdict == vcs.BisectGood {
   757  			testRes1.verdict = vcs.BisectBad
   758  		}
   759  	}
   760  	return testRes1.verdict, nil
   761  }
   762  
// errUnknownBugPresence is returned by revisionHadBug when none of the examined
// merge bases produced a usable (non-skipped) test result.
//
// Background: if there's a merge from a branch that was based on a much older code
// revision, it's likely that the bug was not yet present at all.
var errUnknownBugPresence = errors.New("unable to determine whether there was a bug")
   766  
   767  func (env *env) revisionHadBug() (bool, error) {
   768  	// Check if any already tested revision that is reachable from HEAD crashed.
   769  	for hash, res := range env.results {
   770  		if res.rep == nil {
   771  			continue
   772  		}
   773  		ok, err := env.repo.Contains(hash)
   774  		if err != nil {
   775  			return false, err
   776  		}
   777  		if ok {
   778  			env.log("revision %s crashed and is reachable", hash)
   779  			return true, nil
   780  		}
   781  	}
   782  
   783  	// TODO: it's also possible to extract useful information from non-crashed runs.
   784  	// But let's first see how many extra test() runs we get without it.
   785  
   786  	// We'll likely change the revision below. Ensure we get back to the original one.
   787  	curr, err := env.repo.HeadCommit()
   788  	if err != nil {
   789  		return false, err
   790  	}
   791  	defer env.repo.SwitchCommit(curr.Hash)
   792  
   793  	// Check all merge bases between the original bad commit (*) and the current HEAD revision.
   794  	// If at least one crashed, bug was definitely present.
   795  	// (*) Using the same bad commit hopefully helps us reuse many of the results.
   796  	bases, err := env.repo.MergeBases(curr.Hash, env.commit.Hash)
   797  	if err != nil {
   798  		return false, fmt.Errorf("failed to get the merge base between %s and %s: %w",
   799  			curr.Hash, env.commit.Hash, err)
   800  	}
   801  	anyResult := false
   802  	for _, base := range bases {
   803  		env.log("checking the merge base %s", base.Hash)
   804  		res := env.results[base.Hash]
   805  		if res == nil {
   806  			env.log("no existing result, test the revision")
   807  			env.repo.SwitchCommit(base.Hash)
   808  			res, err = env.test()
   809  			if err != nil {
   810  				return false, err
   811  			}
   812  			env.results[base.Hash] = res
   813  		}
   814  		if res.verdict == vcs.BisectSkip {
   815  			continue
   816  		}
   817  		anyResult = true
   818  		if res.rep != nil {
   819  			// No reason to test other bases.
   820  			return true, nil
   821  		}
   822  	}
   823  	if anyResult {
   824  		return false, nil
   825  	}
   826  	return false, errUnknownBugPresence
   827  }
   828  
   829  func (env *env) bisectionDecision(total, bad, good, infra int) (vcs.BisectResult, error) {
   830  	// Boot errors, image test errors, skipped crashes.
   831  	skip := total - bad - good - infra
   832  
   833  	wantGoodRuns := total / 2
   834  	wantTotalRuns := total / 2
   835  	if env.flaky {
   836  		// The reproducer works less than 50% of time, so we need really many good results.
   837  		wantGoodRuns = total * 3 / 4
   838  	}
   839  	if bad == 0 && good >= wantGoodRuns {
   840  		// We need a big enough number of good results, otherwise the chance of a false
   841  		// positive is too high.
   842  		return vcs.BisectGood, nil
   843  	} else if bad > 0 && (good+bad) >= wantTotalRuns {
   844  		// We need enough (good+bad) results to conclude that the kernel revision itself
   845  		// is not too broken.
   846  		return vcs.BisectBad, nil
   847  	} else if infra > skip {
   848  		// We have been unable to determine a verdict mostly because of infra errors.
   849  		// Abort the bisection.
   850  		return vcs.BisectSkip,
   851  			&InfraError{Title: "unable to determine the verdict because of infra errors"}
   852  	}
   853  	env.log("unable to determine the verdict: %d good runs (wanted %d), for bad wanted %d in total, got %d",
   854  		good, wantGoodRuns, wantTotalRuns, good+bad)
   855  	return vcs.BisectSkip, nil
   856  }
   857  
   858  func (env *env) processResults(current *vcs.Commit, results []instance.EnvTestResult) (
   859  	bad, good, infra int, rep *report.Report, types []crash.Type) {
   860  	var verdicts []string
   861  	var reports []*report.Report
   862  	for i, res := range results {
   863  		if res.Error == nil {
   864  			good++
   865  			verdicts = append(verdicts, "OK")
   866  			continue
   867  		}
   868  		var testError *instance.TestError
   869  		var crashError *instance.CrashError
   870  		switch {
   871  		case errors.As(res.Error, &testError):
   872  			if testError.Infra {
   873  				infra++
   874  				verdicts = append(verdicts, fmt.Sprintf("infra problem: %v", testError))
   875  			} else if testError.Boot {
   876  				verdicts = append(verdicts, fmt.Sprintf("boot failed: %v", testError))
   877  			} else {
   878  				verdicts = append(verdicts, fmt.Sprintf("basic kernel testing failed: %v", testError))
   879  			}
   880  			output := testError.Output
   881  			if testError.Report != nil {
   882  				output = testError.Report.Output
   883  			}
   884  			env.saveDebugFile(current.Hash, i, output)
   885  		case errors.As(res.Error, &crashError):
   886  			output := crashError.Report.Report
   887  			if len(output) == 0 {
   888  				output = crashError.Report.Output
   889  			}
   890  			env.saveDebugFile(current.Hash, i, output)
   891  			if env.isTransientError(crashError.Report) {
   892  				verdicts = append(verdicts, fmt.Sprintf("ignore: %v", crashError))
   893  				break
   894  			}
   895  			bad++
   896  			reports = append(reports, crashError.Report)
   897  			verdicts = append(verdicts, fmt.Sprintf("crashed: %v", crashError))
   898  		default:
   899  			infra++
   900  			verdicts = append(verdicts, fmt.Sprintf("failed: %v", res.Error))
   901  		}
   902  	}
   903  	unique := make(map[string]bool)
   904  	for _, verdict := range verdicts {
   905  		unique[verdict] = true
   906  	}
   907  	if len(unique) == 1 {
   908  		env.log("all runs: %v", verdicts[0])
   909  	} else {
   910  		for i, verdict := range verdicts {
   911  			env.log("run #%v: %v", i, verdict)
   912  		}
   913  	}
   914  	var others bool
   915  	rep, types, others = mostFrequentReports(reports)
   916  	if rep != nil || others {
   917  		// TODO: set flaky=true or in some other way indicate that the bug
   918  		// triggers multiple different crashes?
   919  		env.log("representative crash: %v, types: %v", rep.Title, types)
   920  	}
   921  	return
   922  }
   923  
   924  // postTestResult() is to be run after we have got the results of a test() call for a revision.
   925  // It updates the estimates of reproducibility and the overall result confidence.
   926  func (env *env) postTestResult(res *testResult) {
   927  	env.confidence *= res.confidence
   928  	if res.verdict == vcs.BisectBad {
   929  		// Let's be conservative and only decrease our reproduction likelihood estimate.
   930  		// As the estimate of each test() can also be flaky, only partially update the result.
   931  		avg := (env.reproChance + res.badRatio) / 2.0
   932  		if env.reproChance > avg {
   933  			env.reproChance = avg
   934  		}
   935  	}
   936  }
   937  
   938  // updateFlaky() updates the current flakiness estimate.
   939  func (env *env) updateFlaky(res *testResult) {
   940  	// We require at least 5 good+bad runs for a verdict, so
   941  	// with a 50% reproducility there's a ~3% chance of a false negative result.
   942  	// If there are 10 "good" results, that's a ~36% accumulated error probability.
   943  	// That's already noticeable, so let's do 2x more runs from there.
   944  	const flakyThreshold = 0.5
   945  	if res.verdict == vcs.BisectBad && res.badRatio < flakyThreshold {
   946  		// Once flaky => always treat as flaky.
   947  		env.flaky = true
   948  	}
   949  }
   950  
   951  // mostFrequentReports() processes the list of run results and determines:
   952  // 1) The most representative crash types.
   953  // 2) The most representative crash report.
   954  // The algorithm is described in code comments.
   955  func mostFrequentReports(reports []*report.Report) (*report.Report, []crash.Type, bool) {
   956  	// First find most frequent report types.
   957  	type info struct {
   958  		t      crash.Type
   959  		count  int
   960  		report *report.Report
   961  	}
   962  	crashes := 0
   963  	perType := []*info{}
   964  	perTypeMap := map[crash.Type]*info{}
   965  	for _, rep := range reports {
   966  		if rep.Title == "" {
   967  			continue
   968  		}
   969  		crashes++
   970  		if perTypeMap[rep.Type] == nil {
   971  			obj := &info{
   972  				t:      rep.Type,
   973  				report: rep,
   974  			}
   975  			perType = append(perType, obj)
   976  			perTypeMap[rep.Type] = obj
   977  		}
   978  		perTypeMap[rep.Type].count++
   979  	}
   980  	sort.Slice(perType, func(i, j int) bool {
   981  		return perType[i].count > perType[j].count
   982  	})
   983  	// Then pick those that are representative enough.
   984  	var bestTypes []crash.Type
   985  	var bestReport *report.Report
   986  	taken := 0
   987  	for _, info := range perType {
   988  		if info.t == crash.Hang && info.count*2 < crashes && len(perType) > 1 {
   989  			// To pick a Hang as a representative one, require >= 50%
   990  			// of all crashes to be of this type.
   991  			// Hang crashes can appear in various parts of the kernel, so
   992  			// we only want to take them into account only if we are actually
   993  			// bisecting this kind of a bug.
   994  			continue
   995  		}
   996  		// Take further crash types until we have considered 2/3 of all crashes, but
   997  		// no more than 3.
   998  		needTaken := (crashes + 2) * 2 / 3
   999  		if taken < needTaken && len(bestTypes) < 3 {
  1000  			if bestReport == nil {
  1001  				bestReport = info.report
  1002  			}
  1003  			bestTypes = append(bestTypes, info.t)
  1004  			taken += info.count
  1005  		}
  1006  	}
  1007  	return bestReport, bestTypes, len(bestTypes) != len(perType)
  1008  }
  1009  
  1010  func (env *env) isTransientError(rep *report.Report) bool {
  1011  	// If we're not chasing a SYZFATAL error, ignore them.
  1012  	// Otherwise it indicates some transient problem of the tested kernel revision.
  1013  	hadSyzFailure := false
  1014  	for _, t := range env.reportTypes {
  1015  		hadSyzFailure = hadSyzFailure || t == crash.SyzFailure
  1016  	}
  1017  	return rep.Type == crash.SyzFailure &&
  1018  		len(env.reportTypes) > 0 && !hadSyzFailure
  1019  }
  1020  
  1021  func (env *env) saveDebugFile(hash string, idx int, data []byte) {
  1022  	env.cfg.Trace.SaveFile(fmt.Sprintf("%v.%v", hash, idx), data)
  1023  }
  1024  
  1025  func checkConfig(cfg *Config) error {
  1026  	if !osutil.IsExist(cfg.BinDir) {
  1027  		return fmt.Errorf("bin dir %v does not exist", cfg.BinDir)
  1028  	}
  1029  	if cfg.Kernel.Userspace != "" && !osutil.IsExist(cfg.Kernel.Userspace) {
  1030  		return fmt.Errorf("userspace dir %v does not exist", cfg.Kernel.Userspace)
  1031  	}
  1032  	if cfg.Kernel.Sysctl != "" && !osutil.IsExist(cfg.Kernel.Sysctl) {
  1033  		return fmt.Errorf("sysctl file %v does not exist", cfg.Kernel.Sysctl)
  1034  	}
  1035  	if cfg.Kernel.Cmdline != "" && !osutil.IsExist(cfg.Kernel.Cmdline) {
  1036  		return fmt.Errorf("cmdline file %v does not exist", cfg.Kernel.Cmdline)
  1037  	}
  1038  	return nil
  1039  }
  1040  
// log forwards a printf-style message to the configured debug tracer (env.cfg.Trace).
func (env *env) log(msg string, args ...interface{}) {
	if false {
		// Never executed: the call is here only so that the printf checker
		// validates the format strings and arguments passed to env.log().
		_ = fmt.Sprintf(msg, args...) // enable printf checker
	}
	env.cfg.Trace.Log(msg, args...)
}
  1047  
  1048  // pickReleaseTags() picks a subset of revisions to test.
  1049  // `all` is an ordered list of tags (from newer to older).
  1050  func pickReleaseTags(all []string) []string {
  1051  	if len(all) == 0 {
  1052  		return nil
  1053  	}
  1054  	// First split into x.y.z, x.y.z-1, ... and x.y, x.y-1, ...
  1055  	var subReleases, releases []string
  1056  	releaseBegin := false
  1057  	for _, tag := range all {
  1058  		v1, _, rc, v3 := vcs.ParseReleaseTag(tag)
  1059  		if v1 < 0 || rc < 0 && v3 < 0 {
  1060  			releaseBegin = true
  1061  			releases = append(releases, tag)
  1062  		}
  1063  		if !releaseBegin {
  1064  			subReleases = append(subReleases, tag)
  1065  		}
  1066  	}
  1067  	var ret []string
  1068  	// Take 2 latest sub releases.
  1069  	takeSubReleases := minInts(2, len(subReleases))
  1070  	ret = append(ret, subReleases[:takeSubReleases]...)
  1071  	// If there are a lot of sub releases, also take the middle one.
  1072  	if len(subReleases) > 5 {
  1073  		ret = append(ret, subReleases[len(subReleases)/2])
  1074  	}
  1075  	for i := 0; i < len(releases); i++ {
  1076  		// Gradually increase step.
  1077  		step := 1
  1078  		if i >= 3 {
  1079  			step = 2
  1080  		}
  1081  		if i >= 11 {
  1082  			step = 3
  1083  		}
  1084  		if i%step == 0 || i == len(releases)-1 {
  1085  			ret = append(ret, releases[i])
  1086  		}
  1087  	}
  1088  	return ret
  1089  }
  1090  
  1091  func minInts(vals ...int) int {
  1092  	ret := vals[0]
  1093  	for i := 1; i < len(vals); i++ {
  1094  		if vals[i] < ret {
  1095  			ret = vals[i]
  1096  		}
  1097  	}
  1098  	return ret
  1099  }