github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/bisect/bisect.go

github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/bisect/bisect.go (about)

     1  // Copyright 2018 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package bisect
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"math"
    10  	"os"
    11  	"sort"
    12  	"time"
    13  
    14  	"github.com/google/syzkaller/pkg/build"
    15  	"github.com/google/syzkaller/pkg/debugtracer"
    16  	"github.com/google/syzkaller/pkg/hash"
    17  	"github.com/google/syzkaller/pkg/instance"
    18  	"github.com/google/syzkaller/pkg/mgrconfig"
    19  	"github.com/google/syzkaller/pkg/osutil"
    20  	"github.com/google/syzkaller/pkg/report"
    21  	"github.com/google/syzkaller/pkg/report/crash"
    22  	"github.com/google/syzkaller/pkg/vcs"
    23  )
    24  
// Config describes a single bisection job passed to Run.
//
// Within this package the fields are used as follows:
//   - Trace is handed to the config minimizer and to the bisecter.
//   - Fix selects fix bisection; otherwise a cause bisection is performed.
//   - DefaultCompiler, Make, Linker, Ccache and BuildCPUs are copied into the
//     kernel build configuration; DefaultCompiler, CompilerType and BinDir are
//     also passed to the bisecter when it picks the build environment for a commit.
//   - Timeout, when non-zero, aborts the bisection once it has been running for
//     longer than that.
//   - Manager is used to create the repository and the test instance environment;
//     Run forces Manager.Cover to false.
//   - BuildSemaphore and TestSemaphore are passed through to the instance environment.
type Config struct {
	Trace           debugtracer.DebugTracer
	Fix             bool
	DefaultCompiler string
	CompilerType    string
	Make            string
	Linker          string
	BinDir          string
	Ccache          string
	Timeout         time.Duration
	Kernel          KernelConfig
	Syzkaller       SyzkallerConfig
	Repro           ReproConfig
	Manager         *mgrconfig.Config
	BuildSemaphore  *osutil.Semaphore
	TestSemaphore   *osutil.Semaphore
	BuildCPUs       int
	// CrossTree specifies whether a cross tree bisection is to take place, i.e.
	// Kernel.Commit is not reachable from Kernel.Branch.
	// In this case, bisection starts from their merge base.
	CrossTree bool
}
    47  
// KernelConfig describes the kernel under bisection.
//
// Repo and Branch identify the branch that is checked out before bisection,
// Commit is the commit the crash was observed on and CommitTitle is used to
// re-identify that commit by title if it is no longer reachable from Branch.
// Cmdline, Sysctl and Userspace are copied into the kernel build configuration
// (as CmdlineFile, SysctlFile and UserspaceDir respectively).
type KernelConfig struct {
	Repo        string
	Branch      string
	Commit      string
	CommitTitle string
	Cmdline     string
	Sysctl      string
	Config      []byte
	// Baseline configuration is used in commit bisection. If the crash doesn't reproduce
	// with the baseline configuration, config bisection is run. When the triggering
	// configuration option is found, the provided baseline configuration is modified
	// according to the bisection results. This new configuration is tested once more with
	// the current head. If the crash reproduces with the generated configuration, the
	// original configuration is replaced with this minimized one.
	BaselineConfig []byte
	Userspace      string
	// Extra commits to cherry pick to older kernel revisions.
	Backports []vcs.BackportCommit
}
    67  
// SyzkallerConfig identifies the syzkaller revision used for testing.
// Repo and Commit are passed to the instance environment to build syzkaller
// before the bisection starts.
// NOTE(review): Descriptions is not read anywhere in the visible part of this
// file; its consumer is presumably elsewhere.
type SyzkallerConfig struct {
	Repo         string
	Commit       string
	Descriptions string
}
    73  
// ReproConfig holds the reproducer that is executed on every tested revision.
// Syz, Opts and C are passed unchanged to the instance environment's Test method.
type ReproConfig struct {
	Opts []byte
	Syz  []byte
	C    []byte
}
    79  
// env is the mutable state of one bisection run.
//
// commit is the revision the bisection is anchored at (the original crashing
// commit at first, later the bad/good end of the range depending on cfg.Fix),
// head is the repository HEAD at the time the bisection started, and
// kernelConfig is the kernel config used for builds (it may be replaced by a
// minimized one). numTests counts the revisions that were actually run, while
// buildTime and testTime accumulate the time spent building and testing.
type env struct {
	cfg          *Config
	repo         vcs.Repo
	bisecter     vcs.Bisecter
	minimizer    vcs.ConfigMinimizer
	commit       *vcs.Commit
	head         *vcs.Commit
	kernelConfig []byte
	inst         instance.Env
	numTests     int
	startTime    time.Time
	buildTime    time.Duration
	testTime     time.Duration
	reportTypes  []crash.Type
	// The current estimate of the reproducer's kernel crashing probability.
	reproChance float64
	// The product of our confidence in every bisection step result.
	confidence float64
	// Whether we should do 2x more execution runs for every test step.
	// We could have inferred this data from reproChance, but we want to be
	// able to react faster to sudden drops of reproducibility than an estimate
	// can allow us to.
	flaky bool
	// A cache of already performed revision tests.
	results  map[string]*testResult
	buildCfg instance.BuildKernelConfig
}
   107  
// MaxNumTests is the maximum number of reproducer runs we do per commit.
// Half of that is used for a regular step; the full amount is used for the
// very first test and whenever the reproducer is considered flaky.
const MaxNumTests = 20
   109  
// Result describes the bisection result:
//
//  1. If the bisection is conclusive, Commits holds the single cause/fix commit:
//     - for cause bisection Report is the crash on the cause commit;
//     - for fix bisection Report is nil;
//     - Commit is nil;
//     - IsRelease is set if the bisection pointed at a release commit;
//     - NoopChange is set if the commit did not cause any change in the kernel
//     binary (the bisection result is most likely wrong).
//
//  2. If the bisection is inconclusive, Commits holds the range of potential
//     cause/fix commits and Report is nil.
//
//  3. If the crash still happens on the oldest release/HEAD (for cause/fix
//     bisection correspondingly):
//     - there are no commits in Commits;
//     - Report is the crash report on the oldest release/HEAD;
//     - Commit points to the oldest/latest commit where the crash happens.
//
//  4. Config contains the kernel config used for bisection.
//
//  5. Confidence is the product of the confidence in every bisection step result.
type Result struct {
	Commits    []*vcs.Commit
	Report     *report.Report
	Commit     *vcs.Commit
	Config     []byte
	NoopChange bool
	IsRelease  bool
	Confidence float64
}
   138  
   139  // Run does the bisection and returns either the Result,
   140  // or, if the crash is not reproduced on the start commit, an error.
   141  func Run(cfg *Config) (*Result, error) {
   142  	if err := checkConfig(cfg); err != nil {
   143  		return nil, err
   144  	}
   145  	cfg.Manager.Cover = false // it's not supported somewhere back in time
   146  	repo, err := vcs.NewRepo(cfg.Manager.TargetOS, cfg.Manager.Type, cfg.Manager.KernelSrc)
   147  	if err != nil {
   148  		return nil, err
   149  	}
   150  	inst, err := instance.NewEnv(cfg.Manager, cfg.BuildSemaphore, cfg.TestSemaphore)
   151  	if err != nil {
   152  		return nil, err
   153  	}
   154  	if _, err = repo.CheckoutBranch(cfg.Kernel.Repo, cfg.Kernel.Branch); err != nil {
   155  		return nil, &build.InfraError{Title: fmt.Sprintf("%v", err)}
   156  	}
   157  	return runImpl(cfg, repo, inst)
   158  }
   159  
   160  func runImpl(cfg *Config, repo vcs.Repo, inst instance.Env) (*Result, error) {
   161  	bisecter, ok := repo.(vcs.Bisecter)
   162  	if !ok {
   163  		return nil, fmt.Errorf("bisection is not implemented for %v", cfg.Manager.TargetOS)
   164  	}
   165  	minimizer, ok := repo.(vcs.ConfigMinimizer)
   166  	if !ok && len(cfg.Kernel.BaselineConfig) != 0 {
   167  		return nil, fmt.Errorf("config minimization is not implemented for %v", cfg.Manager.TargetOS)
   168  	}
   169  	env := &env{
   170  		cfg:        cfg,
   171  		repo:       repo,
   172  		bisecter:   bisecter,
   173  		minimizer:  minimizer,
   174  		inst:       inst,
   175  		startTime:  time.Now(),
   176  		confidence: 1.0,
   177  		buildCfg: instance.BuildKernelConfig{
   178  			CompilerBin:  cfg.DefaultCompiler,
   179  			MakeBin:      cfg.Make,
   180  			LinkerBin:    cfg.Linker,
   181  			CcacheBin:    cfg.Ccache,
   182  			UserspaceDir: cfg.Kernel.Userspace,
   183  			CmdlineFile:  cfg.Kernel.Cmdline,
   184  			SysctlFile:   cfg.Kernel.Sysctl,
   185  			KernelConfig: cfg.Kernel.Config,
   186  			BuildCPUs:    cfg.BuildCPUs,
   187  		},
   188  	}
   189  	head, err := repo.Commit(vcs.HEAD)
   190  	if err != nil {
   191  		return nil, err
   192  	}
   193  	defer env.repo.SwitchCommit(head.Hash)
   194  	env.head = head
   195  	hostname, err := os.Hostname()
   196  	if err != nil {
   197  		hostname = "unnamed host"
   198  	}
   199  	env.logf("%s starts bisection %s", hostname, env.startTime.String())
   200  	if cfg.Fix {
   201  		env.logf("bisecting fixing commit since %v", cfg.Kernel.Commit)
   202  	} else {
   203  		env.logf("bisecting cause commit starting from %v", cfg.Kernel.Commit)
   204  	}
   205  	start := time.Now()
   206  	res, err := env.bisect()
   207  	if env.flaky {
   208  		env.logf("reproducer is flaky (%.2f repro chance estimate)", env.reproChance)
   209  	}
   210  	env.logf("revisions tested: %v, total time: %v (build: %v, test: %v)",
   211  		env.numTests, time.Since(start), env.buildTime, env.testTime)
   212  	if err != nil {
   213  		env.logf("error: %v", err)
   214  		return nil, err
   215  	}
   216  	if len(res.Commits) == 0 {
   217  		if cfg.Fix {
   218  			env.logf("crash still not fixed or there were kernel test errors")
   219  		} else {
   220  			env.logf("oldest tested release already had the bug or it had kernel test errors")
   221  		}
   222  
   223  		env.logf("commit msg: %v", res.Commit.Title)
   224  		if res.Report != nil {
   225  			env.logf("crash: %v\n%s", res.Report.Title, res.Report.Report)
   226  		}
   227  		return res, nil
   228  	}
   229  	what := "bad"
   230  	if cfg.Fix {
   231  		what = "good"
   232  	}
   233  	if len(res.Commits) > 1 {
   234  		env.logf("bisection is inconclusive, the first %v commit could be any of:", what)
   235  		for _, com := range res.Commits {
   236  			env.logf("%v", com.Hash)
   237  		}
   238  		return res, nil
   239  	}
   240  	com := res.Commits[0]
   241  	env.logf("first %v commit: %v %v", what, com.Hash, com.Title)
   242  	env.logf("recipients (to): %q", com.Recipients.GetEmails(vcs.To))
   243  	env.logf("recipients (cc): %q", com.Recipients.GetEmails(vcs.Cc))
   244  	if res.Report != nil {
   245  		env.logf("crash: %v\n%s", res.Report.Title, res.Report.Report)
   246  	}
   247  	return res, nil
   248  }
   249  
   250  func (env *env) bisect() (*Result, error) {
   251  	err := env.bisecter.PrepareBisect()
   252  	if err != nil {
   253  		return nil, err
   254  	}
   255  
   256  	cfg := env.cfg
   257  	if err := env.inst.CleanKernel(&env.buildCfg); err != nil {
   258  		return nil, fmt.Errorf("kernel clean failed: %w", err)
   259  	}
   260  	env.logf("building syzkaller on %v", cfg.Syzkaller.Commit)
   261  	if _, err := env.inst.BuildSyzkaller(cfg.Syzkaller.Repo, cfg.Syzkaller.Commit); err != nil {
   262  		return nil, err
   263  	}
   264  
   265  	cfg.Kernel.Commit, err = env.identifyRewrittenCommit()
   266  	if err != nil {
   267  		return nil, err
   268  	}
   269  	com, err := env.repo.SwitchCommit(cfg.Kernel.Commit)
   270  	if err != nil {
   271  		return nil, err
   272  	}
   273  
   274  	env.logf("ensuring issue is reproducible on original commit %v\n", cfg.Kernel.Commit)
   275  	env.commit = com
   276  	env.kernelConfig = cfg.Kernel.Config
   277  	testRes, err := env.test()
   278  	if err != nil {
   279  		return nil, err
   280  	} else if testRes.verdict != vcs.BisectBad {
   281  		return nil, fmt.Errorf("the crash wasn't reproduced on the original commit")
   282  	}
   283  	env.reportTypes = testRes.types
   284  	env.reproChance = testRes.badRatio
   285  
   286  	testRes1, err := env.minimizeConfig()
   287  	if err != nil {
   288  		return nil, fmt.Errorf("config minimization failed: %w", err)
   289  	}
   290  	if testRes1 != nil {
   291  		// If config minimization even partially succeeds, minimizeConfig()
   292  		// would return a non-nil value of a new report.
   293  		testRes = testRes1
   294  		// Overwrite bug's reproducibility - it may be different after config minimization.
   295  		env.reproChance = testRes.badRatio
   296  	}
   297  
   298  	bad, good, results1, fatalResult, err := env.commitRange()
   299  	if fatalResult != nil || err != nil {
   300  		return fatalResult, err
   301  	}
   302  	if env.cfg.Fix {
   303  		env.commit = good
   304  	} else {
   305  		env.commit = bad
   306  	}
   307  	env.results = map[string]*testResult{cfg.Kernel.Commit: testRes}
   308  	for _, res := range results1 {
   309  		env.results[res.com.Hash] = res
   310  	}
   311  	commits, err := env.bisecter.Bisect(bad.Hash, good.Hash, cfg.Trace, env.testPredicate)
   312  	if err != nil {
   313  		return nil, err
   314  	}
   315  	env.logf("accumulated error probability: %0.2f", 1.0-env.confidence)
   316  	res := &Result{
   317  		Commits:    commits,
   318  		Config:     env.kernelConfig,
   319  		Confidence: env.confidence,
   320  	}
   321  	if len(commits) == 1 {
   322  		com := commits[0]
   323  		testRes := env.results[com.Hash]
   324  		if testRes == nil {
   325  			return nil, fmt.Errorf("no result for culprit commit")
   326  		}
   327  		res.Report = testRes.rep
   328  		isRelease, err := env.bisecter.IsRelease(com.Hash)
   329  		if err != nil {
   330  			env.logf("failed to detect release: %v", err)
   331  		}
   332  		res.IsRelease = isRelease
   333  		noopChange, err := env.detectNoopChange(com)
   334  		if err != nil {
   335  			env.logf("failed to detect noop change: %v", err)
   336  		}
   337  		res.NoopChange = noopChange
   338  	}
   339  	return res, nil
   340  }
   341  
   342  func (env *env) identifyRewrittenCommit() (string, error) {
   343  	cfg := env.cfg
   344  	if cfg.Kernel.Commit != "" && cfg.CrossTree {
   345  		// If the failing commit is on another tree, just take it as is.
   346  		return cfg.Kernel.Commit, nil
   347  	}
   348  	_, err := env.repo.CheckoutBranch(cfg.Kernel.Repo, cfg.Kernel.Branch)
   349  	if err != nil {
   350  		return cfg.Kernel.Commit, err
   351  	}
   352  	contained, err := env.repo.Contains(cfg.Kernel.Commit)
   353  	if err != nil || contained {
   354  		return cfg.Kernel.Commit, err
   355  	}
   356  
   357  	if !cfg.Fix {
   358  		// If we're doing a cause bisection, we don't really need the commit to be
   359  		// reachable from cfg.Kernel.Branch.
   360  		// So let's try to force tag fetch and check if the commit is present in the
   361  		// repository.
   362  		env.logf("fetch other tags and check if the commit is present")
   363  		commit, err := env.repo.CheckoutCommit(cfg.Kernel.Repo, cfg.Kernel.Commit)
   364  		if err != nil {
   365  			// Ignore the error because the command will fail if the commit is really not
   366  			// present in the tree.
   367  			env.logf("fetch failed with %s", err)
   368  		} else if commit != nil {
   369  			return commit.Hash, nil
   370  		}
   371  	}
   372  
   373  	// We record the tested kernel commit when syzkaller triggers a crash. These commits can become
   374  	// unreachable after the crash was found, when the history of the tested kernel branch was
   375  	// rewritten. The commit might have been completely deleted from the branch or just changed in
   376  	// some way. Some branches like linux-next are often and heavily rewritten (aka rebased).
   377  	// This can also happen when changing the branch you fuzz in an existing syz-manager config.
   378  	// This makes sense when a downstream kernel fork rebased on top of a new upstream version and
   379  	// you don't want syzkaller to report all your old bugs again.
   380  	if cfg.Kernel.CommitTitle == "" {
   381  		// This can happen during a manual bisection, when only a hash is given.
   382  		return cfg.Kernel.Commit, fmt.Errorf(
   383  			"commit %v not reachable in branch '%v' and no commit title available",
   384  			cfg.Kernel.Commit, cfg.Kernel.Branch)
   385  	}
   386  	commit, err := env.repo.GetCommitByTitle(cfg.Kernel.CommitTitle)
   387  	if err != nil {
   388  		return cfg.Kernel.Commit, err
   389  	}
   390  	if commit == nil {
   391  		return cfg.Kernel.Commit, fmt.Errorf(
   392  			"commit %v not reachable in branch '%v'", cfg.Kernel.Commit, cfg.Kernel.Branch)
   393  	}
   394  	env.logf("rewritten commit %v reidentified by title '%v'\n", commit.Hash, cfg.Kernel.CommitTitle)
   395  	return commit.Hash, nil
   396  }
   397  
   398  func (env *env) minimizeConfig() (*testResult, error) {
   399  	// Find minimal configuration based on baseline to reproduce the crash.
   400  	testResults := make(map[hash.Sig]*testResult)
   401  	predMinimize := func(test []byte) (vcs.BisectResult, error) {
   402  		env.kernelConfig = test
   403  		testRes, err := env.test()
   404  		if err != nil {
   405  			return 0, err
   406  		}
   407  		// We want either a > 33% repro probability or at least it should not be
   408  		// worse than for the non-minimized config.
   409  		const badRatioThreshold = 1.0 / 3.0
   410  		if testRes.verdict == vcs.BisectBad &&
   411  			testRes.badRatio < badRatioThreshold &&
   412  			testRes.badRatio < env.reproChance {
   413  			return vcs.BisectSkip, nil
   414  		}
   415  		if testRes.verdict == vcs.BisectBad {
   416  			// Only remember crashes.
   417  			testResults[hash.Hash(test)] = testRes
   418  		}
   419  		return testRes.verdict, err
   420  	}
   421  	minConfig, err := env.minimizer.Minimize(env.cfg.Manager.SysTarget, env.cfg.Kernel.Config,
   422  		env.cfg.Kernel.BaselineConfig, env.reportTypes, env.cfg.Trace, predMinimize)
   423  	if err != nil {
   424  		if errors.Is(err, vcs.ErrBadKconfig) {
   425  			env.logf("config minimization failed due to bad Kconfig %v\nproceeding with the original config", err)
   426  		} else {
   427  			return nil, err
   428  		}
   429  	}
   430  	env.kernelConfig = minConfig
   431  	return testResults[hash.Hash(minConfig)], nil
   432  }
   433  
   434  func (env *env) detectNoopChange(com *vcs.Commit) (bool, error) {
   435  	testRes := env.results[com.Hash]
   436  	if testRes.kernelSign == "" || len(com.Parents) != 1 {
   437  		return false, nil
   438  	}
   439  	parent := com.Parents[0]
   440  	parentRes := env.results[parent]
   441  	if parentRes == nil {
   442  		env.logf("parent commit %v wasn't tested", parent)
   443  		// We could not test the parent commit if it is not based on the previous release
   444  		// (instead based on an older release, i.e. a very old non-rebased commit
   445  		// merged into the current release).
   446  		// TODO: we can use a differnet compiler for this old commit
   447  		// since effectively it's in the older release, in that case we may not
   448  		// detect noop change anyway.
   449  		if _, err := env.repo.SwitchCommit(parent); err != nil {
   450  			return false, err
   451  		}
   452  		_, kernelSign, err := env.build()
   453  		if err != nil {
   454  			return false, err
   455  		}
   456  		parentRes = &testResult{kernelSign: kernelSign}
   457  	}
   458  	env.logf("culprit signature: %v", testRes.kernelSign)
   459  	env.logf("parent  signature: %v", parentRes.kernelSign)
   460  	return testRes.kernelSign == parentRes.kernelSign, nil
   461  }
   462  
   463  func (env *env) commitRange() (*vcs.Commit, *vcs.Commit, []*testResult, *Result, error) {
   464  	rangeFunc := env.commitRangeForCause
   465  	if env.cfg.Fix {
   466  		rangeFunc = env.commitRangeForFix
   467  	}
   468  
   469  	bad, good, results1, err := rangeFunc()
   470  	if err != nil {
   471  		return bad, good, results1, nil, err
   472  	}
   473  
   474  	fatalResult, err := env.validateCommitRange(bad, good, results1)
   475  	return bad, good, results1, fatalResult, err
   476  }
   477  
   478  func (env *env) commitRangeForFix() (*vcs.Commit, *vcs.Commit, []*testResult, error) {
   479  	var results []*testResult
   480  	startCommit := env.commit
   481  	if env.cfg.CrossTree {
   482  		env.logf("determining the merge base between %v and %v",
   483  			env.commit.Hash, env.head.Hash)
   484  		bases, err := env.repo.MergeBases(env.commit.Hash, env.head.Hash)
   485  		if err != nil {
   486  			return nil, nil, nil, err
   487  		}
   488  		if len(bases) != 1 {
   489  			env.logf("expected 1 merge base, got %d", len(bases))
   490  			return nil, nil, nil, fmt.Errorf("expected 1 merge base, got %d", len(bases))
   491  		}
   492  		env.logf("%s/%s is a merge base, check if it has the bug", bases[0].Hash, bases[0].Title)
   493  		startCommit = bases[0]
   494  		if _, err := env.repo.SwitchCommit(startCommit.Hash); err != nil {
   495  			return nil, nil, nil, err
   496  		}
   497  		res, err := env.test()
   498  		if err != nil {
   499  			return nil, nil, nil, err
   500  		}
   501  		results = append(results, res)
   502  		if res.verdict != vcs.BisectBad {
   503  			return nil, startCommit, results, nil
   504  		}
   505  	}
   506  	env.logf("testing current HEAD %v", env.head.Hash)
   507  	if _, err := env.repo.SwitchCommit(env.head.Hash); err != nil {
   508  		return nil, nil, nil, err
   509  	}
   510  	res, err := env.test()
   511  	if err != nil {
   512  		return nil, nil, nil, err
   513  	}
   514  	results = append(results, res)
   515  	if res.verdict != vcs.BisectGood {
   516  		return env.head, nil, results, nil
   517  	}
   518  	return env.head, startCommit, results, nil
   519  }
   520  
   521  func (env *env) commitRangeForCause() (*vcs.Commit, *vcs.Commit, []*testResult, error) {
   522  	cfg := env.cfg
   523  	tags, err := env.bisecter.PreviousReleaseTags(cfg.Kernel.Commit, cfg.CompilerType)
   524  	if err != nil {
   525  		return nil, nil, nil, err
   526  	}
   527  	if len(tags) == 0 {
   528  		return nil, nil, nil, fmt.Errorf("no release tags before this commit")
   529  	}
   530  	pickedTags := pickReleaseTags(tags)
   531  	env.logf("picked %v out of %d release tags", pickedTags, len(tags))
   532  
   533  	lastBad := env.commit
   534  	var results []*testResult
   535  	for _, tag := range pickedTags {
   536  		env.logf("testing release %v", tag)
   537  		com, err := env.repo.SwitchCommit(tag)
   538  		if err != nil {
   539  			return nil, nil, nil, err
   540  		}
   541  		res, err := env.test()
   542  		if err != nil {
   543  			return nil, nil, nil, err
   544  		}
   545  		results = append(results, res)
   546  		if res.verdict == vcs.BisectGood {
   547  			return lastBad, com, results, nil
   548  		}
   549  		if res.verdict == vcs.BisectBad {
   550  			lastBad = com
   551  		}
   552  	}
   553  	// All tags were vcs.BisectBad or vcs.BisectSkip.
   554  	return lastBad, nil, results, nil
   555  }
   556  
   557  func (env *env) validateCommitRange(bad, good *vcs.Commit, results []*testResult) (*Result, error) {
   558  	if len(results) < 1 {
   559  		return nil, fmt.Errorf("commitRange returned no results")
   560  	}
   561  
   562  	if env.cfg.Fix && env.cfg.CrossTree && len(results) < 2 {
   563  		// For cross-tree bisections, it can be the case that the bug was introduced
   564  		// after the merge base, so there's no sense to continue the fix bisection.
   565  		env.logf("reproducer does not crash the merge base, so there's no known bad commit")
   566  		return &Result{Commit: good, Config: env.kernelConfig}, nil
   567  	}
   568  
   569  	finalResult := results[len(results)-1] // HEAD test for fix, oldest tested test for cause bisection
   570  	if finalResult.verdict == vcs.BisectBad {
   571  		// For cause bisection: Oldest tested release already had the bug. Giving up.
   572  		// For fix bisection:   Crash still not fixed on HEAD. Leaving Result.Commits empty causes
   573  		//                      syzbot to retry this bisection later.
   574  		env.logf("crash still not fixed/happens on the oldest tested release")
   575  		return &Result{Report: finalResult.rep, Commit: bad, Config: env.kernelConfig}, nil
   576  	}
   577  	if finalResult.verdict == vcs.BisectSkip {
   578  		if env.cfg.Fix {
   579  			// HEAD is moving target. Sometimes changes break syzkaller fuzzing.
   580  			// Leaving Result.Commits empty so syzbot retries this bisection again later.
   581  			env.logf("HEAD had kernel build, boot or test errors")
   582  			return &Result{Report: finalResult.rep, Commit: bad, Config: env.kernelConfig}, nil
   583  		}
   584  		// The oldest tested release usually doesn't change. Retrying would give us the same result,
   585  		// unless we change the syz-ci setup (e.g. new rootfs, new compilers).
   586  		return nil, fmt.Errorf("oldest tested release had kernel build, boot or test errors")
   587  	}
   588  
   589  	return nil, nil
   590  }
   591  
// testResult is the outcome of building and testing a single revision.
//
// verdict is the bisection verdict for the revision, com is the commit that
// was tested and kernelSign is the signature of the built kernel image.
// rep is the most relevant crash report, or a synthetic report that only
// carries a title when the build or the testing failed. types holds the crash
// types returned by result processing.
type testResult struct {
	verdict    vcs.BisectResult
	com        *vcs.Commit
	rep        *report.Report
	types      []crash.Type
	kernelSign string
	// The ratio of bad/(good+bad) results.
	badRatio float64
	// An estimate how much we can trust the result.
	confidence float64
}
   603  
   604  func (env *env) build() (*vcs.Commit, string, error) {
   605  	current, err := env.repo.Commit(vcs.HEAD)
   606  	if err != nil {
   607  		return nil, "", err
   608  	}
   609  
   610  	bisectEnv, err := env.bisecter.EnvForCommit(
   611  		env.cfg.DefaultCompiler, env.cfg.CompilerType,
   612  		env.cfg.BinDir, current.Hash, env.kernelConfig,
   613  		env.cfg.Kernel.Backports,
   614  	)
   615  	if err != nil {
   616  		return current, "", err
   617  	}
   618  	env.logf("testing commit %v %v", current.Hash, env.cfg.CompilerType)
   619  	buildStart := time.Now()
   620  	buildCfg := env.buildCfg
   621  	buildCfg.CompilerBin = bisectEnv.Compiler
   622  	buildCfg.KernelConfig = bisectEnv.KernelConfig
   623  	if err := env.inst.CleanKernel(&buildCfg); err != nil {
   624  		return current, "", fmt.Errorf("kernel clean failed: %w", err)
   625  	}
   626  	_, imageDetails, err := env.inst.BuildKernel(&buildCfg)
   627  	if imageDetails.CompilerID != "" {
   628  		env.logf("compiler: %v", imageDetails.CompilerID)
   629  	}
   630  	if imageDetails.Signature != "" {
   631  		env.logf("kernel signature: %v", imageDetails.Signature)
   632  	}
   633  	env.buildTime += time.Since(buildStart)
   634  	return current, imageDetails.Signature, err
   635  }
   636  
   637  // Note: When this function returns an error, the bisection it was called from is aborted.
   638  // Hence recoverable errors must be handled and the callers must treat testResult with care.
   639  // e.g. testResult.verdict will be vcs.BisectSkip for a broken build, but err will be nil.
   640  func (env *env) test() (*testResult, error) {
   641  	cfg := env.cfg
   642  	if cfg.Timeout != 0 && time.Since(env.startTime) > cfg.Timeout {
   643  		return nil, fmt.Errorf("bisection is taking too long (>%v), aborting", cfg.Timeout)
   644  	}
   645  	current, kernelSign, err := env.build()
   646  	res := &testResult{
   647  		verdict:    vcs.BisectSkip,
   648  		com:        current,
   649  		kernelSign: kernelSign,
   650  		confidence: 1.0,
   651  	}
   652  	if current == nil {
   653  		// This is not recoverable, as the caller must know which commit to skip.
   654  		return res, fmt.Errorf("couldn't get repo HEAD: %w", err)
   655  	}
   656  	if err != nil {
   657  		errInfo := fmt.Sprintf("failed building %v: ", current.Hash)
   658  		var verr *osutil.VerboseError
   659  		var kerr *build.KernelError
   660  		if errors.As(err, &verr) {
   661  			errInfo += verr.Error()
   662  			env.saveDebugFile(current.Hash, 0, verr.Output)
   663  		} else if errors.As(err, &kerr) {
   664  			errInfo += string(kerr.Report)
   665  			env.saveDebugFile(current.Hash, 0, kerr.Output)
   666  		} else {
   667  			errInfo += err.Error()
   668  			env.logf("%v", err)
   669  		}
   670  
   671  		env.logf("%s", errInfo)
   672  		res.rep = &report.Report{Title: errInfo}
   673  		return res, nil
   674  	}
   675  
   676  	numTests := MaxNumTests / 2
   677  	if env.flaky || env.numTests == 0 {
   678  		// Use twice as many instances if the bug is flaky and during initial testing
   679  		// (as we don't know yet if it's flaky or not).
   680  		numTests *= 2
   681  	}
   682  	env.numTests++
   683  
   684  	testStart := time.Now()
   685  
   686  	results, err := env.inst.Test(numTests, cfg.Repro.Syz, cfg.Repro.Opts, cfg.Repro.C)
   687  	env.testTime += time.Since(testStart)
   688  	if err != nil {
   689  		problem := fmt.Sprintf("repro testing failure: %v", err)
   690  		env.log(problem)
   691  		return res, &build.InfraError{Title: problem}
   692  	}
   693  	bad, good, infra, rep, types := env.processResults(current, results)
   694  	res.verdict, err = env.bisectionDecision(len(results), bad, good, infra)
   695  	if err != nil {
   696  		return nil, err
   697  	}
   698  	if bad+good > 0 {
   699  		res.badRatio = float64(bad) / float64(bad+good)
   700  	}
   701  	if res.verdict == vcs.BisectGood {
   702  		// The result could be a false negative.
   703  		res.confidence = 1.0 - math.Pow(1.0-env.reproChance, float64(good))
   704  		env.logf("false negative chance: %.3f", 1.0-res.confidence)
   705  	}
   706  	if res.verdict == vcs.BisectSkip {
   707  		res.rep = &report.Report{
   708  			Title: fmt.Sprintf("failed testing reproducer on %v", current.Hash),
   709  		}
   710  	} else {
   711  		// Pick the most relevant as the main one.
   712  		res.rep = rep
   713  	}
   714  	res.types = types
   715  	env.updateFlaky(res)
   716  	// TODO: when we start supporting boot/test error bisection, we need to make
   717  	// processResults treat that verdit as "good".
   718  	return res, nil
   719  }
   720  
   721  // testPredicate() is meant to be invoked by bisecter.Bisect().
   722  func (env *env) testPredicate() (vcs.BisectResult, error) {
   723  	var testRes1 *testResult
   724  	if env.cfg.Fix {
   725  		// There's a chance we might test a revision that does not yet contain the bug.
   726  		// Perform extra checks (see #4117).
   727  		env.logf("determine whether the revision contains the guilty commit")
   728  		hadBug, err := env.revisionHadBug()
   729  		if err == errUnknownBugPresence {
   730  			// Let's skip the revision just in case.
   731  			testRes1 = &testResult{verdict: vcs.BisectSkip}
   732  		} else if err != nil {
   733  			return 0, err
   734  		}
   735  		if !hadBug {
   736  			// For result consistency, pretend that the kernel crashed.
   737  			env.logf("the bug was not introduced yet; pretend that kernel crashed")
   738  			testRes1 = &testResult{verdict: vcs.BisectBad}
   739  		}
   740  	}
   741  	if testRes1 == nil {
   742  		var err error
   743  		testRes1, err = env.test()
   744  		if err != nil {
   745  			return 0, err
   746  		}
   747  		env.postTestResult(testRes1)
   748  		env.results[testRes1.com.Hash] = testRes1
   749  	}
   750  	// For fix bisections, results are inverted.
   751  	if env.cfg.Fix {
   752  		switch testRes1.verdict {
   753  		case vcs.BisectBad:
   754  			testRes1.verdict = vcs.BisectGood
   755  		case vcs.BisectGood:
   756  			testRes1.verdict = vcs.BisectBad
   757  		}
   758  	}
   759  	return testRes1.verdict, nil
   760  }
   761  
// errUnknownBugPresence is returned by revisionHadBug when none of the checked
// revisions gives a usable answer.
//
// If there's a merge from a branch that was based on a much older code revision,
// it's likely that the bug was not yet present at all.
var errUnknownBugPresence = errors.New("unable to determine whether there was a bug")
   765  
   766  func (env *env) revisionHadBug() (bool, error) {
   767  	// Check if any already tested revision that is reachable from HEAD crashed.
   768  	for hash, res := range env.results {
   769  		if res.rep == nil {
   770  			continue
   771  		}
   772  		ok, err := env.repo.Contains(hash)
   773  		if err != nil {
   774  			return false, err
   775  		}
   776  		if ok {
   777  			env.logf("revision %s crashed and is reachable", hash)
   778  			return true, nil
   779  		}
   780  	}
   781  
   782  	// TODO: it's also possible to extract useful information from non-crashed runs.
   783  	// But let's first see how many extra test() runs we get without it.
   784  
   785  	// We'll likely change the revision below. Ensure we get back to the original one.
   786  	curr, err := env.repo.Commit(vcs.HEAD)
   787  	if err != nil {
   788  		return false, err
   789  	}
   790  	defer env.repo.SwitchCommit(curr.Hash)
   791  
   792  	// Check all merge bases between the original bad commit (*) and the current HEAD revision.
   793  	// If at least one crashed, bug was definitely present.
   794  	// (*) Using the same bad commit hopefully helps us reuse many of the results.
   795  	bases, err := env.repo.MergeBases(curr.Hash, env.commit.Hash)
   796  	if err != nil {
   797  		return false, fmt.Errorf("failed to get the merge base between %s and %s: %w",
   798  			curr.Hash, env.commit.Hash, err)
   799  	}
   800  	anyResult := false
   801  	for _, base := range bases {
   802  		env.logf("checking the merge base %s", base.Hash)
   803  		res := env.results[base.Hash]
   804  		if res == nil {
   805  			env.logf("no existing result, test the revision")
   806  			env.repo.SwitchCommit(base.Hash)
   807  			res, err = env.test()
   808  			if err != nil {
   809  				return false, err
   810  			}
   811  			env.results[base.Hash] = res
   812  		}
   813  		if res.verdict == vcs.BisectSkip {
   814  			continue
   815  		}
   816  		anyResult = true
   817  		if res.rep != nil {
   818  			// No reason to test other bases.
   819  			return true, nil
   820  		}
   821  	}
   822  	if anyResult {
   823  		return false, nil
   824  	}
   825  	return false, errUnknownBugPresence
   826  }
   827  
   828  func (env *env) bisectionDecision(total, bad, good, infra int) (vcs.BisectResult, error) {
   829  	// Boot errors, image test errors, skipped crashes.
   830  	skip := total - bad - good - infra
   831  
   832  	wantBadRuns := max(2, (total-infra)/6) // For 10 runs, require 2 crashes. For 20, require 3.
   833  	wantGoodRuns := total / 2
   834  	wantTotalRuns := total / 2
   835  	if env.flaky {
   836  		// The reproducer works less than 50% of time, so we need really many good results.
   837  		wantGoodRuns = total * 3 / 4
   838  	}
   839  	if bad == 0 && good >= wantGoodRuns {
   840  		// We need a big enough number of good results, otherwise the chance of a false
   841  		// positive is too high.
   842  		return vcs.BisectGood, nil
   843  	} else if bad >= wantBadRuns && (good+bad) >= wantTotalRuns {
   844  		// We need enough (good+bad) results to conclude that the kernel revision itself
   845  		// is not too broken.
   846  		return vcs.BisectBad, nil
   847  	} else if infra > skip {
   848  		// We have been unable to determine a verdict mostly because of infra errors.
   849  		// Abort the bisection.
   850  		return vcs.BisectSkip,
   851  			&build.InfraError{Title: "unable to determine the verdict because of infra errors"}
   852  	}
   853  	env.logf("unable to determine the verdict: %d good runs (wanted %d), for bad wanted %d in total, got %d",
   854  		good, wantGoodRuns, wantTotalRuns, good+bad)
   855  	return vcs.BisectSkip, nil
   856  }
   857  
   858  func (env *env) processResults(current *vcs.Commit, results []instance.EnvTestResult) (
   859  	bad, good, infra int, rep *report.Report, types []crash.Type) {
   860  	var verdicts []string
   861  	var reports []*report.Report
   862  	for i, res := range results {
   863  		if res.Error == nil {
   864  			good++
   865  			verdicts = append(verdicts, "OK")
   866  			continue
   867  		}
   868  		var testError *instance.TestError
   869  		var crashError *instance.CrashError
   870  		switch {
   871  		case errors.As(res.Error, &testError):
   872  			if testError.Infra {
   873  				infra++
   874  				verdicts = append(verdicts, fmt.Sprintf("infra problem: %v", testError))
   875  			} else if testError.Boot {
   876  				verdicts = append(verdicts, fmt.Sprintf("boot failed: %v", testError))
   877  			} else {
   878  				verdicts = append(verdicts, fmt.Sprintf("basic kernel testing failed: %v", testError))
   879  			}
   880  			output := testError.Output
   881  			if testError.Report != nil {
   882  				output = testError.Report.Output
   883  			}
   884  			env.saveDebugFile(current.Hash, i, output)
   885  		case errors.As(res.Error, &crashError):
   886  			output := crashError.Report.Report
   887  			if len(output) == 0 {
   888  				output = crashError.Report.Output
   889  			}
   890  			env.saveDebugFile(current.Hash, i, output)
   891  			if env.isTransientError(crashError.Report) {
   892  				verdicts = append(verdicts, fmt.Sprintf("ignore: %v", crashError))
   893  				break
   894  			}
   895  			bad++
   896  			reports = append(reports, crashError.Report)
   897  			verdicts = append(verdicts, fmt.Sprintf("crashed: %v", crashError))
   898  		default:
   899  			infra++
   900  			verdicts = append(verdicts, fmt.Sprintf("failed: %v", res.Error))
   901  		}
   902  	}
   903  	unique := make(map[string]bool)
   904  	for _, verdict := range verdicts {
   905  		unique[verdict] = true
   906  	}
   907  	if len(unique) == 1 {
   908  		env.logf("all runs: %v", verdicts[0])
   909  	} else {
   910  		for i, verdict := range verdicts {
   911  			env.logf("run #%v: %v", i, verdict)
   912  		}
   913  	}
   914  	var others bool
   915  	rep, types, others = mostFrequentReports(reports)
   916  	if rep != nil || others {
   917  		// TODO: set flaky=true or in some other way indicate that the bug
   918  		// triggers multiple different crashes?
   919  		env.logf("representative crash: %v, types: %v", rep.Title, types)
   920  	}
   921  	return
   922  }
   923  
   924  // postTestResult() is to be run after we have got the results of a test() call for a revision.
   925  // It updates the estimates of reproducibility and the overall result confidence.
   926  func (env *env) postTestResult(res *testResult) {
   927  	env.confidence *= res.confidence
   928  	if res.verdict == vcs.BisectBad {
   929  		// Let's be conservative and only decrease our reproduction likelihood estimate.
   930  		// As the estimate of each test() can also be flaky, only partially update the result.
   931  		avg := (env.reproChance + res.badRatio) / 2.0
   932  		env.reproChance = min(env.reproChance, avg)
   933  	}
   934  }
   935  
   936  // updateFlaky() updates the current flakiness estimate.
   937  func (env *env) updateFlaky(res *testResult) {
   938  	// We require at least 5 good+bad runs for a verdict, so
   939  	// with a 50% reproducility there's a ~3% chance of a false negative result.
   940  	// If there are 10 "good" results, that's a ~36% accumulated error probability.
   941  	// That's already noticeable, so let's do 2x more runs from there.
   942  	const flakyThreshold = 0.5
   943  	if res.verdict == vcs.BisectBad && res.badRatio < flakyThreshold {
   944  		// Once flaky => always treat as flaky.
   945  		env.flaky = true
   946  	}
   947  }
   948  
   949  // mostFrequentReports() processes the list of run results and determines:
   950  // 1) The most representative crash types.
   951  // 2) The most representative crash report.
   952  // The algorithm is described in code comments.
   953  func mostFrequentReports(reports []*report.Report) (*report.Report, []crash.Type, bool) {
   954  	// First find most frequent report types.
   955  	type info struct {
   956  		t      crash.Type
   957  		count  int
   958  		report *report.Report
   959  	}
   960  	crashes := 0
   961  	perType := []*info{}
   962  	perTypeMap := map[crash.Type]*info{}
   963  	for _, rep := range reports {
   964  		if rep.Title == "" {
   965  			continue
   966  		}
   967  		crashes++
   968  		if perTypeMap[rep.Type] == nil {
   969  			obj := &info{
   970  				t:      rep.Type,
   971  				report: rep,
   972  			}
   973  			perType = append(perType, obj)
   974  			perTypeMap[rep.Type] = obj
   975  		}
   976  		perTypeMap[rep.Type].count++
   977  	}
   978  	sort.Slice(perType, func(i, j int) bool {
   979  		return perType[i].count > perType[j].count
   980  	})
   981  	// Then pick those that are representative enough.
   982  	var bestTypes []crash.Type
   983  	var bestReport *report.Report
   984  	taken := 0
   985  	for _, info := range perType {
   986  		if info.t == crash.Hang && info.count*2 < crashes && len(perType) > 1 {
   987  			// To pick a Hang as a representative one, require >= 50%
   988  			// of all crashes to be of this type.
   989  			// Hang crashes can appear in various parts of the kernel, so
   990  			// we only want to take them into account only if we are actually
   991  			// bisecting this kind of a bug.
   992  			continue
   993  		}
   994  		if info.t == crash.LostConnection && len(perType) > 1 {
   995  			// This crash type is much more often unrelated than not.
   996  			// Take it only if it's the only crash type.
   997  			continue
   998  		}
   999  		// Take further crash types until we have considered 2/3 of all crashes, but
  1000  		// no more than 3.
  1001  		needTaken := (crashes + 2) * 2 / 3
  1002  		if taken < needTaken && len(bestTypes) < 3 {
  1003  			if bestReport == nil {
  1004  				bestReport = info.report
  1005  			}
  1006  			bestTypes = append(bestTypes, info.t)
  1007  			taken += info.count
  1008  		}
  1009  	}
  1010  	return bestReport, bestTypes, len(bestTypes) != len(perType)
  1011  }
  1012  
  1013  func (env *env) isTransientError(rep *report.Report) bool {
  1014  	// If we're not chasing a SYZFATAL error, ignore them.
  1015  	// Otherwise it indicates some transient problem of the tested kernel revision.
  1016  	if rep.Type == crash.SyzFailure {
  1017  		hadSyzFailure := false
  1018  		for _, t := range env.reportTypes {
  1019  			hadSyzFailure = hadSyzFailure || t == crash.SyzFailure
  1020  		}
  1021  		return len(env.reportTypes) > 0 && !hadSyzFailure
  1022  	}
  1023  	// Lost connection is a frequent source of flaky results.
  1024  	// Ignore if it is was not in the canonical crash types set.
  1025  	if rep.Type == crash.LostConnection {
  1026  		hadLostConnection := false
  1027  		for _, t := range env.reportTypes {
  1028  			hadLostConnection = hadLostConnection || t == crash.LostConnection
  1029  		}
  1030  		return len(env.reportTypes) > 0 && !hadLostConnection
  1031  	}
  1032  	// All other errors are okay.
  1033  	return false
  1034  }
  1035  
  1036  func (env *env) saveDebugFile(hash string, idx int, data []byte) {
  1037  	env.cfg.Trace.SaveFile(fmt.Sprintf("%v.%v", hash, idx), data)
  1038  }
  1039  
  1040  func checkConfig(cfg *Config) error {
  1041  	if !osutil.IsExist(cfg.BinDir) {
  1042  		return fmt.Errorf("bin dir %v does not exist", cfg.BinDir)
  1043  	}
  1044  	if cfg.Kernel.Userspace != "" && !osutil.IsExist(cfg.Kernel.Userspace) {
  1045  		return fmt.Errorf("userspace dir %v does not exist", cfg.Kernel.Userspace)
  1046  	}
  1047  	if cfg.Kernel.Sysctl != "" && !osutil.IsExist(cfg.Kernel.Sysctl) {
  1048  		return fmt.Errorf("sysctl file %v does not exist", cfg.Kernel.Sysctl)
  1049  	}
  1050  	if cfg.Kernel.Cmdline != "" && !osutil.IsExist(cfg.Kernel.Cmdline) {
  1051  		return fmt.Errorf("cmdline file %v does not exist", cfg.Kernel.Cmdline)
  1052  	}
  1053  	return nil
  1054  }
  1055  
  1056  func (env *env) log(msg string) {
  1057  	env.logf("%v", msg)
  1058  }
  1059  
// logf forwards a printf-style message and its arguments to the configured tracer
// (env.cfg.Trace.Log).
func (env *env) logf(msg string, args ...interface{}) {
	// The branch below never runs; the Sprintf call is there only so that
	// static analysis treats logf as a printf-like function and checks its callers.
	if false {
		_ = fmt.Sprintf(msg, args...) // enable printf checker
	}
	env.cfg.Trace.Log(msg, args...)
}
  1066  
  1067  // pickReleaseTags() picks a subset of revisions to test.
  1068  // `all` is an ordered list of tags (from newer to older).
  1069  func pickReleaseTags(all []string) []string {
  1070  	if len(all) == 0 {
  1071  		return nil
  1072  	}
  1073  	// First split into x.y.z, x.y.z-1, ... and x.y, x.y-1, ...
  1074  	var subReleases, releases []string
  1075  	releaseBegin := false
  1076  	for _, tag := range all {
  1077  		v1, _, rc, v3 := vcs.ParseReleaseTag(tag)
  1078  		if v1 < 0 || rc < 0 && v3 < 0 {
  1079  			releaseBegin = true
  1080  			releases = append(releases, tag)
  1081  		}
  1082  		if !releaseBegin {
  1083  			subReleases = append(subReleases, tag)
  1084  		}
  1085  	}
  1086  	var ret []string
  1087  	// Take 2 latest sub releases.
  1088  	takeSubReleases := min(2, len(subReleases))
  1089  	ret = append(ret, subReleases[:takeSubReleases]...)
  1090  	// If there are a lot of sub releases, also take the middle one.
  1091  	if len(subReleases) > 5 {
  1092  		ret = append(ret, subReleases[len(subReleases)/2])
  1093  	}
  1094  	for i := 0; i < len(releases); i++ {
  1095  		// Gradually increase step.
  1096  		step := 1
  1097  		if i >= 3 {
  1098  			step = 2
  1099  		}
  1100  		if i >= 11 {
  1101  			step = 3
  1102  		}
  1103  		if i%step == 0 || i == len(releases)-1 {
  1104  			ret = append(ret, releases[i])
  1105  		}
  1106  	}
  1107  	return ret
  1108  }