github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/syz-ci/jobs.go (about)

     1  // Copyright 2017 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package main
     5  
     6  import (
     7  	"bytes"
     8  	"context"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  	"os"
    13  	"path/filepath"
    14  	"strings"
    15  	"sync"
    16  	"time"
    17  
    18  	"github.com/google/syzkaller/dashboard/dashapi"
    19  	"github.com/google/syzkaller/pkg/bisect"
    20  	"github.com/google/syzkaller/pkg/build"
    21  	"github.com/google/syzkaller/pkg/debugtracer"
    22  	"github.com/google/syzkaller/pkg/instance"
    23  	"github.com/google/syzkaller/pkg/log"
    24  	"github.com/google/syzkaller/pkg/mgrconfig"
    25  	"github.com/google/syzkaller/pkg/osutil"
    26  	"github.com/google/syzkaller/pkg/report"
    27  	"github.com/google/syzkaller/pkg/vcs"
    28  	"github.com/google/syzkaller/vm"
    29  )
    30  
// JobManager holds the state shared by all job processors of a syz-ci instance.
type JobManager struct {
	cfg      *Config
	dash     *dashapi.Dashboard
	managers []*Manager
	// parallelJobFilter selects the job types handled by the parallel job processor.
	parallelJobFilter *ManagerJobs
	// shutdownPending is checked after a job finishes to decide whether
	// an error result should be dropped instead of being reported.
	shutdownPending <-chan struct{}
}
    38  
// JobProcessor is a single sequential job loop; JobManager.loop creates
// the main processor and, optionally, a second parallel one.
type JobProcessor struct {
	*JobManager
	// name prefixes log messages and identifies the processor in dashboard error reports.
	name string
	// instanceSuffix is appended to the manager name of the instances created for jobs.
	instanceSuffix string
	// knownCommits holds titles of commits already picked up by commit polling.
	// It is only allocated for the main processor.
	knownCommits map[string]bool
	// baseDir is the root directory for kernel checkouts and job work dirs.
	baseDir string
	// jobFilter, if not nil, restricts the job types this processor polls for.
	jobFilter *ManagerJobs
	jobTicker <-chan time.Time
	// commitTicker is left nil for the parallel processor, so it never polls commits.
	commitTicker <-chan time.Time
}
    49  
    50  func newJobManager(cfg *Config, managers []*Manager, shutdownPending chan struct{}) (*JobManager, error) {
    51  	dash, err := dashapi.New(cfg.DashboardClient, cfg.DashboardAddr, cfg.DashboardKey)
    52  	if err != nil {
    53  		return nil, err
    54  	}
    55  	return &JobManager{
    56  		cfg:             cfg,
    57  		dash:            dash,
    58  		managers:        managers,
    59  		shutdownPending: shutdownPending,
    60  		// For now let's only parallelize patch testing requests.
    61  		parallelJobFilter: &ManagerJobs{TestPatches: true},
    62  	}, nil
    63  }
    64  
    65  // startLoop starts a job loop in parallel.
    66  func (jm *JobManager) startLoop(ctx context.Context, wg *sync.WaitGroup) {
    67  	wg.Add(1)
    68  	go func() {
    69  		defer wg.Done()
    70  		jm.loop(ctx)
    71  	}()
    72  }
    73  
    74  func (jm *JobManager) loop(ctx context.Context) {
    75  	if err := jm.resetJobs(); err != nil {
    76  		if jm.dash != nil {
    77  			jm.dash.LogError("syz-ci", "reset jobs failed: %v", err)
    78  		}
    79  		return
    80  	}
    81  	commitTicker := time.NewTicker(time.Duration(jm.cfg.CommitPollPeriod) * time.Second)
    82  	defer commitTicker.Stop()
    83  	jobTicker := time.NewTicker(time.Duration(jm.cfg.JobPollPeriod) * time.Second)
    84  	defer jobTicker.Stop()
    85  	var wg sync.WaitGroup
    86  	for main := true; ; main = false {
    87  		jp := &JobProcessor{
    88  			JobManager: jm,
    89  			jobTicker:  jobTicker.C,
    90  		}
    91  		if main {
    92  			jp.instanceSuffix = "-job"
    93  			jp.baseDir = osutil.Abs("jobs")
    94  			jp.commitTicker = commitTicker.C
    95  			jp.knownCommits = make(map[string]bool)
    96  		} else {
    97  			jp.instanceSuffix = "-job-parallel"
    98  			jp.baseDir = osutil.Abs("jobs-2")
    99  			jp.jobFilter = jm.parallelJobFilter
   100  		}
   101  		jp.name = fmt.Sprintf("%v%v", jm.cfg.Name, jp.instanceSuffix)
   102  		wg.Add(1)
   103  		go func() {
   104  			defer wg.Done()
   105  			jp.loop(ctx)
   106  		}()
   107  		if !main || !jm.needParallelProcessor() {
   108  			break
   109  		}
   110  	}
   111  	wg.Wait()
   112  }
   113  
   114  func (jm *JobManager) needParallelProcessor() bool {
   115  	if !jm.cfg.ParallelJobs {
   116  		return false
   117  	}
   118  	for _, mgr := range jm.managers {
   119  		if mgr.mgrcfg.Jobs.Filter(jm.parallelJobFilter).AnyEnabled() {
   120  			return true
   121  		}
   122  	}
   123  	return false
   124  }
   125  
   126  func (jm *JobManager) resetJobs() error {
   127  	managerNames := []string{}
   128  	for _, mgr := range jm.managers {
   129  		if mgr.mgrcfg.Jobs.AnyEnabled() {
   130  			managerNames = append(managerNames, mgr.name)
   131  		}
   132  	}
   133  	if len(managerNames) > 0 {
   134  		return jm.dash.JobReset(&dashapi.JobResetReq{Managers: managerNames})
   135  	}
   136  	return nil
   137  }
   138  
// loop polls commits and jobs on the processor's tickers until ctx is cancelled.
// Everything runs sequentially: a job or a commit poll blocks the loop until it finishes.
func (jp *JobProcessor) loop(ctx context.Context) {
	jp.Logf(0, "job loop started")
loop:
	for {
		// Check for cancellation separately first, otherwise if the stop signal arrives
		// during a job execution, we can still grab the next job with 50% probability.
		select {
		case <-ctx.Done():
			break loop
		default:
		}
		// Similar for commit polling: if we grab 2-3 bisect jobs in a row,
		// it can delay commit polling by days.
		// A nil commitTicker (parallel processor) never becomes ready in either select.
		select {
		case <-jp.commitTicker:
			jp.pollCommits()
		default:
		}
		// Block until there is something to do or the loop is cancelled.
		select {
		case <-jp.jobTicker:
			jp.pollJobs()
		case <-jp.commitTicker:
			jp.pollCommits()
		case <-ctx.Done():
			break loop
		}
	}
	jp.Logf(0, "job loop stopped")
}
   168  
   169  func (jp *JobProcessor) pollCommits() {
   170  	for _, mgr := range jp.managers {
   171  		if !mgr.mgrcfg.Jobs.PollCommits {
   172  			continue
   173  		}
   174  		if err := jp.pollManagerCommits(mgr); err != nil {
   175  			jp.Errorf("failed to poll commits on %v: %v", mgr.name, err)
   176  		}
   177  	}
   178  }
   179  
   180  func brokenRepo(url string) bool {
   181  	// TODO(dvyukov): mmots contains weird squashed commits titled "linux-next" or "origin",
   182  	// which contain hundreds of other commits. This makes fix attribution totally broken.
   183  	return strings.Contains(url, "git.cmpxchg.org/linux-mmots")
   184  }
   185  
   186  func (jp *JobProcessor) pollManagerCommits(mgr *Manager) error {
   187  	resp, err := mgr.dash.CommitPoll()
   188  	if err != nil {
   189  		return err
   190  	}
   191  	jp.Logf(0, "polling commits for %v: repos %v, commits %v", mgr.name, len(resp.Repos), len(resp.Commits))
   192  	if len(resp.Repos) == 0 {
   193  		return fmt.Errorf("no repos")
   194  	}
   195  	commits := make(map[string]*vcs.Commit)
   196  	for i, repo := range resp.Repos {
   197  		if brokenRepo(repo.URL) {
   198  			continue
   199  		}
   200  		if resp.ReportEmail != "" {
   201  			commits1, err := jp.pollRepo(mgr, repo.URL, repo.Branch, resp.ReportEmail)
   202  			if err != nil {
   203  				jp.Errorf("failed to poll %v %v: %v", repo.URL, repo.Branch, err)
   204  				continue
   205  			}
   206  			jp.Logf(1, "got %v commits from %v/%v repo", len(commits1), repo.URL, repo.Branch)
   207  			for _, com := range commits1 {
   208  				// Only the "main" repo is the source of true hashes.
   209  				if i != 0 {
   210  					com.Hash = ""
   211  				}
   212  				// Not overwrite existing commits, in particular commit from the main repo with hash.
   213  				if _, ok := commits[com.Title]; !ok && !jp.knownCommits[com.Title] && len(commits) < 100 {
   214  					commits[com.Title] = com
   215  					jp.knownCommits[com.Title] = true
   216  				}
   217  			}
   218  		}
   219  		if i == 0 && len(resp.Commits) != 0 {
   220  			commits1, err := jp.getCommitInfo(mgr, repo.URL, repo.Branch, resp.Commits)
   221  			if err != nil {
   222  				jp.Errorf("failed to poll %v %v: %v", repo.URL, repo.Branch, err)
   223  				continue
   224  			}
   225  			jp.Logf(1, "got %v commit infos from %v/%v repo", len(commits1), repo.URL, repo.Branch)
   226  			for _, com := range commits1 {
   227  				// GetCommitByTitle does not accept ReportEmail and does not return tags,
   228  				// so don't replace the existing commit.
   229  				if _, ok := commits[com.Title]; !ok {
   230  					commits[com.Title] = com
   231  				}
   232  			}
   233  		}
   234  	}
   235  	results := make([]dashapi.Commit, 0, len(commits))
   236  	for _, com := range commits {
   237  		results = append(results, dashapi.Commit{
   238  			Hash:   com.Hash,
   239  			Title:  com.Title,
   240  			Author: com.Author,
   241  			BugIDs: com.Tags,
   242  			Date:   com.Date,
   243  		})
   244  	}
   245  	return mgr.dash.UploadCommits(results)
   246  }
   247  
   248  func (jp *JobProcessor) pollRepo(mgr *Manager, URL, branch, reportEmail string) ([]*vcs.Commit, error) {
   249  	dir := filepath.Join(jp.baseDir, mgr.managercfg.TargetOS, "kernel")
   250  	repo, err := vcs.NewRepo(mgr.managercfg.TargetOS, mgr.managercfg.Type, dir)
   251  	if err != nil {
   252  		return nil, fmt.Errorf("failed to create kernel repo: %w", err)
   253  	}
   254  	if _, err = repo.CheckoutBranch(URL, branch); err != nil {
   255  		return nil, fmt.Errorf("failed to checkout kernel repo %v/%v: %w", URL, branch, err)
   256  	}
   257  	return repo.ExtractFixTagsFromCommits("HEAD", reportEmail)
   258  }
   259  
   260  func (jp *JobProcessor) getCommitInfo(mgr *Manager, URL, branch string, commits []string) ([]*vcs.Commit, error) {
   261  	dir := filepath.Join(jp.baseDir, mgr.managercfg.TargetOS, "kernel")
   262  	repo, err := vcs.NewRepo(mgr.managercfg.TargetOS, mgr.managercfg.Type, dir)
   263  	if err != nil {
   264  		return nil, fmt.Errorf("failed to create kernel repo: %w", err)
   265  	}
   266  	if _, err = repo.CheckoutBranch(URL, branch); err != nil {
   267  		return nil, fmt.Errorf("failed to checkout kernel repo %v/%v: %w", URL, branch, err)
   268  	}
   269  	results, missing, err := repo.GetCommitsByTitles(commits)
   270  	if err != nil {
   271  		return nil, err
   272  	}
   273  	for _, title := range missing {
   274  		jp.Logf(0, "did not find commit %q in kernel repo %v/%v", title, URL, branch)
   275  	}
   276  	return results, nil
   277  }
   278  
   279  func (jp *JobProcessor) pollJobs() {
   280  	poll := &dashapi.JobPollReq{
   281  		Managers: make(map[string]dashapi.ManagerJobs),
   282  	}
   283  	for _, mgr := range jp.managers {
   284  		jobs := &mgr.mgrcfg.Jobs
   285  		if jp.jobFilter != nil {
   286  			jobs = jobs.Filter(jp.jobFilter)
   287  		}
   288  		apiJobs := dashapi.ManagerJobs{
   289  			TestPatches: jobs.TestPatches,
   290  			BisectCause: jobs.BisectCause,
   291  			BisectFix:   jobs.BisectFix,
   292  		}
   293  		if apiJobs.Any() {
   294  			poll.Managers[mgr.name] = apiJobs
   295  		}
   296  	}
   297  	if len(poll.Managers) == 0 {
   298  		return
   299  	}
   300  	req, err := jp.dash.JobPoll(poll)
   301  	if err != nil {
   302  		jp.Errorf("failed to poll jobs: %v", err)
   303  		return
   304  	}
   305  	if req.ID == "" {
   306  		return
   307  	}
   308  	var mgr *Manager
   309  	for _, m := range jp.managers {
   310  		if m.name == req.Manager {
   311  			mgr = m
   312  			break
   313  		}
   314  	}
   315  	if mgr == nil {
   316  		jp.Errorf("got job for unknown manager: %v", req.Manager)
   317  		return
   318  	}
   319  	job := &Job{
   320  		req: req,
   321  		mgr: mgr,
   322  	}
   323  	jp.processJob(job)
   324  }
   325  
   326  func (jp *JobProcessor) processJob(job *Job) {
   327  	req := job.req
   328  	jp.Logf(0, "starting job %v type %v for manager %v on %v/%v",
   329  		req.ID, req.Type, req.Manager, req.KernelRepo, req.KernelBranch)
   330  	resp := jp.process(job)
   331  	jp.Logf(0, "done job %v: commit %v, crash %q, error: %s",
   332  		resp.ID, resp.Build.KernelCommit, resp.CrashTitle, resp.Error)
   333  	select {
   334  	case <-jp.shutdownPending:
   335  		if len(resp.Error) != 0 {
   336  			// Ctrl+C can kill a child process which will cause an error.
   337  			jp.Logf(0, "ignoring error: shutdown pending")
   338  			return
   339  		}
   340  	default:
   341  	}
   342  	if err := jp.dash.JobDone(resp); err != nil {
   343  		jp.Errorf("failed to mark job as done: %v", err)
   344  		return
   345  	}
   346  }
   347  
// Job bundles a job request received from the dashboard with the response
// being built for it and the manager the job is for.
type Job struct {
	req *dashapi.JobPollResp
	// resp is created by JobProcessor.process and filled in by the job handlers.
	resp *dashapi.JobDoneReq
	mgr  *Manager
}
   353  
   354  func (jp *JobProcessor) process(job *Job) *dashapi.JobDoneReq {
   355  	req, mgr := job.req, job.mgr
   356  
   357  	dir := filepath.Join(jp.baseDir, mgr.managercfg.TargetOS)
   358  	mgrcfg := new(mgrconfig.Config)
   359  	*mgrcfg = *mgr.managercfg
   360  	mgrcfg.Workdir = filepath.Join(dir, "workdir")
   361  	mgrcfg.KernelSrc = filepath.Join(dir, "kernel", mgr.mgrcfg.KernelSrcSuffix)
   362  	mgrcfg.Syzkaller = filepath.Join(dir, "gopath", "src", "github.com", "google", "syzkaller")
   363  	os.RemoveAll(mgrcfg.Workdir)
   364  	defer os.RemoveAll(mgrcfg.Workdir)
   365  
   366  	resp := &dashapi.JobDoneReq{
   367  		ID: req.ID,
   368  		Build: dashapi.Build{
   369  			Manager:         mgr.name,
   370  			ID:              req.ID,
   371  			OS:              mgr.managercfg.TargetOS,
   372  			Arch:            mgr.managercfg.TargetArch,
   373  			VMArch:          mgr.managercfg.TargetVMArch,
   374  			SyzkallerCommit: req.SyzkallerCommit,
   375  		},
   376  	}
   377  	job.resp = resp
   378  	resp.Build.KernelRepo = req.KernelRepo
   379  	resp.Build.KernelBranch = req.KernelBranch
   380  	resp.Build.KernelConfig = req.KernelConfig
   381  	switch req.Type {
   382  	case dashapi.JobTestPatch:
   383  		resp.Build.KernelCommit = "[unknown]"
   384  		mgrcfg.Name += "-test" + jp.instanceSuffix
   385  	case dashapi.JobBisectCause, dashapi.JobBisectFix:
   386  		resp.Build.KernelCommit = req.KernelCommit
   387  		resp.Build.KernelCommitTitle = req.KernelCommitTitle
   388  		mgrcfg.Name += "-bisect" + jp.instanceSuffix
   389  	default:
   390  		err := fmt.Errorf("bad job type %v", req.Type)
   391  		job.resp.Error = []byte(err.Error())
   392  		jp.Errorf("%s", err)
   393  		return job.resp
   394  	}
   395  	if req.KernelRepo == "" {
   396  		req.KernelRepo = mgr.mgrcfg.Repo
   397  		req.KernelBranch = mgr.mgrcfg.Branch
   398  	}
   399  	required := []struct {
   400  		name string
   401  		ok   bool
   402  	}{
   403  		{"kernel repository", req.KernelRepo != "" || req.Type != dashapi.JobTestPatch},
   404  		{"kernel branch", req.KernelBranch != "" || req.Type != dashapi.JobTestPatch},
   405  		{"kernel config", len(req.KernelConfig) != 0},
   406  		{"syzkaller commit", req.SyzkallerCommit != ""},
   407  		// We either want a normal repro (with options and syz repro text)
   408  		// or it's a boot time bug, in which case both are empty.
   409  		{"reproducer consistency", (len(req.ReproOpts) != 0 && len(req.ReproSyz) != 0) ||
   410  			(len(req.ReproOpts) == 0 && len(req.ReproSyz) == 0)},
   411  	}
   412  	for _, req := range required {
   413  		if !req.ok {
   414  			job.resp.Error = []byte(req.name + " is invalid")
   415  			jp.Errorf("%s (job id=%q, type=%v)", job.resp.Error, job.req.ID, job.req.Type)
   416  			return job.resp
   417  		}
   418  	}
   419  	if typ := mgr.managercfg.Type; !vm.AllowsOvercommit(typ) {
   420  		job.resp.Error = []byte(fmt.Sprintf("testing is not yet supported for %v machine type.", typ))
   421  		jp.Errorf("%s", job.resp.Error)
   422  		return job.resp
   423  	}
   424  
   425  	var err error
   426  	switch req.Type {
   427  	case dashapi.JobTestPatch:
   428  		err = jp.testPatch(job, mgrcfg)
   429  	case dashapi.JobBisectCause, dashapi.JobBisectFix:
   430  		err = jp.bisect(job, mgrcfg)
   431  	}
   432  	if err != nil {
   433  		job.resp.Error = []byte(err.Error())
   434  	}
   435  	return job.resp
   436  }
   437  
// bisect runs a cause or fix bisection (selected by req.Type) for the job and stores
// the outcome in job.resp: the bisection log, the resulting commits, result flags and,
// if there is one, the crash report. mgrcfg is the per-job copy of the manager config.
func (jp *JobProcessor) bisect(job *Job, mgrcfg *mgrconfig.Config) error {
	req, resp, mgr := job.req, job.resp, job.mgr

	// Hack: if the manager has only, say, 5 VMs, but bisect wants 10, try to override number of VMs to 10.
	// OverrideVMCount is opportunistic and should do it only if it's safe.
	if err := instance.OverrideVMCount(mgrcfg, bisect.MaxNumTests); err != nil {
		return err
	}

	var baseline []byte
	// Read possible baseline for config minimization.
	if mgr.mgrcfg.KernelBaselineConfig != "" {
		var err error
		baseline, err = os.ReadFile(mgr.mgrcfg.KernelBaselineConfig)
		if err != nil {
			return fmt.Errorf("failed to read baseline config: %w", err)
		}
	}
	err := jp.prepareBisectionRepo(mgrcfg, req)
	if err != nil {
		return err
	}
	// The trace is collected in memory (it becomes resp.Log below)
	// and is also duplicated to the verbose log.
	trace := new(bytes.Buffer)
	cfg := &bisect.Config{
		Trace: &debugtracer.GenericTracer{
			TraceWriter: io.MultiWriter(trace, log.VerboseWriter(3)),
			OutDir:      osutil.Abs(filepath.Join("jobs", "debug", strings.ReplaceAll(req.ID, "|", "_"))),
		},
		// Out of 1049 cause bisections that we have now:
		// -  891 finished under  6h (84.9%)
		// -  957 finished under  8h (91.2%)
		// -  980 finished under 10h (93.4%)
		// -  989 finished under 12h (94.3%)
		// - 1011 finished under 18h (96.3%)
		// - 1025 finished under 24h (97.7%)
		// There is also a significant increase in errors/inconclusive bisections after ~8h.
		// Out of 4075 fix bisections:
		// - 4015 finished under  6h (98.5%)
		// - 4020 finished under  8h (98.7%)
		// - 4026 finished under 10h (98.8%)
		// - 4032 finished under 12h (98.9%)
		// Significant increase in errors starts after ~12h.
		// Bisection jobs are now executed in parallel to patch testing, so it doesn't destroy user experience.
		// Let's set the timeout to 12h.
		Timeout:         12 * time.Hour,
		Fix:             req.Type == dashapi.JobBisectFix,
		DefaultCompiler: mgr.mgrcfg.Compiler,
		CompilerType:    mgr.mgrcfg.CompilerType,
		BinDir:          jp.cfg.BisectBinDir,
		Linker:          mgr.mgrcfg.Linker,
		Ccache:          jp.cfg.Ccache,
		BuildCPUs:       jp.cfg.BuildCPUs,
		Kernel: bisect.KernelConfig{
			Repo:           req.KernelRepo,
			Branch:         req.KernelBranch,
			Commit:         req.KernelCommit,
			CommitTitle:    req.KernelCommitTitle,
			Cmdline:        mgr.mgrcfg.KernelCmdline,
			Sysctl:         mgr.mgrcfg.KernelSysctl,
			Config:         req.KernelConfig,
			BaselineConfig: baseline,
			Userspace:      mgr.mgrcfg.Userspace,
			Backports:      mgr.backportCommits(),
		},
		Syzkaller: bisect.SyzkallerConfig{
			Repo:   jp.cfg.SyzkallerRepo,
			Commit: req.SyzkallerCommit,
		},
		Repro: bisect.ReproConfig{
			Opts: req.ReproOpts,
			Syz:  req.ReproSyz,
			C:    req.ReproC,
		},
		CrossTree:      req.MergeBaseRepo != "",
		Manager:        mgrcfg,
		BuildSemaphore: buildSem,
		TestSemaphore:  testSem,
	}

	res, err := bisect.Run(cfg)
	// Attach the log before checking the error, so that failed bisections carry it too.
	resp.Log = trace.Bytes()
	if err != nil {
		var infraErr *build.InfraError
		if errors.As(err, &infraErr) {
			resp.Flags |= dashapi.BisectResultInfraError
		}
		return err
	}
	for _, com := range res.Commits {
		resp.Commits = append(resp.Commits, dashapi.Commit{
			Hash:       com.Hash,
			Title:      com.Title,
			Author:     com.Author,
			AuthorName: com.AuthorName,
			Recipients: com.Recipients.ToDash(),
			Date:       com.Date,
		})
	}
	// Result flags are only set when bisection pointed to a single commit.
	if len(res.Commits) == 1 {
		if len(res.Commits[0].Parents) > 1 {
			resp.Flags |= dashapi.BisectResultMerge
		}
		if res.NoopChange {
			resp.Flags |= dashapi.BisectResultNoop
		}
		if res.IsRelease {
			resp.Flags |= dashapi.BisectResultRelease
		}
		const confidenceCutOff = 0.66
		if res.Confidence < confidenceCutOff {
			resp.Flags |= dashapi.BisectResultIgnore
		}
		if jp.ignoreBisectCommit(res.Commits[0]) {
			resp.Flags |= dashapi.BisectResultIgnore
		}
	}
	if res.Report != nil {
		resp.CrashTitle = res.Report.Title
		resp.CrashAltTitles = res.Report.AltTitles
		resp.CrashReport = res.Report.Report
		resp.CrashLog = res.Report.Output
		if len(resp.Commits) != 0 {
			resp.Commits[0].Recipients = append(resp.Commits[0].Recipients, res.Report.Recipients.ToDash()...)
		} else {
			// If there is a report and there is no commit, it means a crash
			// occurred on HEAD(for BisectFix) and oldest tested release(for BisectCause).
			resp.Build.KernelCommit = res.Commit.Hash
			resp.Build.KernelCommitDate = res.Commit.CommitDate
			resp.Build.KernelCommitTitle = res.Commit.Title
		}
	}
	return nil
}
   571  
// ignoredCommits lists hashes of commits that are always treated as ignored
// bisection results, independently of the BisectIgnore config.
var ignoredCommits = []string{
	// Commit "usb: gadget: add raw-gadget interface" adds a kernel interface for
	// triggering USB bugs, which ends up being the guilty commit during bisection
	// for USB bugs introduced before it.
	"f2c2e717642c66f7fe7e5dd69b2e8ff5849f4d10",
	// Commit "devlink: bump the instance index directly when iterating" has likely
	// fixed some frequent task hung, which skews fix bisection results.
	// TODO: consider backporting it during bisection itself.
	"d772781964415c63759572b917e21c4f7ec08d9f",
}
   582  
   583  func (jp *JobProcessor) ignoreBisectCommit(commit *vcs.Commit) bool {
   584  	// First look at the always ignored values.
   585  	for _, hash := range ignoredCommits {
   586  		if commit.Hash == hash {
   587  			return true
   588  		}
   589  	}
   590  	_, ok := jp.cfg.BisectIgnore[commit.Hash]
   591  	return ok
   592  }
   593  
   594  func (jp *JobProcessor) testPatch(job *Job, mgrcfg *mgrconfig.Config) error {
   595  	req, resp, mgr := job.req, job.resp, job.mgr
   596  	env, err := instance.NewEnv(mgrcfg, buildSem, testSem)
   597  	if err != nil {
   598  		return err
   599  	}
   600  	jp.Logf(0, "building syzkaller on %v...", req.SyzkallerCommit)
   601  	syzBuildLog, syzBuildErr := env.BuildSyzkaller(jp.cfg.SyzkallerRepo, req.SyzkallerCommit)
   602  	if syzBuildErr != nil {
   603  		return syzBuildErr
   604  	}
   605  	jp.Logf(0, "fetching kernel...")
   606  	repo, err := vcs.NewRepo(mgrcfg.TargetOS, mgrcfg.Type, mgrcfg.KernelSrc)
   607  	if err != nil {
   608  		return fmt.Errorf("failed to create kernel repo: %w", err)
   609  	}
   610  	kernelCommit, err := jp.checkoutJobCommit(job, repo)
   611  	if err != nil {
   612  		return err
   613  	}
   614  	resp.Build.KernelCommit = kernelCommit.Hash
   615  	resp.Build.KernelCommitTitle = kernelCommit.Title
   616  	resp.Build.KernelCommitDate = kernelCommit.CommitDate
   617  
   618  	buildCfg := &instance.BuildKernelConfig{
   619  		CompilerBin:  mgr.mgrcfg.Compiler,
   620  		MakeBin:      mgr.mgrcfg.Make,
   621  		LinkerBin:    mgr.mgrcfg.Linker,
   622  		CcacheBin:    mgr.mgrcfg.Ccache,
   623  		UserspaceDir: mgr.mgrcfg.Userspace,
   624  		CmdlineFile:  mgr.mgrcfg.KernelCmdline,
   625  		SysctlFile:   mgr.mgrcfg.KernelSysctl,
   626  		KernelConfig: req.KernelConfig,
   627  	}
   628  	if err := env.CleanKernel(buildCfg); err != nil {
   629  		return fmt.Errorf("kernel clean failed: %w", err)
   630  	}
   631  	if len(req.Patch) != 0 {
   632  		if err := vcs.Patch(mgrcfg.KernelSrc, req.Patch); err != nil {
   633  			return err
   634  		}
   635  	}
   636  
   637  	// Disable CONFIG_DEBUG_INFO_BTF in the config.
   638  	// DEBUG_INFO_BTF requires a very new pahole binary, which we don't have on syzbot instances.
   639  	// Currently we don't enable DEBUG_INFO_BTF, but we have some old bugs with DEBUG_INFO_BTF enabled
   640  	// (at the time requirements for pahole binary were lower, or maybe the config silently disabled itself).
   641  	// Testing of patches for these bugs fail now because of the config, so we disable it as a work-around.
   642  	// Ideally we have a new pahole and then we can remove this hack. That's issue #2096.
   643  	// pkg/vcs/linux.go also disables it for the bisection process.
   644  	req.KernelConfig = bytes.ReplaceAll(req.KernelConfig,
   645  		[]byte("CONFIG_DEBUG_INFO_BTF=y"),
   646  		[]byte("# CONFIG_DEBUG_INFO_BTF is not set"))
   647  
   648  	log.Logf(0, "job: building kernel...")
   649  	kernelConfig, details, err := env.BuildKernel(buildCfg)
   650  	resp.Build.CompilerID = details.CompilerID
   651  	if err != nil {
   652  		return err
   653  	}
   654  	if kernelConfig != "" {
   655  		resp.Build.KernelConfig, err = os.ReadFile(kernelConfig)
   656  		if err != nil {
   657  			return fmt.Errorf("failed to read config file: %w", err)
   658  		}
   659  	}
   660  	jp.Logf(0, "job: testing...")
   661  	results, err := env.Test(3, req.ReproSyz, req.ReproOpts, req.ReproC)
   662  	if err != nil {
   663  		return fmt.Errorf("%w\n\nsyzkaller build log:\n%s", err, syzBuildLog)
   664  	}
   665  	ret, err := aggregateTestResults(results)
   666  	if err != nil {
   667  		return fmt.Errorf("%w\n\nsyzkaller build log:\n%s", err, syzBuildLog)
   668  	}
   669  	rep := ret.report
   670  	if rep != nil {
   671  		resp.CrashTitle = rep.Title
   672  		resp.CrashAltTitles = rep.AltTitles
   673  		resp.CrashReport = rep.Report
   674  	}
   675  	resp.CrashLog = ret.rawOutput
   676  	return nil
   677  }
   678  
   679  func (jp *JobProcessor) prepareBisectionRepo(mgrcfg *mgrconfig.Config, req *dashapi.JobPollResp) error {
   680  	if req.MergeBaseRepo == "" {
   681  		// No need to.
   682  		return nil
   683  	}
   684  	repo, err := vcs.NewRepo(mgrcfg.TargetOS, mgrcfg.Type, mgrcfg.KernelSrc)
   685  	if err != nil {
   686  		return fmt.Errorf("failed to create kernel repo: %w", err)
   687  	}
   688  	_, err = checkoutKernelOrCommit(repo, req.MergeBaseRepo, req.MergeBaseBranch)
   689  	if err != nil {
   690  		return fmt.Errorf("failed to checkout the merge base repo %v on %v: %w",
   691  			req.MergeBaseRepo, req.MergeBaseBranch, err)
   692  	}
   693  	return nil
   694  }
   695  
   696  func (jp *JobProcessor) checkoutJobCommit(job *Job, repo vcs.Repo) (*vcs.Commit, error) {
   697  	req, resp := job.req, job.resp
   698  	var kernelCommit *vcs.Commit
   699  	if req.MergeBaseRepo != "" {
   700  		jp.Logf(1, "checking out the base kernel...")
   701  		firstCommit, err := checkoutKernelOrCommit(repo, req.KernelRepo, req.KernelBranch)
   702  		if err != nil {
   703  			return nil, fmt.Errorf("failed to checkout first kernel repo %v on %v: %w",
   704  				req.KernelRepo, req.KernelBranch, err)
   705  		}
   706  		secondCommit, err := checkoutKernelOrCommit(repo, req.MergeBaseRepo, req.MergeBaseBranch)
   707  		if err != nil {
   708  			return nil, fmt.Errorf("failed to checkout second kernel repo %v on %v: %w",
   709  				req.MergeBaseRepo, req.MergeBaseBranch, err)
   710  		}
   711  		bases, err := repo.MergeBases(firstCommit.Hash, secondCommit.Hash)
   712  		if err != nil {
   713  			return nil, fmt.Errorf("failed to calculate merge bases between %v and %v: %w",
   714  				firstCommit.Hash, secondCommit.Hash, err)
   715  		}
   716  		if len(bases) != 1 {
   717  			return nil, fmt.Errorf("expected one merge base between %v and %v, got %d",
   718  				firstCommit.Hash, secondCommit.Hash, len(bases))
   719  		}
   720  		kernelCommit, err = repo.CheckoutCommit(req.KernelRepo, bases[0].Hash)
   721  		if err != nil {
   722  			return nil, fmt.Errorf("failed to checkout kernel repo %v on merge base %v: %w",
   723  				req.KernelRepo, bases[0].Hash, err)
   724  		}
   725  		resp.Build.KernelBranch = ""
   726  	} else if vcs.CheckCommitHash(req.KernelBranch) {
   727  		var err error
   728  		kernelCommit, err = repo.CheckoutCommit(req.KernelRepo, req.KernelBranch)
   729  		if err != nil {
   730  			return nil, fmt.Errorf("failed to checkout kernel repo %v on commit %v: %w",
   731  				req.KernelRepo, req.KernelBranch, err)
   732  		}
   733  		resp.Build.KernelBranch = ""
   734  	} else {
   735  		var err error
   736  		kernelCommit, err = repo.CheckoutBranch(req.KernelRepo, req.KernelBranch)
   737  		if err != nil {
   738  			return nil, fmt.Errorf("failed to checkout kernel repo %v/%v: %w",
   739  				req.KernelRepo, req.KernelBranch, err)
   740  		}
   741  	}
   742  	return kernelCommit, nil
   743  }
   744  
   745  func checkoutKernelOrCommit(repo vcs.Repo, url, branch string) (*vcs.Commit, error) {
   746  	if vcs.CheckCommitHash(branch) {
   747  		return repo.CheckoutCommit(url, branch)
   748  	}
   749  	return repo.CheckoutBranch(url, branch)
   750  }
   751  
// patchTestResult is the aggregated outcome of the patch test runs.
type patchTestResult struct {
	// report is the crash report; it is left nil when the result comes from a successful run.
	report    *report.Report
	rawOutput []byte
}
   756  
   757  func aggregateTestResults(results []instance.EnvTestResult) (*patchTestResult, error) {
   758  	// We can have transient errors and other errors of different types.
   759  	// We need to avoid reporting transient "failed to boot" or "failed to copy binary" errors.
   760  	// If any of the instances crash during testing, we report this with the highest priority.
   761  	// Then if any of the runs succeed, we report that (to avoid transient errors).
   762  	// If all instances failed to boot, then we report one of these errors.
   763  	var anyErr, testErr error
   764  	var resReport, resSuccess *patchTestResult
   765  	anyErr = fmt.Errorf("no env test runs")
   766  	for _, res := range results {
   767  		if res.Error == nil {
   768  			resSuccess = &patchTestResult{rawOutput: res.RawOutput}
   769  			continue
   770  		}
   771  		anyErr = res.Error
   772  		var testError *instance.TestError
   773  		var crashError *instance.CrashError
   774  		switch {
   775  		case errors.As(res.Error, &testError):
   776  			// We should not put rep into resp.CrashTitle/CrashReport,
   777  			// because that will be treated as patch not fixing the bug.
   778  			if rep := testError.Report; rep != nil {
   779  				testErr = fmt.Errorf("%v\n\n%s\n\n%s", rep.Title, rep.Report, rep.Output)
   780  			} else {
   781  				testErr = fmt.Errorf("%v\n\n%s", testError.Title, testError.Output)
   782  			}
   783  		case errors.As(res.Error, &crashError):
   784  			if resReport == nil || (len(resReport.report.Report) == 0 && len(crashError.Report.Report) != 0) {
   785  				resReport = &patchTestResult{report: crashError.Report, rawOutput: res.RawOutput}
   786  			}
   787  		}
   788  	}
   789  	if resReport != nil {
   790  		return resReport, nil
   791  	}
   792  	if resSuccess != nil {
   793  		return resSuccess, nil
   794  	}
   795  	if testErr != nil {
   796  		return nil, testErr
   797  	}
   798  	return nil, anyErr
   799  }
   800  
   801  func (jp *JobProcessor) Logf(level int, msg string, args ...interface{}) {
   802  	log.Logf(level, "%s: "+msg, append([]interface{}{jp.name}, args...)...)
   803  }
   804  
   805  // Errorf logs non-fatal error and sends it to dashboard.
   806  func (jp *JobProcessor) Errorf(msg string, args ...interface{}) {
   807  	log.Errorf("job: "+msg, args...)
   808  	if jp.dash != nil {
   809  		jp.dash.LogError(jp.name, msg, args...)
   810  	}
   811  }