github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/manager/diff.go (about)

     1  // Copyright 2024 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package manager
     5  
     6  import (
     7  	"context"
     8  	"encoding/json"
     9  	"errors"
    10  	"fmt"
    11  	"math/rand"
    12  	"net"
    13  	"sort"
    14  	"strings"
    15  	"sync"
    16  	"sync/atomic"
    17  	"time"
    18  
    19  	"github.com/google/syzkaller/pkg/corpus"
    20  	"github.com/google/syzkaller/pkg/flatrpc"
    21  	"github.com/google/syzkaller/pkg/fuzzer"
    22  	"github.com/google/syzkaller/pkg/fuzzer/queue"
    23  	"github.com/google/syzkaller/pkg/instance"
    24  	"github.com/google/syzkaller/pkg/log"
    25  	"github.com/google/syzkaller/pkg/mgrconfig"
    26  	"github.com/google/syzkaller/pkg/osutil"
    27  	"github.com/google/syzkaller/pkg/report"
    28  	"github.com/google/syzkaller/pkg/repro"
    29  	"github.com/google/syzkaller/pkg/rpcserver"
    30  	"github.com/google/syzkaller/pkg/signal"
    31  	"github.com/google/syzkaller/pkg/stat"
    32  	"github.com/google/syzkaller/pkg/vcs"
    33  	"github.com/google/syzkaller/pkg/vminfo"
    34  	"github.com/google/syzkaller/prog"
    35  	"github.com/google/syzkaller/vm"
    36  	"github.com/google/syzkaller/vm/dispatcher"
    37  	"golang.org/x/sync/errgroup"
    38  )
    39  
// DiffFuzzerConfig parameterizes a differential fuzzing session
// (see RunDiffFuzzer).
type DiffFuzzerConfig struct {
	// Debug is passed through to the rpc server and the VMs.
	Debug bool
	// PatchedOnly receives the bugs that were reproduced on the patched kernel,
	// but did not crash the base one. It must be non-nil.
	PatchedOnly chan *UniqueBug
	// BaseCrashes, if non-nil, receives the titles of the crashes observed
	// on the base kernel.
	BaseCrashes chan string
	// Store accumulates the results of the session (crashes, repros).
	Store        *DiffFuzzerStore
	ArtifactsDir string // Where to store the artifacts that supplement the logs.
	// The fuzzer waits no more than MaxTriageTime time until it starts taking VMs away
	// for bug reproduction.
	// The option may help find a balance between spending too much time triaging
	// the corpus and not reaching a proper kernel coverage.
	MaxTriageTime time.Duration
	// If non-empty, the fuzzer will spend no more than this amount of time
	// trying to reach the modified code. The time is counted since the moment
	// 99% of the corpus is triaged.
	FuzzToReachPatched time.Duration
	// The callback may be used to consult external systems on whether
	// the crash should be ignored. E.g. because it doesn't match the filter or
	// the particular base kernel has already been seen to crash with the given title.
	// It helps reduce the number of unnecessary reproductions.
	IgnoreCrash func(context.Context, string) (bool, error)
}
    61  
    62  func (cfg *DiffFuzzerConfig) TriageDeadline() <-chan time.Time {
    63  	if cfg.MaxTriageTime == 0 {
    64  		return nil
    65  	}
    66  	return time.After(cfg.MaxTriageTime)
    67  }
    68  
// UniqueBug describes a bug that was reproduced on the patched kernel, while
// the base kernel did not crash on the same reproducer.
type UniqueBug struct {
	// The report from the patched kernel.
	Report *report.Report
	// Repro is the reproducer that triggers the bug.
	Repro *repro.Result
}
    74  
    75  func RunDiffFuzzer(ctx context.Context, baseCfg, newCfg *mgrconfig.Config, cfg DiffFuzzerConfig) error {
    76  	if cfg.PatchedOnly == nil {
    77  		return fmt.Errorf("you must set up a patched only channel")
    78  	}
    79  	base, err := setup("base", baseCfg, cfg.Debug)
    80  	if err != nil {
    81  		return err
    82  	}
    83  	new, err := setup("new", newCfg, cfg.Debug)
    84  	if err != nil {
    85  		return err
    86  	}
    87  	eg, ctx := errgroup.WithContext(ctx)
    88  	eg.Go(func() error {
    89  		info, err := LoadSeeds(newCfg, true)
    90  		if err != nil {
    91  			return err
    92  		}
    93  		select {
    94  		case new.candidates <- info.Candidates:
    95  		case <-ctx.Done():
    96  		}
    97  		return nil
    98  	})
    99  
   100  	stream := queue.NewRandomQueue(4096, rand.New(rand.NewSource(time.Now().UnixNano())))
   101  	base.source = stream
   102  	new.duplicateInto = stream
   103  
   104  	diffCtx := &diffContext{
   105  		cfg:           cfg,
   106  		doneRepro:     make(chan *ReproResult),
   107  		base:          base,
   108  		new:           new,
   109  		store:         cfg.Store,
   110  		reproAttempts: map[string]int{},
   111  		patchedOnly:   cfg.PatchedOnly,
   112  	}
   113  	if newCfg.HTTP != "" {
   114  		diffCtx.http = &HTTPServer{
   115  			Cfg:       newCfg,
   116  			StartTime: time.Now(),
   117  			DiffStore: cfg.Store,
   118  			Pools: map[string]*vm.Dispatcher{
   119  				new.name:  new.pool,
   120  				base.name: base.pool,
   121  			},
   122  		}
   123  		new.http = diffCtx.http
   124  	}
   125  	eg.Go(func() error {
   126  		return diffCtx.Loop(ctx)
   127  	})
   128  	return eg.Wait()
   129  }
   130  
// diffContext ties together all the state of one differential fuzzing session
// (see RunDiffFuzzer).
type diffContext struct {
	cfg   DiffFuzzerConfig
	store *DiffFuzzerStore
	// http, if non-nil, serves the web UI of the session.
	http *HTTPServer

	// doneRepro receives the finished repro results from RunRepro.
	doneRepro chan *ReproResult
	// base is the unmodified kernel, new is the patched one.
	base *kernelContext
	new  *kernelContext
	// patchedOnly forwards the bugs that did not crash the base kernel
	// (it's cfg.PatchedOnly).
	patchedOnly chan *UniqueBug

	// mu protects reproAttempts, which counts repro attempts per crash title
	// (see maxReproAttempts).
	mu            sync.Mutex
	reproAttempts map[string]int
}
   144  
// Thresholds on the triaged share of the seed corpus (see triageProgress).
const (
	// Don't start reproductions until 90% of the corpus has been triaged.
	corpusTriageToRepro = 0.9
	// Start to monitor whether we reached the modified files only after triaging 99%.
	corpusTriageToMonitor = 0.99
)
   151  
   152  func (dc *diffContext) Loop(baseCtx context.Context) error {
   153  	g, ctx := errgroup.WithContext(baseCtx)
   154  	reproLoop := NewReproLoop(dc, dc.new.pool.Total()-dc.new.cfg.FuzzingVMs, false)
   155  	if dc.http != nil {
   156  		dc.http.ReproLoop = reproLoop
   157  		g.Go(func() error {
   158  			return dc.http.Serve(ctx)
   159  		})
   160  	}
   161  
   162  	g.Go(func() error {
   163  		select {
   164  		case <-ctx.Done():
   165  			return nil
   166  		case <-dc.waitCorpusTriage(ctx, corpusTriageToRepro):
   167  		case <-dc.cfg.TriageDeadline():
   168  			log.Logf(0, "timed out waiting for coprus triage")
   169  		}
   170  		log.Logf(0, "starting bug reproductions")
   171  		reproLoop.Loop(ctx)
   172  		return nil
   173  	})
   174  
   175  	g.Go(func() error { return dc.monitorPatchedCoverage(ctx) })
   176  	g.Go(func() error { return dc.base.Loop(ctx) })
   177  	g.Go(func() error { return dc.new.Loop(ctx) })
   178  
   179  	runner := &reproRunner{done: make(chan reproRunnerResult, 2), kernel: dc.base}
   180  	statTimer := time.NewTicker(5 * time.Minute)
   181  loop:
   182  	for {
   183  		select {
   184  		case <-ctx.Done():
   185  			break loop
   186  		case <-statTimer.C:
   187  			vals := make(map[string]int)
   188  			for _, stat := range stat.Collect(stat.All) {
   189  				vals[stat.Name] = stat.V
   190  			}
   191  			data, _ := json.MarshalIndent(vals, "", "  ")
   192  			log.Logf(0, "STAT %s", data)
   193  		case rep := <-dc.base.crashes:
   194  			log.Logf(1, "base crash: %v", rep.Title)
   195  			dc.reportBaseCrash(ctx, rep)
   196  		case ret := <-runner.done:
   197  			// We have run the reproducer on the base instance.
   198  
   199  			// A sanity check: the base kernel might have crashed with the same title
   200  			// since the moment we have stared the reproduction / running on the repro base.
   201  			ignored := dc.ignoreCrash(ctx, ret.reproReport.Title)
   202  			if ret.crashReport == nil && ignored {
   203  				// Report it as error so that we could at least find it in the logs.
   204  				log.Errorf("resulting crash of an approved repro result is to be ignored: %s",
   205  					ret.reproReport.Title)
   206  			} else if ret.crashReport == nil {
   207  				dc.store.BaseNotCrashed(ret.reproReport.Title)
   208  				select {
   209  				case <-ctx.Done():
   210  				case dc.patchedOnly <- &UniqueBug{
   211  					Report: ret.reproReport,
   212  					Repro:  ret.repro,
   213  				}:
   214  				}
   215  				log.Logf(0, "patched-only: %s", ret.reproReport.Title)
   216  				// Now that we know this bug only affects the patch kernel, we can spend more time
   217  				// generating a minimalistic repro and a C repro.
   218  				if !ret.fullRepro {
   219  					reproLoop.Enqueue(&Crash{
   220  						Report: &report.Report{
   221  							Title:  ret.reproReport.Title,
   222  							Output: ret.repro.Prog.Serialize(),
   223  						},
   224  						FullRepro: true,
   225  					})
   226  				}
   227  			} else {
   228  				dc.reportBaseCrash(ctx, ret.crashReport)
   229  				log.Logf(0, "crashes both: %s / %s", ret.reproReport.Title, ret.crashReport.Title)
   230  			}
   231  		case ret := <-dc.doneRepro:
   232  			// We have finished reproducing a crash from the patched instance.
   233  			if ret.Repro != nil && ret.Repro.Report != nil {
   234  				origTitle := ret.Crash.Report.Title
   235  				if ret.Repro.Report.Title == origTitle {
   236  					origTitle = "-SAME-"
   237  				}
   238  				log.Logf(1, "found repro for %q (orig title: %q, reliability: %2.f), took %.2f minutes",
   239  					ret.Repro.Report.Title, origTitle, ret.Repro.Reliability, ret.Stats.TotalTime.Minutes())
   240  				g.Go(func() error {
   241  					runner.Run(ctx, ret.Repro, ret.Crash.FullRepro)
   242  					return nil
   243  				})
   244  			} else {
   245  				origTitle := ret.Crash.Report.Title
   246  				log.Logf(1, "failed repro for %q, err=%s", origTitle, ret.Err)
   247  			}
   248  			dc.store.SaveRepro(ret)
   249  		case rep := <-dc.new.crashes:
   250  			// A new crash is found on the patched instance.
   251  			crash := &Crash{Report: rep}
   252  			need := dc.NeedRepro(crash)
   253  			log.Logf(0, "patched crashed: %v [need repro = %v]",
   254  				rep.Title, need)
   255  			dc.store.PatchedCrashed(rep.Title, rep.Report, rep.Output)
   256  			if need {
   257  				reproLoop.Enqueue(crash)
   258  			}
   259  		}
   260  	}
   261  	return g.Wait()
   262  }
   263  
   264  func (dc *diffContext) ignoreCrash(ctx context.Context, title string) bool {
   265  	if dc.store.EverCrashedBase(title) {
   266  		return true
   267  	}
   268  	// Let's try to ask the external systems about it as well.
   269  	if dc.cfg.IgnoreCrash != nil {
   270  		ignore, err := dc.cfg.IgnoreCrash(ctx, title)
   271  		if err != nil {
   272  			log.Logf(0, "a call to IgnoreCrash failed: %v", err)
   273  		} else {
   274  			if ignore {
   275  				log.Logf(0, "base crash %q is to be ignored", title)
   276  			}
   277  			return ignore
   278  		}
   279  	}
   280  	return false
   281  }
   282  
   283  func (dc *diffContext) reportBaseCrash(ctx context.Context, rep *report.Report) {
   284  	dc.store.BaseCrashed(rep.Title, rep.Report)
   285  	if dc.cfg.BaseCrashes == nil {
   286  		return
   287  	}
   288  	select {
   289  	case dc.cfg.BaseCrashes <- rep.Title:
   290  	case <-ctx.Done():
   291  	}
   292  }
   293  
   294  func (dc *diffContext) waitCorpusTriage(ctx context.Context, threshold float64) chan struct{} {
   295  	const backOffTime = 30 * time.Second
   296  	ret := make(chan struct{})
   297  	go func() {
   298  		for {
   299  			select {
   300  			case <-time.After(backOffTime):
   301  			case <-ctx.Done():
   302  				return
   303  			}
   304  			triaged := dc.new.triageProgress()
   305  			if triaged >= threshold {
   306  				log.Logf(0, "triaged %.1f%% of the corpus", triaged*100.0)
   307  				close(ret)
   308  				return
   309  			}
   310  		}
   311  	}()
   312  	return ret
   313  }
   314  
// ErrPatchedAreaNotReached is returned by monitorPatchedCoverage when the fuzzer
// failed to cover any of the modified code within cfg.FuzzToReachPatched.
var ErrPatchedAreaNotReached = errors.New("fuzzer has not reached the patched area")
   316  
// monitorPatchedCoverage aborts fuzzing (by returning ErrPatchedAreaNotReached)
// if, cfg.FuzzToReachPatched after the corpus triage has (almost) completed,
// the fuzzer has still not executed any programs covering the modified code.
// If cfg.FuzzToReachPatched is 0, the check is disabled and nil is returned.
func (dc *diffContext) monitorPatchedCoverage(ctx context.Context) error {
	if dc.cfg.FuzzToReachPatched == 0 {
		// The feature is disabled.
		return nil
	}

	// First wait until we have almost triaged all of the corpus.
	select {
	case <-ctx.Done():
		return nil
	case <-dc.waitCorpusTriage(ctx, corpusTriageToMonitor):
	}

	// By this moment, we must have coverage filters already filled out.
	focusPCs := 0
	// The last one is "everything else", so it's not of interest.
	coverFilters := dc.new.coverFilters
	for i := 0; i < len(coverFilters.Areas)-1; i++ {
		focusPCs += len(coverFilters.Areas[i].CoverPCs)
	}
	if focusPCs == 0 {
		// No areas were configured.
		log.Logf(1, "no PCs in the areas of focused fuzzing, skipping the zero patched coverage check")
		return nil
	}

	// Then give the fuzzer some chance to get through.
	select {
	case <-time.After(dc.cfg.FuzzToReachPatched):
	case <-ctx.Done():
		return nil
	}
	focusAreaStats := dc.new.progsPerArea()
	if focusAreaStats[symbolsArea]+focusAreaStats[filesArea]+focusAreaStats[includesArea] > 0 {
		log.Logf(0, "fuzzer has reached the modified code (%d + %d + %d), continuing fuzzing",
			focusAreaStats[symbolsArea], focusAreaStats[filesArea], focusAreaStats[includesArea])
		return nil
	}
	log.Logf(0, "fuzzer has not reached the modified code in %s, aborting",
		dc.cfg.FuzzToReachPatched)
	return ErrPatchedAreaNotReached
}
   359  
// Limit on the number of reproduction attempts per crash title (see NeedRepro).
// TODO: instead of this limit, consider exponentially growing delays between reproduction attempts.
const maxReproAttempts = 6
   362  
   363  func needReproForTitle(title string) bool {
   364  	if strings.Contains(title, "no output") ||
   365  		strings.Contains(title, "lost connection") ||
   366  		strings.Contains(title, "detected stall") ||
   367  		strings.Contains(title, "SYZ") {
   368  		// Don't waste time reproducing these.
   369  		return false
   370  	}
   371  	return true
   372  }
   373  
   374  func (dc *diffContext) NeedRepro(crash *Crash) bool {
   375  	if crash.FullRepro {
   376  		return true
   377  	}
   378  	if !needReproForTitle(crash.Title) {
   379  		return false
   380  	}
   381  	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
   382  	defer cancel()
   383  	if dc.ignoreCrash(ctx, crash.Title) {
   384  		return false
   385  	}
   386  	dc.mu.Lock()
   387  	defer dc.mu.Unlock()
   388  	return dc.reproAttempts[crash.Title] <= maxReproAttempts
   389  }
   390  
// RunRepro attempts to reproduce the given crash on the patched kernel.
// The result is both returned and sent to dc.doneRepro (where the main Loop
// picks it up for the base kernel verification).
func (dc *diffContext) RunRepro(ctx context.Context, crash *Crash) *ReproResult {
	dc.mu.Lock()
	dc.reproAttempts[crash.Title]++
	dc.mu.Unlock()

	res, stats, err := repro.Run(ctx, crash.Output, repro.Environment{
		Config:   dc.new.cfg,
		Features: dc.new.features,
		Reporter: dc.new.reporter,
		// Use the patched kernel's VMs for reproduction.
		Pool: dc.new.pool,
		// Full repros take longer, so only run them when explicitly requested.
		Fast: !crash.FullRepro,
	})
	if res != nil && res.Report != nil {
		// The repro may have surfaced under a different crash title; cap the
		// attempt counter for that title as well.
		dc.mu.Lock()
		dc.reproAttempts[res.Report.Title] = maxReproAttempts
		dc.mu.Unlock()
	}
	ret := &ReproResult{
		Crash: crash,
		Repro: res,
		Stats: stats,
		Err:   err,
	}
	select {
	case dc.doneRepro <- ret:
	case <-ctx.Done():
		// If the context is cancelled, no one may be listening on doneRepro.
	}
	return ret
}
   421  
// ResizeReproPool reserves the given number of the patched kernel's VMs
// for bug reproduction.
func (dc *diffContext) ResizeReproPool(size int) {
	dc.new.pool.ReserveForRun(size)
}
   425  
// kernelContext holds all the per-kernel state of a diff fuzzing session:
// the RPC server, the VM pool, the fuzzer and the crash delivery plumbing.
type kernelContext struct {
	name  string
	ctx   context.Context // assigned in Loop
	debug bool
	cfg   *mgrconfig.Config
	reporter *report.Reporter
	// fuzzer is non-nil once setupFuzzer has run (see also triageProgress).
	fuzzer    atomic.Pointer[fuzzer.Fuzzer]
	serv      rpcserver.Server
	servStats rpcserver.Stats
	// crashes receives the crash reports extracted from the VMs.
	crashes chan *report.Report
	pool    *vm.Dispatcher
	features flatrpc.Feature
	// candidates delivers the seed programs to setupFuzzer.
	candidates chan []fuzzer.Candidate
	// Once candidates is assigned, candidatesCount holds their original count.
	candidatesCount atomic.Int64

	coverFilters    CoverageFilters
	reportGenerator *ReportGeneratorWrapper

	http *HTTPServer // may be nil
	// source, if set, overrides the locally created fuzzer as the source of
	// programs to execute (used for the base kernel).
	source queue.Source
	// duplicateInto, if set, additionally receives every executed program
	// (used for the patched kernel).
	duplicateInto queue.Executor
}
   449  
   450  func setup(name string, cfg *mgrconfig.Config, debug bool) (*kernelContext, error) {
   451  	osutil.MkdirAll(cfg.Workdir)
   452  
   453  	kernelCtx := &kernelContext{
   454  		name:            name,
   455  		debug:           debug,
   456  		cfg:             cfg,
   457  		crashes:         make(chan *report.Report, 128),
   458  		candidates:      make(chan []fuzzer.Candidate),
   459  		servStats:       rpcserver.NewNamedStats(name),
   460  		reportGenerator: ReportGeneratorCache(cfg),
   461  	}
   462  
   463  	var err error
   464  	kernelCtx.reporter, err = report.NewReporter(cfg)
   465  	if err != nil {
   466  		return nil, fmt.Errorf("failed to create reporter for %q: %w", name, err)
   467  	}
   468  
   469  	kernelCtx.serv, err = rpcserver.New(&rpcserver.RemoteConfig{
   470  		Config:  cfg,
   471  		Manager: kernelCtx,
   472  		Stats:   kernelCtx.servStats,
   473  		Debug:   debug,
   474  	})
   475  	if err != nil {
   476  		return nil, fmt.Errorf("failed to create rpc server for %q: %w", name, err)
   477  	}
   478  
   479  	vmPool, err := vm.Create(cfg, debug)
   480  	if err != nil {
   481  		return nil, fmt.Errorf("failed to create vm.Pool for %q: %w", name, err)
   482  	}
   483  
   484  	kernelCtx.pool = vm.NewDispatcher(vmPool, kernelCtx.fuzzerInstance)
   485  	return kernelCtx, nil
   486  }
   487  
   488  func (kc *kernelContext) Loop(baseCtx context.Context) error {
   489  	defer log.Logf(1, "%s: kernel context loop terminated", kc.name)
   490  
   491  	if err := kc.serv.Listen(); err != nil {
   492  		return fmt.Errorf("failed to start rpc server: %w", err)
   493  	}
   494  	eg, ctx := errgroup.WithContext(baseCtx)
   495  	kc.ctx = ctx
   496  	eg.Go(func() error {
   497  		defer log.Logf(1, "%s: rpc server terminaled", kc.name)
   498  		return kc.serv.Serve(ctx)
   499  	})
   500  	eg.Go(func() error {
   501  		defer log.Logf(1, "%s: pool terminated", kc.name)
   502  		kc.pool.Loop(ctx)
   503  		return nil
   504  	})
   505  	eg.Go(func() error {
   506  		for {
   507  			select {
   508  			case <-ctx.Done():
   509  				return nil
   510  			case err := <-kc.pool.BootErrors:
   511  				title := "unknown"
   512  				var bootErr vm.BootErrorer
   513  				if errors.As(err, &bootErr) {
   514  					title, _ = bootErr.BootError()
   515  				}
   516  				// Boot errors are not useful for patch fuzzing (at least yet).
   517  				// Fetch them to not block the channel and print them to the logs.
   518  				log.Logf(0, "%s: boot error: %s", kc.name, title)
   519  			}
   520  		}
   521  	})
   522  	return eg.Wait()
   523  }
   524  
   525  func (kc *kernelContext) MaxSignal() signal.Signal {
   526  	if fuzzer := kc.fuzzer.Load(); fuzzer != nil {
   527  		return fuzzer.Cover.CopyMaxSignal()
   528  	}
   529  	return nil
   530  }
   531  
// BugFrames returns the kernel frames of the known leaks/races.
// Diff fuzzing does not track any, so both lists are always nil.
func (kc *kernelContext) BugFrames() (leaks, races []string) {
	return nil, nil
}
   535  
   536  func (kc *kernelContext) MachineChecked(features flatrpc.Feature,
   537  	syscalls map[*prog.Syscall]bool) (queue.Source, error) {
   538  	if len(syscalls) == 0 {
   539  		return nil, fmt.Errorf("all system calls are disabled")
   540  	}
   541  	log.Logf(0, "%s: machine check complete", kc.name)
   542  	kc.features = features
   543  
   544  	var source queue.Source
   545  	if kc.source == nil {
   546  		source = queue.Tee(kc.setupFuzzer(features, syscalls), kc.duplicateInto)
   547  	} else {
   548  		source = kc.source
   549  	}
   550  	opts := fuzzer.DefaultExecOpts(kc.cfg, features, kc.debug)
   551  	return queue.DefaultOpts(source, opts), nil
   552  }
   553  
// setupFuzzer constructs the fuzzer for this kernel, seeds it with the
// candidates received over kc.candidates, and starts a goroutine that
// periodically distributes the newly observed coverage signal to the VMs.
// The returned fuzzer serves as the source of programs to execute.
func (kc *kernelContext) setupFuzzer(features flatrpc.Feature, syscalls map[*prog.Syscall]bool) queue.Source {
	rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
	corpusObj := corpus.NewFocusedCorpus(kc.ctx, nil, kc.coverFilters.Areas)
	fuzzerObj := fuzzer.NewFuzzer(kc.ctx, &fuzzer.Config{
		Corpus:   corpusObj,
		Coverage: kc.cfg.Cover,
		// Fault injection may bring instability into bug reproducibility, which may lead to false positives.
		FaultInjection: false,
		Comparisons:    features&flatrpc.FeatureComparisons != 0,
		Collide:        true,
		EnabledCalls:   syscalls,
		NoMutateCalls:  kc.cfg.NoMutateCalls,
		PatchTest:      true,
		// Only forward the most important (level 0) fuzzer messages to the log.
		Logf: func(level int, msg string, args ...interface{}) {
			if level != 0 {
				return
			}
			log.Logf(level, msg, args...)
		},
	}, rnd, kc.cfg.Target)

	if kc.http != nil {
		// Surface the fuzzer state in the web UI.
		kc.http.Fuzzer.Store(fuzzerObj)
		kc.http.EnabledSyscalls.Store(syscalls)
		kc.http.Corpus.Store(corpusObj)
	}

	var candidates []fuzzer.Candidate
	select {
	case candidates = <-kc.candidates:
	case <-kc.ctx.Done():
		// The loop will be aborted later.
		break
	}
	// We assign kc.fuzzer after kc.candidatesCount to simplify the triageProgress implementation.
	kc.candidatesCount.Store(int64(len(candidates)))
	kc.fuzzer.Store(fuzzerObj)

	filtered := FilterCandidates(candidates, syscalls, false).Candidates
	log.Logf(0, "%s: adding %d seeds", kc.name, len(filtered))
	fuzzerObj.AddCandidates(filtered)

	// Once a second, propagate the newly grabbed signal delta to the VMs.
	go func() {
		if !kc.cfg.Cover {
			return
		}
		for {
			select {
			case <-time.After(time.Second):
			case <-kc.ctx.Done():
				return
			}
			newSignal := fuzzerObj.Cover.GrabSignalDelta()
			if len(newSignal) == 0 {
				continue
			}
			kc.serv.DistributeSignalDelta(newSignal)
		}
	}()
	return fuzzerObj
}
   615  
// CoverageFilter prepares the coverage filters for the given kernel modules
// and returns the list of PCs to which the executor should restrict coverage
// collection.
func (kc *kernelContext) CoverageFilter(modules []*vminfo.KernelModule) ([]uint64, error) {
	kc.reportGenerator.Init(modules)
	filters, err := PrepareCoverageFilters(kc.reportGenerator, kc.cfg, false)
	if err != nil {
		return nil, fmt.Errorf("failed to init coverage filter: %w", err)
	}
	kc.coverFilters = filters
	for _, area := range filters.Areas {
		log.Logf(0, "area %q: %d PCs in the cover filter",
			area.Name, len(area.CoverPCs))
	}
	log.Logf(0, "executor cover filter: %d PCs", len(filters.ExecutorFilter))
	if kc.http != nil {
		// Make the coverage information available in the web UI.
		kc.http.Cover.Store(&CoverageInfo{
			Modules:         modules,
			ReportGenerator: kc.reportGenerator,
			CoverFilter:     filters.ExecutorFilter,
		})
	}
	// Flatten the executor filter map into a plain list of PCs.
	var pcs []uint64
	for pc := range filters.ExecutorFilter {
		pcs = append(pcs, pc)
	}
	return pcs, nil
}
   641  
// fuzzerInstance manages one VM's lifetime: it registers the VM in the rpc
// server, runs the fuzzing session on it and forwards the resulting crash
// report (if any) to kc.crashes.
func (kc *kernelContext) fuzzerInstance(ctx context.Context, inst *vm.Instance, updInfo dispatcher.UpdateInfo) {
	index := inst.Index()
	injectExec := make(chan bool, 10)
	kc.serv.CreateInstance(index, injectExec, updInfo)
	rep, err := kc.runInstance(ctx, inst, injectExec)
	// Tell the rpc server whether the VM crashed and collect the programs
	// that were executing at that moment.
	lastExec, _ := kc.serv.ShutdownInstance(index, rep != nil)
	if rep != nil {
		rpcserver.PrependExecuting(rep, lastExec)
		select {
		case kc.crashes <- rep:
		case <-ctx.Done():
		}
	}
	if err != nil {
		log.Errorf("#%d run failed: %s", inst.Index(), err)
	}
}
   659  
   660  func (kc *kernelContext) runInstance(ctx context.Context, inst *vm.Instance,
   661  	injectExec <-chan bool) (*report.Report, error) {
   662  	fwdAddr, err := inst.Forward(kc.serv.Port())
   663  	if err != nil {
   664  		return nil, fmt.Errorf("failed to setup port forwarding: %w", err)
   665  	}
   666  	executorBin, err := inst.Copy(kc.cfg.ExecutorBin)
   667  	if err != nil {
   668  		return nil, fmt.Errorf("failed to copy binary: %w", err)
   669  	}
   670  	host, port, err := net.SplitHostPort(fwdAddr)
   671  	if err != nil {
   672  		return nil, fmt.Errorf("failed to parse manager's address")
   673  	}
   674  	cmd := fmt.Sprintf("%v runner %v %v %v", executorBin, inst.Index(), host, port)
   675  	ctxTimeout, cancel := context.WithTimeout(ctx, kc.cfg.Timeouts.VMRunningTime)
   676  	defer cancel()
   677  	_, reps, err := inst.Run(ctxTimeout, kc.reporter, cmd,
   678  		vm.WithExitCondition(vm.ExitTimeout),
   679  		vm.WithInjectExecuting(injectExec),
   680  		vm.WithEarlyFinishCb(func() {
   681  			// Depending on the crash type and kernel config, fuzzing may continue
   682  			// running for several seconds even after kernel has printed a crash report.
   683  			// This litters the log and we want to prevent it.
   684  			kc.serv.StopFuzzing(inst.Index())
   685  		}),
   686  	)
   687  	if len(reps) > 0 {
   688  		return reps[0], err
   689  	}
   690  	return nil, err
   691  }
   692  
   693  func (kc *kernelContext) triageProgress() float64 {
   694  	fuzzer := kc.fuzzer.Load()
   695  	if fuzzer == nil {
   696  		return 0
   697  	}
   698  	total := kc.candidatesCount.Load()
   699  	if total == 0.0 {
   700  		// There were no candidates in the first place.
   701  		return 1
   702  	}
   703  	return 1.0 - float64(fuzzer.CandidatesToTriage())/float64(total)
   704  }
   705  
   706  func (kc *kernelContext) progsPerArea() map[string]int {
   707  	fuzzer := kc.fuzzer.Load()
   708  	if fuzzer == nil {
   709  		return nil
   710  	}
   711  	return fuzzer.Config.Corpus.ProgsPerArea()
   712  }
   713  
// reproRunner is used to run reproducers on the base kernel to determine whether it is affected.
type reproRunner struct {
	// done receives one result per finished Run call (unless the run was
	// skipped or cancelled).
	done chan reproRunnerResult
	// running counts the in-flight Run calls; it's used to size the base
	// kernel's VM reservation.
	running atomic.Int64
	// kernel is the base kernel's context.
	kernel *kernelContext
}

// reproRunnerResult is the outcome of replaying one reproducer on the base kernel.
type reproRunnerResult struct {
	// reproReport is the report produced by the reproducer on the patched kernel.
	reproReport *report.Report
	// crashReport is non-nil iff the base kernel crashed as well.
	crashReport *report.Report
	// repro is the reproducer that was replayed.
	repro     *repro.Result
	fullRepro bool // whether this was a full reproduction
}
   727  
const (
	// We want to avoid false positives as much as possible, so let's use
	// a stricter reliability cut-off than what's used inside pkg/repro.
	reliabilityCutOff = 0.4
	// 80% reliability x 3 runs is a 0.8% chance of false positives.
	// 6 runs at 40% reproducibility gives a ~4% false positive chance.
	reliabilityThreshold = 0.8
)
   736  
// Run executes the reproducer 3 times (6 times for repros whose reliability is
// below reliabilityThreshold) with slightly different options.
// The objective is to verify whether the bug triggered by the reproducer affects the base kernel.
// To avoid reporting false positives, the function does not require the kernel to crash with exactly
// the same crash title as in the original crash report. Any single crash is accepted.
// The result is sent back over the rr.done channel (unless the repro is too
// unreliable or the context gets cancelled, in which case nothing is sent).
func (rr *reproRunner) Run(ctx context.Context, r *repro.Result, fullRepro bool) {
	if r.Reliability < reliabilityCutOff {
		log.Logf(1, "%s: repro is too unreliable, skipping", r.Report.Title)
		return
	}
	needRuns := 3
	if r.Reliability < reliabilityThreshold {
		needRuns = 6
	}

	// Reserve one base kernel VM per in-flight Run call (capped by pool size);
	// the deferred call shrinks the reservation back when this run finishes.
	pool := rr.kernel.pool
	cnt := int(rr.running.Add(1))
	pool.ReserveForRun(min(cnt, pool.Total()))
	defer func() {
		cnt := int(rr.running.Add(-1))
		rr.kernel.pool.ReserveForRun(min(cnt, pool.Total()))
	}()

	ret := reproRunnerResult{reproReport: r.Report, repro: r, fullRepro: fullRepro}
	for doneRuns := 0; doneRuns < needRuns; {
		if ctx.Err() != nil {
			return
		}
		opts := r.Opts
		opts.Repeat = true
		if doneRuns%3 != 2 {
			// Two times out of 3, test with Threaded=true.
			// The third time we leave it as it was in the reproducer (in case it was important).
			opts.Threaded = true
		}
		var err error
		var result *instance.RunResult
		runErr := pool.Run(ctx, func(ctx context.Context, inst *vm.Instance, updInfo dispatcher.UpdateInfo) {
			var ret *instance.ExecProgInstance
			ret, err = instance.SetupExecProg(inst, rr.kernel.cfg, rr.kernel.reporter, nil)
			if err != nil {
				return
			}
			result, err = ret.RunSyzProg(instance.ExecParams{
				SyzProg:  r.Prog.Serialize(),
				Duration: max(r.Duration, time.Minute),
				Opts:     opts,
			})
		})
		logPrefix := fmt.Sprintf("attempt #%d to run %q on base", doneRuns, ret.reproReport.Title)
		if errors.Is(runErr, context.Canceled) {
			// Just exit without sending anything over the channel.
			log.Logf(1, "%s: aborting due to context cancelation", logPrefix)
			return
		} else if runErr != nil || err != nil {
			// Infrastructure errors don't count as completed runs; retry.
			log.Logf(1, "%s: skipping due to errors: %v / %v", logPrefix, runErr, err)
			continue
		}
		doneRuns++
		if result != nil && result.Report != nil {
			// The base kernel crashed too; no need to run the repro further.
			log.Logf(1, "%s: crashed with %s", logPrefix, result.Report.Title)
			ret.crashReport = result.Report
			break
		} else {
			log.Logf(1, "%s: did not crash", logPrefix)
		}
	}
	select {
	case rr.done <- ret:
	case <-ctx.Done():
	}
}
   809  
// Names of the focus areas configured by PatchFocusAreas.
const (
	symbolsArea  = "symbols"
	filesArea    = "files"
	includesArea = "included"
)
   815  
   816  func PatchFocusAreas(cfg *mgrconfig.Config, gitPatches [][]byte, baseHashes, patchedHashes map[string]string) {
   817  	funcs := modifiedSymbols(baseHashes, patchedHashes)
   818  	if len(funcs) > 0 {
   819  		log.Logf(0, "adding modified_functions to focus areas: %q", funcs)
   820  		cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas,
   821  			mgrconfig.FocusArea{
   822  				Name: symbolsArea,
   823  				Filter: mgrconfig.CovFilterCfg{
   824  					Functions: funcs,
   825  				},
   826  				Weight: 6.0,
   827  			})
   828  	}
   829  
   830  	direct, transitive := affectedFiles(cfg, gitPatches)
   831  	if len(direct) > 0 {
   832  		sort.Strings(direct)
   833  		log.Logf(0, "adding directly modified files to focus areas: %q", direct)
   834  		cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas,
   835  			mgrconfig.FocusArea{
   836  				Name: filesArea,
   837  				Filter: mgrconfig.CovFilterCfg{
   838  					Files: direct,
   839  				},
   840  				Weight: 3.0,
   841  			})
   842  	}
   843  
   844  	if len(transitive) > 0 {
   845  		sort.Strings(transitive)
   846  		log.Logf(0, "adding transitively affected to focus areas: %q", transitive)
   847  		cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas,
   848  			mgrconfig.FocusArea{
   849  				Name: includesArea,
   850  				Filter: mgrconfig.CovFilterCfg{
   851  					Files: transitive,
   852  				},
   853  				Weight: 2.0,
   854  			})
   855  	}
   856  
   857  	// Still fuzz the rest of the kernel.
   858  	if len(cfg.Experimental.FocusAreas) > 0 {
   859  		cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas,
   860  			mgrconfig.FocusArea{
   861  				Weight: 1.0,
   862  			})
   863  	}
   864  }
   865  
   866  func affectedFiles(cfg *mgrconfig.Config, gitPatches [][]byte) (direct, transitive []string) {
   867  	const maxAffectedByHeader = 50
   868  
   869  	directMap := make(map[string]struct{})
   870  	transitiveMap := make(map[string]struct{})
   871  	var allFiles []string
   872  	for _, patch := range gitPatches {
   873  		allFiles = append(allFiles, vcs.ParseGitDiff(patch)...)
   874  	}
   875  	for _, file := range allFiles {
   876  		directMap[file] = struct{}{}
   877  		if strings.HasSuffix(file, ".h") && cfg.KernelSrc != "" {
   878  			// For .h files, we want to determine all the .c files that include them.
   879  			// Ideally, we should combine this with the recompilation process - then we know
   880  			// exactly which files were affected by the patch.
   881  			matching, err := osutil.GrepFiles(cfg.KernelSrc, `.c`,
   882  				[]byte(`<`+strings.TrimPrefix(file, "include/")+`>`))
   883  			if err != nil {
   884  				log.Logf(0, "failed to grep for includes: %s", err)
   885  				continue
   886  			}
   887  			if len(matching) >= maxAffectedByHeader {
   888  				// It's too widespread. It won't help us focus on anything.
   889  				log.Logf(0, "the header %q is included in too many files (%d)", file, len(matching))
   890  				continue
   891  			}
   892  			for _, name := range matching {
   893  				transitiveMap[name] = struct{}{}
   894  			}
   895  		}
   896  	}
   897  	for name := range directMap {
   898  		direct = append(direct, name)
   899  	}
   900  	for name := range transitiveMap {
   901  		if _, ok := directMap[name]; ok {
   902  			continue
   903  		}
   904  		transitive = append(transitive, name)
   905  	}
   906  	return
   907  }
   908  
   909  // If there are too many different symbols, they are no longer specific enough.
   910  // Don't use them to focus the fuzzer.
   911  const modifiedSymbolThreshold = 0.05
   912  
   913  func modifiedSymbols(baseHashes, patchedHashes map[string]string) []string {
   914  	var ret []string
   915  	for name, hash := range patchedHashes {
   916  		if baseHash, ok := baseHashes[name]; !ok || baseHash != hash {
   917  			ret = append(ret, name)
   918  			if float64(len(ret)) > float64(len(patchedHashes))*modifiedSymbolThreshold {
   919  				return nil
   920  			}
   921  		}
   922  	}
   923  	sort.Strings(ret)
   924  	return ret
   925  }