github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/syz-manager/manager.go

// Copyright 2015 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package main

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"flag"
	"fmt"
	"io"
	"math/rand"
	"net"
	"os"
	"os/exec"
	"path"
	"path/filepath"
	"sort"
	"sync"
	"sync/atomic"
	"time"

	"github.com/google/syzkaller/dashboard/dashapi"
	"github.com/google/syzkaller/pkg/asset"
	"github.com/google/syzkaller/pkg/corpus"
	"github.com/google/syzkaller/pkg/db"
	"github.com/google/syzkaller/pkg/flatrpc"
	"github.com/google/syzkaller/pkg/fuzzer"
	"github.com/google/syzkaller/pkg/fuzzer/queue"
	"github.com/google/syzkaller/pkg/gce"
	"github.com/google/syzkaller/pkg/ifaceprobe"
	"github.com/google/syzkaller/pkg/image"
	"github.com/google/syzkaller/pkg/kfuzztest"
	"github.com/google/syzkaller/pkg/log"
	"github.com/google/syzkaller/pkg/manager"
	"github.com/google/syzkaller/pkg/mgrconfig"
	"github.com/google/syzkaller/pkg/osutil"
	"github.com/google/syzkaller/pkg/report"
	crash_pkg "github.com/google/syzkaller/pkg/report/crash"
	"github.com/google/syzkaller/pkg/repro"
	"github.com/google/syzkaller/pkg/rpcserver"
	"github.com/google/syzkaller/pkg/runtest"
	"github.com/google/syzkaller/pkg/signal"
	"github.com/google/syzkaller/pkg/stat"
	"github.com/google/syzkaller/pkg/vminfo"
	"github.com/google/syzkaller/prog"
	"github.com/google/syzkaller/sys/targets"
	"github.com/google/syzkaller/vm"
	"github.com/google/syzkaller/vm/dispatcher"
)

var (
	flagConfig = flag.String("config", "", "configuration file")
	flagDebug  = flag.Bool("debug", false, "dump all VM output to console")
	flagBench  = flag.String("bench", "", "write execution statistics into this file periodically")
	flagMode   = flag.String("mode", ModeFuzzing.Name, modesDescription())
	flagTests  = flag.String("tests", "", "prefix to match test file names (for -mode run-tests)")
)

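// Manager is the central object of syz-manager: it owns the VM pool, the RPC and
// HTTP servers, the corpus and crash storage, and the optional dashboard connection.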
type Manager struct {
	cfg             *mgrconfig.Config
	mode            *Mode
	vmPool          *vm.Pool
	pool            *vm.Dispatcher
	target          *prog.Target
	sysTarget       *targets.Target
	reporter        *report.Reporter
	crashStore      *manager.CrashStore
	serv            rpcserver.Server
	http            *manager.HTTPServer
	servStats       rpcserver.Stats
	corpus          *corpus.Corpus
	corpusDB        *db.DB
	corpusDBMu      sync.Mutex // for concurrent operations on corpusDB
	corpusPreload   chan []fuzzer.Candidate
	firstConnect    atomic.Int64 // unix time, or 0 if not connected
	crashTypes      map[string]bool
	enabledFeatures flatrpc.Feature
	checkDone       atomic.Bool
	reportGenerator *manager.ReportGeneratorWrapper
	fresh           bool
	coverFilters    manager.CoverageFilters

	dash *dashapi.Dashboard
	// This is specifically separated from dash, so that we can keep dash = nil when
	// cfg.DashboardOnlyRepro is set, so that we don't accidentally use dash for anything.
	dashRepro *dashapi.Dashboard

	mu             sync.Mutex
	fuzzer         atomic.Pointer[fuzzer.Fuzzer]
	snapshotSource *queue.Distributor
	phase          int

	disabledHashes   map[string]struct{}
	newRepros        [][]byte
	lastMinCorpus    int
	memoryLeakFrames map[string]bool
	dataRaceFrames   map[string]bool
	saturatedCalls   map[string]bool

	externalReproQueue chan *manager.Crash
	crashes            chan *manager.Crash

	benchMu   sync.Mutex
	benchFile *os.File

	assetStorage *asset.Storage
	fsckChecker  image.FsckChecker

	reproLoop *manager.ReproLoop

	Stats
}

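// Mode describes one of the operation modes of syz-manager, selected via the -mode flag.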
type Mode struct {
	Name                  string
	Description           string
	UseDashboard          bool // the mode connects to dashboard/hub
	LoadCorpus            bool // the mode needs to load the corpus
	ExitAfterMachineCheck bool // exit with 0 status when machine check is done
	// Exit with non-zero status and save the report to workdir/report.json if any kernel crash happens.
	FailOnCrashes bool
	CheckConfig   func(cfg *mgrconfig.Config) error
}

var (
	ModeFuzzing = &Mode{
		Name:         "fuzzing",
		Description:  `the default continuous fuzzing mode`,
		UseDashboard: true,
		LoadCorpus:   true,
	}
	ModeSmokeTest = &Mode{
		Name: "smoke-test",
		Description: `run smoke test for syzkaller+kernel
	The test consists of booting VMs and running some simple test programs
	to ensure that fuzzing can proceed in general. After completing the test
	the process exits and the exit status indicates success/failure.
	If the kernel oopses during testing, the report is saved to workdir/report.json.`,
		ExitAfterMachineCheck: true,
		FailOnCrashes:         true,
	}
	ModeCorpusTriage = &Mode{
		Name: "corpus-triage",
		Description: `triage corpus and exit
	This is useful mostly for benchmarking with testbed.`,
		LoadCorpus: true,
	}
	ModeCorpusRun = &Mode{
		Name:        "corpus-run",
		Description: `continuously run the corpus programs`,
		LoadCorpus:  true,
	}
	ModeRunTests = &Mode{
		Name: "run-tests",
		Description: `run unit tests
	Run sys/os/test/* tests in various modes and print results.`,
	}
	ModeIfaceProbe = &Mode{
		Name: "iface-probe",
		Description: `run dynamic part of kernel interface auto-extraction
	When the probe is finished, manager writes the result to workdir/interfaces.json file and exits.`,
		CheckConfig: func(cfg *mgrconfig.Config) error {
			if cfg.Snapshot {
				return fmt.Errorf("snapshot mode is not supported")
			}
			if cfg.Sandbox != "none" {
				return fmt.Errorf("sandbox \"%v\" is not supported (only \"none\")", cfg.Sandbox)
			}
			if !cfg.Cover {
				return fmt.Errorf("coverage is required")
			}
			return nil
		},
	}

	modes = []*Mode{
		ModeFuzzing,
		ModeSmokeTest,
		ModeCorpusTriage,
		ModeCorpusRun,
		ModeRunTests,
		ModeIfaceProbe,
	}
)

func modesDescription() string {
	desc := "mode of operation, one of:\n"
	for _, mode := range modes {
		desc += fmt.Sprintf(" - %v: %v\n", mode.Name, mode.Description)
	}
	return desc
}

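// Manager phases. The phase only moves forward; see setPhaseLocked.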
const (
	// Just started, nothing done yet.
	phaseInit = iota
	// Corpus is loaded and machine is checked.
	phaseLoadedCorpus
	// Triaged all inputs from corpus.
	// This is when we start querying hub and minimizing persistent corpus.
	phaseTriagedCorpus
	// Done the first request to hub.
	phaseQueriedHub
	// Triaged all new inputs from hub.
	// This is when we start reproducing crashes.
	phaseTriagedHub
)

func main() {
	flag.Parse()
	if !prog.GitRevisionKnown() {
		log.Fatalf("bad syz-manager build: build with make, run bin/syz-manager")
	}
	log.EnableLogCaching(1000, 1<<20)
	cfg, err := mgrconfig.LoadFile(*flagConfig)
	if err != nil {
		log.Fatalf("%v", err)
	}
	if cfg.DashboardAddr != "" {
		// This makes it easier to distinguish the logs of individual syz-manager instances.
		log.SetName(cfg.Name)
	}
	var mode *Mode
	for _, m := range modes {
		if *flagMode == m.Name {
			mode = m
			break
		}
	}
	if mode == nil {
		flag.PrintDefaults()
		log.Fatalf("unknown mode: %v", *flagMode)
	}
	if mode.CheckConfig != nil {
		if err := mode.CheckConfig(cfg); err != nil {
			log.Fatalf("%v mode: %v", mode.Name, err)
		}
	}
	if !mode.UseDashboard {
		cfg.DashboardClient = ""
		cfg.HubClient = ""
	}
	if cfg.Experimental.EnableKFuzzTest {
		vmLinuxPath := path.Join(cfg.KernelObj, cfg.SysTarget.KernelObject)
		log.Logf(0, "enabling KFuzzTest targets")
		_, err := kfuzztest.ActivateKFuzzTargets(cfg.Target, vmLinuxPath)
		if err != nil {
			log.Fatalf("failed to enable KFuzzTest targets: %v", err)
		}
	}
	RunManager(mode, cfg)
}

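// RunManager sets up all manager subsystems (RPC and HTTP servers, dashboard and
// asset storage connections, VM pool) and runs the main dispatcher loop until shutdown.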
func RunManager(mode *Mode, cfg *mgrconfig.Config) {
	var vmPool *vm.Pool
	if !cfg.VMLess {
		var err error
		vmPool, err = vm.Create(cfg, *flagDebug)
		if err != nil {
			log.Fatalf("%v", err)
		}
		defer vmPool.Close()
	}

	osutil.MkdirAll(cfg.Workdir)

	reporter, err := report.NewReporter(cfg)
	if err != nil {
		log.Fatalf("%v", err)
	}

	mgr := &Manager{
		cfg:                cfg,
		mode:               mode,
		vmPool:             vmPool,
		corpusPreload:      make(chan []fuzzer.Candidate),
		target:             cfg.Target,
		sysTarget:          cfg.SysTarget,
		reporter:           reporter,
		crashStore:         manager.NewCrashStore(cfg),
		crashTypes:         make(map[string]bool),
		disabledHashes:     make(map[string]struct{}),
		memoryLeakFrames:   make(map[string]bool),
		dataRaceFrames:     make(map[string]bool),
		fresh:              true,
		externalReproQueue: make(chan *manager.Crash, 10),
		crashes:            make(chan *manager.Crash, 10),
		saturatedCalls:     make(map[string]bool),
		reportGenerator:    manager.ReportGeneratorCache(cfg),
	}
	if *flagDebug {
		mgr.cfg.Procs = 1
	}
	mgr.http = &manager.HTTPServer{
		// Note that if cfg.HTTP == "", we don't start the server.
		Cfg:        cfg,
		StartTime:  time.Now(),
		CrashStore: mgr.crashStore,
	}

	mgr.initStats()
	if mgr.mode.LoadCorpus {
		go mgr.preloadCorpus()
	} else {
		close(mgr.corpusPreload)
	}

	// Create RPC server for fuzzers.
	mgr.servStats = rpcserver.NewStats()
	rpcCfg := &rpcserver.RemoteConfig{
		Config:  mgr.cfg,
		Manager: mgr,
		Stats:   mgr.servStats,
		Debug:   *flagDebug,
	}
	mgr.serv, err = rpcserver.New(rpcCfg)
	if err != nil {
		log.Fatalf("failed to create rpc server: %v", err)
	}
	if err := mgr.serv.Listen(); err != nil {
		log.Fatalf("failed to start rpc server: %v", err)
	}
	ctx := vm.ShutdownCtx()
	go func() {
		err := mgr.serv.Serve(ctx)
		if err != nil {
			log.Fatalf("%s", err)
		}
	}()
	log.Logf(0, "serving rpc on tcp://%v", mgr.serv.Port())

	if cfg.DashboardAddr != "" {
		opts := []dashapi.DashboardOpts{}
		if cfg.DashboardUserAgent != "" {
			opts = append(opts, dashapi.UserAgent(cfg.DashboardUserAgent))
		}
		dash, err := dashapi.New(cfg.DashboardClient, cfg.DashboardAddr, cfg.DashboardKey, opts...)
		if err != nil {
			log.Fatalf("failed to create dashapi connection: %v", err)
		}
		mgr.dashRepro = dash
		if !cfg.DashboardOnlyRepro {
			mgr.dash = dash
		}
	}

	if !cfg.AssetStorage.IsEmpty() {
		mgr.assetStorage, err = asset.StorageFromConfig(cfg.AssetStorage, mgr.dash)
		if err != nil {
			log.Fatalf("failed to init asset storage: %v", err)
		}
	}

	if *flagBench != "" {
		mgr.initBench()
	}

	go mgr.heartbeatLoop()
	if mgr.mode != ModeSmokeTest {
		osutil.HandleInterrupts(vm.Shutdown)
	}
	if mgr.vmPool == nil {
		log.Logf(0, "no VMs started (type=none)")
		log.Logf(0, "you are supposed to start syz-executor manually as:")
		log.Logf(0, "syz-executor runner local manager.ip %v", mgr.serv.Port())
		<-vm.Shutdown
		return
	}
	mgr.pool = vm.NewDispatcher(mgr.vmPool, mgr.fuzzerInstance)
	mgr.http.Pool = mgr.pool
	reproVMs := max(0, mgr.vmPool.Count()-mgr.cfg.FuzzingVMs)
	mgr.reproLoop = manager.NewReproLoop(mgr, reproVMs, mgr.cfg.DashboardOnlyRepro)
	mgr.http.ReproLoop = mgr.reproLoop
	mgr.http.TogglePause = mgr.pool.TogglePause

	if mgr.cfg.HTTP != "" {
		go func() {
			err := mgr.http.Serve(ctx)
			if err != nil {
				log.Fatalf("failed to serve HTTP: %v", err)
			}
		}()
	}
	go mgr.trackUsedFiles()
	go mgr.processFuzzingResults(ctx)
	mgr.pool.Loop(ctx)
}

// Exit successfully in special operation modes.
func (mgr *Manager) exit(reason string) {
	log.Logf(0, "%v finished, shutting down...", reason)
	mgr.writeBench()
	close(vm.Shutdown)
	time.Sleep(10 * time.Second)
	os.Exit(0)
}

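// heartbeatLoop accounts the fuzzing time and logs a one-line snapshot of the console stats every 10 seconds.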
func (mgr *Manager) heartbeatLoop() {
	lastTime := time.Now()
	for now := range time.NewTicker(10 * time.Second).C {
		diff := int(now.Sub(lastTime))
		lastTime = now
		if mgr.firstConnect.Load() == 0 {
			continue
		}
		mgr.statFuzzingTime.Add(diff * mgr.servStats.StatNumFuzzing.Val())
		buf := new(bytes.Buffer)
		for _, stat := range stat.Collect(stat.Console) {
			fmt.Fprintf(buf, "%v=%v ", stat.Name, stat.Value)
		}
		log.Logf(0, "%s", buf.String())
	}
}

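// initBench creates the -bench file and starts a goroutine that appends a JSON snapshot of all stats to it once a minute.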
func (mgr *Manager) initBench() {
	f, err := os.OpenFile(*flagBench, os.O_WRONLY|os.O_CREATE|os.O_EXCL, osutil.DefaultFilePerm)
	if err != nil {
		log.Fatalf("failed to open bench file: %v", err)
	}
	mgr.benchFile = f
	go func() {
		for range time.NewTicker(time.Minute).C {
			mgr.writeBench()
		}
	}()
}

func (mgr *Manager) writeBench() {
	if mgr.benchFile == nil {
		return
	}
	mgr.benchMu.Lock()
	defer mgr.benchMu.Unlock()
	vals := make(map[string]int)
	for _, stat := range stat.Collect(stat.All) {
		vals[stat.Name] = stat.V
	}
	data, err := json.MarshalIndent(vals, "", "  ")
	if err != nil {
		log.Fatalf("failed to serialize bench data")
	}
	if _, err := mgr.benchFile.Write(append(data, '\n')); err != nil {
		log.Fatalf("failed to write bench data")
	}
}

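// processFuzzingResults dispatches crashes, boot errors, and external repro requests until the context is cancelled.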
func (mgr *Manager) processFuzzingResults(ctx context.Context) {
	for {
		select {
		case <-ctx.Done():
			return
		case crash := <-mgr.crashes:
			needRepro := mgr.saveCrash(crash)
			if mgr.cfg.Reproduce && needRepro {
				mgr.reproLoop.Enqueue(crash)
			}
		case err := <-mgr.pool.BootErrors:
			crash := mgr.convertBootError(err)
			if crash != nil {
				mgr.saveCrash(crash)
			}
		case crash := <-mgr.externalReproQueue:
			if mgr.NeedRepro(crash) {
				mgr.reproLoop.Enqueue(crash)
			}
		}
	}
}

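// convertBootError converts a VM boot error into a crash object, or returns nil if the error is not a VM boot error.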
func (mgr *Manager) convertBootError(err error) *manager.Crash {
	var bootErr vm.BootErrorer
	if errors.As(err, &bootErr) {
		title, output := bootErr.BootError()
		rep := mgr.reporter.Parse(output)
		if rep != nil && rep.Type == crash_pkg.UnexpectedReboot {
			// Avoid detecting any boot crash as "unexpected kernel reboot".
			rep = mgr.reporter.ParseFrom(output, rep.SkipPos)
		}
		if rep == nil {
			rep = &report.Report{
				Title:  title,
				Output: output,
			}
		}
		return &manager.Crash{
			Report: rep,
		}
	}
	return nil
}

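// reportReproError logs a repro failure; situations that are expected (empty crash log,
// shutdown in progress) are logged as informational messages rather than errors.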
func reportReproError(err error) {
	shutdown := false
	select {
	case <-vm.Shutdown:
		shutdown = true
	default:
	}

	if errors.Is(err, repro.ErrEmptyCrashLog) {
		// The kernel could have crashed before we executed any programs.
		log.Logf(0, "repro failed: %v", err)
		return
	} else if errors.Is(err, repro.ErrNoVMs) || errors.Is(err, context.Canceled) {
		// This error is to be expected if we're shutting down.
		if shutdown {
			return
		}
	}
	// Report everything else as errors.
	log.Errorf("repro failed: %v", err)
}

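// RunRepro attempts to create a reproducer from the crash log and, if cfg.StraceBin is set,
// re-runs a successful repro under strace. The result is also passed to processRepro.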
func (mgr *Manager) RunRepro(ctx context.Context, crash *manager.Crash) *manager.ReproResult {
	res, stats, err := repro.Run(ctx, crash.Output, repro.Environment{
		Config:   mgr.cfg,
		Features: mgr.enabledFeatures,
		Reporter: mgr.reporter,
		Pool:     mgr.pool,
	})
	ret := &manager.ReproResult{
		Crash: crash,
		Repro: res,
		Stats: stats,
		Err:   err,
	}
	if err == nil && res != nil && mgr.cfg.StraceBin != "" {
		const straceAttempts = 2
		for i := 1; i <= straceAttempts; i++ {
			strace := repro.RunStrace(res, mgr.cfg, mgr.reporter, mgr.pool)
			sameBug := strace.IsSameBug(res)
			log.Logf(0, "strace run attempt %d/%d for '%s': same bug %v, error %v",
				i, straceAttempts, res.Report.Title, sameBug, strace.Error)
			// We only want to save strace output if it resulted in the same bug.
			// Otherwise, it will be hard to reproduce on syzbot and will confuse users.
			if sameBug {
				ret.Strace = strace
				break
			}
		}
	}

	mgr.processRepro(ret)

	return ret
}

func (mgr *Manager) processRepro(res *manager.ReproResult) {
	if res.Err != nil {
		reportReproError(res.Err)
	}
	if res.Repro == nil {
		if res.Crash.Title == "" {
			log.Logf(1, "repro '%v' not from dashboard, so not reporting the failure",
				res.Crash.FullTitle())
		} else {
			log.Logf(1, "report repro failure of '%v'", res.Crash.Title)
			mgr.saveFailedRepro(res.Crash.Report, res.Stats)
		}
	} else {
		mgr.saveRepro(res)
	}
}

func (mgr *Manager) preloadCorpus() {
	info, err := manager.LoadSeeds(mgr.cfg, false)
	if err != nil {
		log.Fatalf("failed to load corpus: %v", err)
	}
	mgr.fresh = info.Fresh
	mgr.corpusDB = info.CorpusDB
	mgr.corpusPreload <- info.Candidates
}

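// loadCorpus filters the preloaded corpus against the enabled syscalls and returns the triage candidates, smallest programs first.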
func (mgr *Manager) loadCorpus(enabledSyscalls map[*prog.Syscall]bool) []fuzzer.Candidate {
	ret := manager.FilterCandidates(<-mgr.corpusPreload, enabledSyscalls, true)
	if mgr.cfg.PreserveCorpus {
		for _, hash := range ret.ModifiedHashes {
			// This program contains a disabled syscall.
			// We won't execute it, but remember its hash so
			// it is not deleted during minimization.
			mgr.disabledHashes[hash] = struct{}{}
		}
	}
	// Let's favor smaller programs, otherwise the poorly minimized ones may overshadow the rest.
	sort.SliceStable(ret.Candidates, func(i, j int) bool {
		return len(ret.Candidates[i].Prog.Calls) < len(ret.Candidates[j].Prog.Calls)
	})
	reminimized := ret.ReminimizeSubset()
	resmashed := ret.ResmashSubset()
	log.Logf(0, "%-24v: %v (%v seeds), %d to be reminimized, %d to be resmashed",
		"corpus", len(ret.Candidates), ret.SeedCount, reminimized, resmashed)
	return ret.Candidates
}

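// fuzzerInstance runs one fuzzing VM and forwards any crash it produces to mgr.crashes.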
func (mgr *Manager) fuzzerInstance(ctx context.Context, inst *vm.Instance, updInfo dispatcher.UpdateInfo) {
	mgr.mu.Lock()
	serv := mgr.serv
	mgr.mu.Unlock()
	if serv == nil {
		// We're in the process of switching off the RPCServer.
		return
	}
	injectExec := make(chan bool, 10)
	serv.CreateInstance(inst.Index(), injectExec, updInfo)

	reps, vmInfo, err := mgr.runInstanceInner(ctx, inst,
		vm.WithExitCondition(vm.ExitTimeout),
		vm.WithInjectExecuting(injectExec),
		vm.WithEarlyFinishCb(func() {
			// Depending on the crash type and kernel config, fuzzing may continue
			// running for several seconds even after the kernel has printed a crash report.
			// This litters the log, and we want to prevent it.
			serv.StopFuzzing(inst.Index())
		}))
	var extraExecs []report.ExecutorInfo
	var rep *report.Report
	if len(reps) != 0 {
		rep = reps[0]
	}
	if rep != nil && rep.Executor != nil {
		extraExecs = []report.ExecutorInfo{*rep.Executor}
	}
	lastExec, machineInfo := serv.ShutdownInstance(inst.Index(), rep != nil, extraExecs...)
	if rep != nil {
		rpcserver.PrependExecuting(rep, lastExec)
		if len(vmInfo) != 0 {
			machineInfo = append(append(vmInfo, '\n'), machineInfo...)
		}
		rep.MachineInfo = machineInfo
	}
	if err == nil && rep != nil {
		mgr.crashes <- &manager.Crash{
			InstanceIndex: inst.Index(),
			Report:        rep,
			TailReports:   reps[1:],
		}
	}
	if err != nil {
		log.Logf(1, "VM %v: failed with error: %v", inst.Index(), err)
	}
}

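// runInstanceInner forwards the RPC port into the VM, copies the executor binary if it is not
// already in the image, and runs it until the VM crashes, fails, or hits the running timeout.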
func (mgr *Manager) runInstanceInner(ctx context.Context, inst *vm.Instance, opts ...func(*vm.RunOptions),
) ([]*report.Report, []byte, error) {
	fwdAddr, err := inst.Forward(mgr.serv.Port())
	if err != nil {
		return nil, nil, fmt.Errorf("failed to setup port forwarding: %w", err)
	}

	// If ExecutorBin is provided, it means that syz-executor is already in the image,
	// so no need to copy it.
	executorBin := mgr.sysTarget.ExecutorBin
	if executorBin == "" {
		executorBin, err = inst.Copy(mgr.cfg.ExecutorBin)
		if err != nil {
			return nil, nil, fmt.Errorf("failed to copy binary: %w", err)
		}
	}

	// Run the fuzzer binary.
	start := time.Now()

	host, port, err := net.SplitHostPort(fwdAddr)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to parse manager's address: %w", err)
	}
	cmd := fmt.Sprintf("%v runner %v %v %v", executorBin, inst.Index(), host, port)
	ctxTimeout, cancel := context.WithTimeout(ctx, mgr.cfg.Timeouts.VMRunningTime)
	defer cancel()
	_, reps, err := inst.Run(ctxTimeout, mgr.reporter, cmd, opts...)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to run fuzzer: %w", err)
	}
	if len(reps) == 0 {
		// This is the only "OK" outcome.
		log.Logf(0, "VM %v: running for %v, restarting", inst.Index(), time.Since(start))
		return nil, nil, nil
	}
	vmInfo, err := inst.Info()
	if err != nil {
		vmInfo = []byte(fmt.Sprintf("error getting VM info: %v\n", err))
	}
	return reps, vmInfo, nil
}

func (mgr *Manager) emailCrash(crash *manager.Crash) {
	if len(mgr.cfg.EmailAddrs) == 0 {
		return
	}
	args := []string{"-s", "syzkaller: " + crash.Title}
	args = append(args, mgr.cfg.EmailAddrs...)
	log.Logf(0, "sending email to %v", mgr.cfg.EmailAddrs)

	cmd := exec.Command("mailx", args...)
	cmd.Stdin = bytes.NewReader(crash.Report.Report)
	if _, err := osutil.Run(10*time.Minute, cmd); err != nil {
		log.Logf(0, "failed to send email: %v", err)
	}
}

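// saveCrash symbolizes, logs, and stores a crash (or uploads it to the dashboard).
// It returns true if a reproduction attempt is warranted.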
func (mgr *Manager) saveCrash(crash *manager.Crash) bool {
	if err := mgr.reporter.Symbolize(crash.Report); err != nil {
		log.Errorf("failed to symbolize report: %v", err)
	}
	if crash.Type == crash_pkg.MemoryLeak {
		mgr.mu.Lock()
		mgr.memoryLeakFrames[crash.Frame] = true
		mgr.mu.Unlock()
	}
	if crash.Type == crash_pkg.KCSANDataRace {
		mgr.mu.Lock()
		mgr.dataRaceFrames[crash.Frame] = true
		mgr.mu.Unlock()
	}
	flags := ""
	if crash.Corrupted {
		flags += " [corrupted]"
	}
	if crash.Suppressed {
		flags += " [suppressed]"
	}
	log.Logf(0, "VM %v: crash: %v%v", crash.InstanceIndex, crash.Report.Title, flags)
	for i, report := range crash.TailReports {
		log.Logf(0, "VM %v: crash(tail%d): %v%v", crash.InstanceIndex, i, report.Title, flags)
	}

	if mgr.mode.FailOnCrashes {
		path := filepath.Join(mgr.cfg.Workdir, "report.json")
		if err := osutil.WriteJSON(path, crash.Report); err != nil {
			log.Fatal(err)
		}
		log.Fatalf("kernel crashed in smoke testing mode, exiting")
	}

	if crash.Suppressed {
		// Collect all of them into a single bucket so that it's possible to control and assess them,
		// e.g. if there are some spikes in suppressed reports.
		crash.Title = "suppressed report"
		mgr.statSuppressed.Add(1)
	}

	mgr.statCrashes.Add(1)
	mgr.mu.Lock()
	if !mgr.crashTypes[crash.Title] {
		mgr.crashTypes[crash.Title] = true
		mgr.statCrashTypes.Add(1)
	}
	mgr.mu.Unlock()

	if mgr.dash != nil {
		if crash.Type == crash_pkg.MemoryLeak {
			return true
		}
		dc := &dashapi.Crash{
			BuildID:     mgr.cfg.Tag,
			Title:       crash.Title,
			AltTitles:   crash.AltTitles,
			Corrupted:   crash.Corrupted,
			Suppressed:  crash.Suppressed,
			Recipients:  crash.Recipients.ToDash(),
			Log:         crash.Output,
			Report:      report.SplitReportBytes(crash.Report.Report)[0],
			MachineInfo: crash.MachineInfo,
		}
		setGuiltyFiles(dc, crash.Report)
		resp, err := mgr.dash.ReportCrash(dc)
		if err != nil {
			log.Logf(0, "failed to report crash to dashboard: %v", err)
			return false
		}
		// Don't store the crash locally even if we failed to upload it.
		// There is zero chance that anyone will ever look in the crashes/ folder of those instances.
		return mgr.cfg.Reproduce && resp.NeedRepro
	}
	first, err := mgr.crashStore.SaveCrash(crash)
	if err != nil {
		log.Logf(0, "failed to save the crash: %v", err)
		return false
	}
	if first {
		go mgr.emailCrash(crash)
	}
	return mgr.NeedRepro(crash)
}

func (mgr *Manager) needLocalRepro(crash *manager.Crash) bool {
	if !mgr.cfg.Reproduce || crash.Corrupted || crash.Suppressed {
		return false
	}
	if mgr.crashStore.HasRepro(crash.Title) {
		return false
	}
	return mgr.crashStore.MoreReproAttempts(crash.Title)
}

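// NeedRepro reports whether it is worth trying to reproduce the given crash.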
func (mgr *Manager) NeedRepro(crash *manager.Crash) bool {
	if !mgr.cfg.Reproduce {
		return false
	}
	if crash.FromHub || crash.FromDashboard {
		return true
	}
	mgr.mu.Lock()
	phase, features := mgr.phase, mgr.enabledFeatures
	mgr.mu.Unlock()
	if phase < phaseLoadedCorpus || (features&flatrpc.FeatureLeak != 0 &&
		crash.Type != crash_pkg.MemoryLeak) {
		// Leak checking is very slow, don't bother reproducing other crashes on leak instance.
		return false
	}
	if mgr.dashRepro == nil {
		return mgr.needLocalRepro(crash)
	}
	cid := &dashapi.CrashID{
		BuildID:    mgr.cfg.Tag,
		Title:      crash.Title,
		Corrupted:  crash.Corrupted,
		Suppressed: crash.Suppressed,
		// When cfg.DashboardOnlyRepro is enabled, we don't send any reports to dashboard.
		// We also don't send leak reports w/o reproducers to dashboard, so they may be missing.
		MayBeMissing: mgr.dash == nil || crash.Type == crash_pkg.MemoryLeak,
	}
	needRepro, err := mgr.dashRepro.NeedRepro(cid)
	if err != nil {
		log.Logf(0, "dashboard.NeedRepro failed: %v", err)
	}
	return needRepro
}

func truncateReproLog(log []byte) []byte {
	// Repro logs can get quite large and we have trouble sending large API requests (see #4495).
	// Let's truncate the log to a 512KB prefix and 512KB suffix.
	return report.Truncate(log, 512000, 512000)
}

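// saveFailedRepro records an unsuccessful repro attempt on the dashboard or,
// if no dashboard is configured, in the local crash store.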
func (mgr *Manager) saveFailedRepro(rep *report.Report, stats *repro.Stats) {
	reproLog := stats.FullLog()
	if mgr.dash != nil {
		if rep.Type == crash_pkg.MemoryLeak {
			// Don't send failed leak repro attempts to dashboard
			// as we did not send the crash itself.
			log.Logf(1, "failed repro of '%v': not sending because of the memleak type", rep.Title)
			return
		}
		cid := &dashapi.CrashID{
			BuildID:      mgr.cfg.Tag,
			Title:        rep.Title,
			Corrupted:    rep.Corrupted,
			Suppressed:   rep.Suppressed,
			MayBeMissing: rep.Type == crash_pkg.MemoryLeak,
			ReproLog:     truncateReproLog(reproLog),
		}
		if err := mgr.dash.ReportFailedRepro(cid); err != nil {
			log.Logf(0, "failed to report failed repro to dashboard (log size %d): %v",
				len(reproLog), err)
		}
		return
	}
	err := mgr.crashStore.SaveFailedRepro(rep.Title, reproLog)
	if err != nil {
		log.Logf(0, "failed to save repro log for %q: %v", rep.Title, err)
	}
}

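// saveRepro persists a successful reproducer: it is queued for syz-hub, uploaded to the
// dashboard if one is configured, and otherwise saved to the local crash store.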
func (mgr *Manager) saveRepro(res *manager.ReproResult) {
	repro := res.Repro
	opts := fmt.Sprintf("# %+v\n", repro.Opts)
	progText := repro.Prog.Serialize()

	// Append this repro to repro list to send to hub if it didn't come from hub originally.
	if !res.Crash.FromHub {
		progForHub := []byte(fmt.Sprintf("# %+v\n# %v\n# %v\n%s",
			repro.Opts, repro.Report.Title, mgr.cfg.Tag, progText))
		mgr.mu.Lock()
		mgr.newRepros = append(mgr.newRepros, progForHub)
		mgr.mu.Unlock()
	}

	var cprogText []byte
	if repro.CRepro {
		var err error
		cprogText, err = repro.CProgram()
		if err != nil {
			log.Logf(0, "failed to write C source: %v", err)
		}
	}

	if mgr.dash != nil {
		// Note: we intentionally don't set Corrupted for reproducers:
		// 1. This is reproducible so can be debugged even with corrupted report.
		// 2. Repro re-tried 3 times and still got corrupted report at the end,
		//    so maybe corrupted report detection is broken.
		// 3. Reproduction is expensive so it's good to persist the result.

		reproReport := repro.Report
		output := reproReport.Output

		var crashFlags dashapi.CrashFlags
		if res.Strace != nil {
			// If syzkaller managed to successfully run the repro with strace, send
			// the report and the output generated under strace.
			reproReport = res.Strace.Report
			output = res.Strace.Output
			crashFlags = dashapi.CrashUnderStrace
		}

		dc := &dashapi.Crash{
			BuildID:       mgr.cfg.Tag,
			Title:         reproReport.Title,
			AltTitles:     reproReport.AltTitles,
			Suppressed:    reproReport.Suppressed,
			Recipients:    reproReport.Recipients.ToDash(),
			Log:           output,
			Flags:         crashFlags,
			Report:        report.SplitReportBytes(reproReport.Report)[0],
			ReproOpts:     repro.Opts.Serialize(),
			ReproSyz:      progText,
			ReproC:        cprogText,
			ReproLog:      truncateReproLog(res.Stats.FullLog()),
			Assets:        mgr.uploadReproAssets(repro),
			OriginalTitle: res.Crash.Title,
		}
		setGuiltyFiles(dc, reproReport)
		if _, err := mgr.dash.ReportCrash(dc); err != nil {
			log.Logf(0, "failed to report repro to dashboard: %v", err)
		} else {
			// Don't store the crash locally, if we've successfully
			// uploaded it to the dashboard. These will just eat disk space.
			return
		}
	}
	err := mgr.crashStore.SaveRepro(res, append([]byte(opts), progText...), cprogText)
	if err != nil {
		log.Logf(0, "%s", err)
	}
}

func (mgr *Manager) ResizeReproPool(size int) {
	mgr.pool.ReserveForRun(size)
}

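// uploadReproAssets uploads the assets embedded in the repro program (e.g. mount images)
// to asset storage and, when configured, checks the mount images with fsck.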
func (mgr *Manager) uploadReproAssets(repro *repro.Result) []dashapi.NewAsset {
	if mgr.assetStorage == nil {
		return nil
	}

	ret := []dashapi.NewAsset{}
	repro.Prog.ForEachAsset(func(name string, typ prog.AssetType, r io.Reader, c *prog.Call) {
		dashTyp, ok := map[prog.AssetType]dashapi.AssetType{
			prog.MountInRepro: dashapi.MountInRepro,
		}[typ]
		if !ok {
			panic("unknown extracted prog asset")
		}
		r2 := &bytes.Buffer{}
		r1 := io.TeeReader(r, r2)
		asset, err := mgr.assetStorage.UploadCrashAsset(r1, name, dashTyp, nil)
		if err != nil {
			log.Logf(1, "processing of the asset %v (%v) failed: %v", name, typ, err)
			return
		}
		// Report file systems that fail fsck with a separate tag.
		if mgr.cfg.RunFsck && dashTyp == dashapi.MountInRepro &&
			c.Meta.Attrs.Fsck != "" && mgr.fsckChecker.Exists(c.Meta.Attrs.Fsck) {
			logs, isClean, err := image.Fsck(r2, c.Meta.Attrs.Fsck)
			if err != nil {
				log.Errorf("fsck of the asset %v failed: %v", name, err)
			} else {
				asset.FsckLog = logs
				asset.FsIsClean = isClean
			}
		}
		ret = append(ret, asset)
	})
	return ret
}

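// corpusInputHandler persists new corpus items to corpus.db and accounts the filtered-coverage statistic.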
func (mgr *Manager) corpusInputHandler(updates <-chan corpus.NewItemEvent) {
	for update := range updates {
		if len(update.NewCover) != 0 && mgr.coverFilters.ExecutorFilter != nil {
			filtered := 0
			for _, pc := range update.NewCover {
				if _, ok := mgr.coverFilters.ExecutorFilter[pc]; ok {
					filtered++
				}
			}
			mgr.statCoverFiltered.Add(filtered)
		}
		if update.Exists {
			// We only save new progs into the corpus.db file.
			continue
		}
		mgr.corpusDBMu.Lock()
		mgr.corpusDB.Save(update.Sig, update.ProgData, 0)
		if err := mgr.corpusDB.Flush(); err != nil {
			log.Errorf("failed to save corpus database: %v", err)
		}
		mgr.corpusDBMu.Unlock()
	}
}

func (mgr *Manager) getMinimizedCorpus() []*corpus.Item {
	mgr.mu.Lock()
	defer mgr.mu.Unlock()
	mgr.minimizeCorpusLocked()
	return mgr.corpus.Items()
}

func (mgr *Manager) getNewRepros() [][]byte {
	mgr.mu.Lock()
	defer mgr.mu.Unlock()
	repros := mgr.newRepros
	mgr.newRepros = nil
	return repros
}

func (mgr *Manager) addNewCandidates(candidates []fuzzer.Candidate) {
	mgr.mu.Lock()
	if mgr.phase == phaseTriagedCorpus {
		mgr.setPhaseLocked(phaseQueriedHub)
	}
	mgr.mu.Unlock()
	if mgr.cfg.Experimental.ResetAccState {
		// Don't accept new candidates -- the execution is already very slow,
		// syz-hub will just overwhelm us.
		return
	}
	mgr.fuzzer.Load().AddCandidates(candidates)
}

func (mgr *Manager) minimizeCorpusLocked() {
	// Don't minimize the corpus until we have triaged all inputs from it.
	// During corpus triage it would happen very often since we are actively adding inputs,
	// and presumably the persistent corpus was reasonably minimal, and we don't use it for fuzzing yet.
	if mgr.phase < phaseTriagedCorpus {
		return
	}
	currSize := mgr.corpus.StatProgs.Val()
	if currSize <= mgr.lastMinCorpus*103/100 {
		return
	}
	mgr.corpus.Minimize(mgr.cfg.Cover)
	newSize := mgr.corpus.StatProgs.Val()

	log.Logf(1, "minimized corpus: %v -> %v", currSize, newSize)
	mgr.lastMinCorpus = newSize

	// From time to time we get corpus explosion due to different reasons:
	// generic bugs, per-OS bugs, problems with fallback coverage, kcov bugs, etc.
	// This has a bad effect on the instance and especially on instances
	// connected via hub. Do some per-syscall sanity checking to prevent this.
	for call, info := range mgr.corpus.CallCover() {
		if mgr.cfg.Cover {
			// If we have less than 1K inputs for this call,
			// accept all new inputs unconditionally.
			if info.Count < 1000 {
				continue
			}
			// If we have more than 3K already, don't accept any more.
			// Between 1K and 3K look at the amount of coverage we are getting from these programs.
			// Empirically, real coverage for the most saturated syscalls is ~30-60
			// per program (even when we have a thousand of them). For the explosion
			// case, coverage tends to be much lower (~0.3-5 per program).
			if info.Count < 3000 && len(info.Cover)/info.Count >= 10 {
				continue
			}
		} else {
			// If we don't have real coverage, signal is weak.
			// If we have more than several hundreds, there is something wrong.
			if info.Count < 300 {
				continue
			}
		}
		if mgr.saturatedCalls[call] {
			continue
		}
		mgr.saturatedCalls[call] = true
		log.Logf(0, "coverage for %v has saturated, not accepting more inputs", call)
	}

	mgr.corpusDBMu.Lock()
	defer mgr.corpusDBMu.Unlock()
	for key := range mgr.corpusDB.Records {
		ok1 := mgr.corpus.Item(key) != nil
		_, ok2 := mgr.disabledHashes[key]
		if !ok1 && !ok2 {
			mgr.corpusDB.Delete(key)
		}
	}
	if err := mgr.corpusDB.Flush(); err != nil {
		log.Fatalf("failed to save corpus database: %v", err)
	}
	mgr.corpusDB.BumpVersion(manager.CurrentDBVersion)
}

func setGuiltyFiles(crash *dashapi.Crash, report *report.Report) {
	if report.GuiltyFile != "" {
		crash.GuiltyFiles = []string{report.GuiltyFile}
	}
}

func (mgr *Manager) BugFrames() (leaks, races []string) {
	mgr.mu.Lock()
	defer mgr.mu.Unlock()
	for frame := range mgr.memoryLeakFrames {
		leaks = append(leaks, frame)
	}
	for frame := range mgr.dataRaceFrames {
		races = append(races, frame)
	}
	return
}

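// MachineChecked is invoked once the target machine has been checked; it records the enabled
// features and syscalls and constructs the request source for the current operation mode.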
func (mgr *Manager) MachineChecked(features flatrpc.Feature,
	enabledSyscalls map[*prog.Syscall]bool) (queue.Source, error) {
	if len(enabledSyscalls) == 0 {
		return nil, fmt.Errorf("all system calls are disabled")
	}
	if mgr.mode.ExitAfterMachineCheck {
		mgr.exit(mgr.mode.Name)
	}

	// If KFuzzTest is enabled, we exclusively fuzz KFuzzTest targets - so
	// delete any existing entries in enabled syscalls, and enable all
	// discovered KFuzzTest targets explicitly.
	if mgr.cfg.Experimental.EnableKFuzzTest {
		for call := range enabledSyscalls {
			delete(enabledSyscalls, call)
		}
		data, err := kfuzztest.ExtractData(path.Join(mgr.cfg.KernelObj, "vmlinux"))
		if err != nil {
			return nil, err
		}
		for _, call := range data.Calls {
			enabledSyscalls[call] = true
		}
	}

	mgr.mu.Lock()
	defer mgr.mu.Unlock()
	if mgr.phase != phaseInit {
		panic("MachineChecked() called not during phaseInit")
	}
	if mgr.checkDone.Swap(true) {
		panic("MachineChecked called twice")
	}
	mgr.enabledFeatures = features
	mgr.http.EnabledSyscalls.Store(enabledSyscalls)
	mgr.firstConnect.Store(time.Now().Unix())
	statSyscalls := stat.New("syscalls", "Number of enabled syscalls",
		stat.Simple, stat.NoGraph, stat.Link("/syscalls"))
	statSyscalls.Add(len(enabledSyscalls))
	candidates := mgr.loadCorpus(enabledSyscalls)
	mgr.setPhaseLocked(phaseLoadedCorpus)
	opts := fuzzer.DefaultExecOpts(mgr.cfg, features, *flagDebug)

	switch mgr.mode {
	case ModeFuzzing, ModeCorpusTriage:
		corpusUpdates := make(chan corpus.NewItemEvent, 128)
		mgr.corpus = corpus.NewFocusedCorpus(context.Background(),
			corpusUpdates, mgr.coverFilters.Areas)
		mgr.http.Corpus.Store(mgr.corpus)

		rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
		fuzzerObj := fuzzer.NewFuzzer(context.Background(), &fuzzer.Config{
			Corpus:         mgr.corpus,
			Snapshot:       mgr.cfg.Snapshot,
			Coverage:       mgr.cfg.Cover,
			FaultInjection: features&flatrpc.FeatureFault != 0,
			Comparisons:    features&flatrpc.FeatureComparisons != 0,
			Collide:        true,
			EnabledCalls:   enabledSyscalls,
			NoMutateCalls:  mgr.cfg.NoMutateCalls,
			FetchRawCover:  mgr.cfg.RawCover,
			Logf: func(level int, msg string, args ...interface{}) {
				if level != 0 {
					return
				}
				log.Logf(level, msg, args...)
			},
			NewInputFilter: func(call string) bool {
				mgr.mu.Lock()
				defer mgr.mu.Unlock()
				return !mgr.saturatedCalls[call]
			},
			ModeKFuzzTest: mgr.cfg.Experimental.EnableKFuzzTest,
		}, rnd, mgr.target)
		fuzzerObj.AddCandidates(candidates)
		mgr.fuzzer.Store(fuzzerObj)
		mgr.http.Fuzzer.Store(fuzzerObj)

		go mgr.corpusInputHandler(corpusUpdates)
		go mgr.corpusMinimization()
		go mgr.fuzzerLoop(fuzzerObj)
		if mgr.dash != nil {
			go mgr.dashboardReporter()
			if mgr.cfg.Reproduce {
				go mgr.dashboardReproTasks()
			}
		}
		source := queue.DefaultOpts(fuzzerObj, opts)
		if mgr.cfg.Snapshot {
			log.Logf(0, "restarting VMs for snapshot mode")
			mgr.snapshotSource = queue.Distribute(source)
			mgr.pool.SetDefault(mgr.snapshotInstance)
			mgr.serv.Close()
			mgr.serv = nil
			return queue.Callback(func() *queue.Request {
				return nil
			}), nil
		}
		return source, nil
	case ModeCorpusRun:
		ctx := &corpusRunner{
			candidates: candidates,
			rnd:        rand.New(rand.NewSource(time.Now().UnixNano())),
		}
		return queue.DefaultOpts(ctx, opts), nil
	case ModeRunTests:
		ctx := &runtest.Context{
			Dir:      filepath.Join(mgr.cfg.Syzkaller, "sys", mgr.cfg.Target.OS, "test"),
			Target:   mgr.cfg.Target,
			Features: features,
			EnabledCalls: map[string]map[*prog.Syscall]bool{
				mgr.cfg.Sandbox: enabledSyscalls,
			},
			LogFunc: func(text string) { fmt.Println(text) },
			Verbose: true,
			Debug:   *flagDebug,
			Tests:   *flagTests,
		}
		ctx.Init()
		go func() {
			err := ctx.Run(context.Background())
			if err != nil {
				log.Fatal(err)
			}
			mgr.exit("tests")
		}()
		return ctx, nil
	case ModeIfaceProbe:
		exec := queue.Plain()
		go func() {
			res, err := ifaceprobe.Run(vm.ShutdownCtx(), mgr.cfg, features, exec)
			if err != nil {
				log.Fatalf("interface probing failed: %v", err)
			}
			path := filepath.Join(mgr.cfg.Workdir, "interfaces.json")
			if err := osutil.WriteJSON(path, res); err != nil {
				log.Fatal(err)
			}
			mgr.exit("interface probe")
		}()
		return exec, nil
	}
	panic(fmt.Sprintf("unexpected mode %q", mgr.mode.Name))
}

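// corpusRunner is a queue.Source that first replays all corpus programs sequentially and
// then keeps executing randomly chosen ones; it backs the corpus-run mode.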
type corpusRunner struct {
	candidates []fuzzer.Candidate
	mu         sync.Mutex
	rnd        *rand.Rand
	seq        int
}

func (cr *corpusRunner) Next() *queue.Request {
	cr.mu.Lock()
	defer cr.mu.Unlock()

	var p *prog.Prog
	if cr.seq < len(cr.candidates) {
		// First run all candidates sequentially.
		p = cr.candidates[cr.seq].Prog
		cr.seq++
	} else {
		// Then pick random progs.
		p = cr.candidates[cr.rnd.Intn(len(cr.candidates))].Prog
	}
	return &queue.Request{
		Prog:      p,
		Important: true,
	}
}

func (mgr *Manager) corpusMinimization() {
	for range time.NewTicker(time.Minute).C {
		mgr.mu.Lock()
		mgr.minimizeCorpusLocked()
		mgr.mu.Unlock()
	}
}

func (mgr *Manager) MaxSignal() signal.Signal {
	if fuzzer := mgr.fuzzer.Load(); fuzzer != nil {
		return fuzzer.Cover.CopyMaxSignal()
	}
	return nil
}

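// fuzzerLoop periodically distributes new max signal to all VMs and advances the manager phase once candidate triage finishes.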
func (mgr *Manager) fuzzerLoop(fuzzer *fuzzer.Fuzzer) {
	for ; ; time.Sleep(time.Second / 2) {
		if mgr.cfg.Cover && !mgr.cfg.Snapshot {
			// Distribute new max signal over all instances.
			newSignal := fuzzer.Cover.GrabSignalDelta()
			if len(newSignal) != 0 {
				log.Logf(3, "distributing %d new signal", len(newSignal))
				mgr.serv.DistributeSignalDelta(newSignal)
			}
		}

		// Update the state machine.
		if fuzzer.CandidateTriageFinished() {
			if mgr.mode == ModeCorpusTriage {
				mgr.exit("corpus triage")
			}
			mgr.mu.Lock()
			switch mgr.phase {
			case phaseLoadedCorpus:
				if !mgr.cfg.Snapshot {
					mgr.serv.TriagedCorpus()
				}
				if mgr.cfg.HubClient != "" {
					mgr.setPhaseLocked(phaseTriagedCorpus)
					go mgr.hubSyncLoop(pickGetter(mgr.cfg.HubKey),
						fuzzer.Config.EnabledCalls)
				} else {
					mgr.setPhaseLocked(phaseTriagedHub)
				}
			case phaseQueriedHub:
				mgr.setPhaseLocked(phaseTriagedHub)
			}
			mgr.mu.Unlock()
		}
	}
}

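// setPhaseLocked advances the phase state machine; mgr.mu must be held.
// Reaching phaseTriagedHub starts the repro loop.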
func (mgr *Manager) setPhaseLocked(newPhase int) {
	if mgr.phase == newPhase {
		panic("repeated phase update")
	}
	// In VMLess mode, mgr.reproLoop is nil.
	if newPhase == phaseTriagedHub && mgr.reproLoop != nil {
		// Start reproductions.
		go mgr.reproLoop.Loop(vm.ShutdownCtx())
	}
	mgr.phase = newPhase
}

func (mgr *Manager) needMoreCandidates() bool {
	return mgr.fuzzer.Load().CandidateTriageFinished()
}

func (mgr *Manager) hubIsUnreachable() {
	var dash *dashapi.Dashboard
	mgr.mu.Lock()
	if mgr.phase == phaseTriagedCorpus {
		dash = mgr.dash
		mgr.setPhaseLocked(phaseTriagedHub)
		log.Errorf("did not manage to connect to syz-hub; moving forward")
	}
	mgr.mu.Unlock()
	if dash != nil {
		mgr.dash.LogError(mgr.cfg.Name, "did not manage to connect to syz-hub")
	}
}

// trackUsedFiles() checks that the files that syz-manager depends on are not modified while it is running.
func (mgr *Manager) trackUsedFiles() {
	usedFiles := make(map[string]time.Time) // file name to modification time
	addUsedFile := func(f string) {
		if f == "" {
			return
		}
		stat, err := os.Stat(f)
		if err != nil {
			log.Fatalf("failed to stat %v: %v", f, err)
		}
		usedFiles[f] = stat.ModTime()
	}
	cfg := mgr.cfg
	addUsedFile(cfg.ExecprogBin)
	addUsedFile(cfg.ExecutorBin)
	addUsedFile(cfg.SSHKey)
	if vmlinux := filepath.Join(cfg.KernelObj, mgr.sysTarget.KernelObject); osutil.IsExist(vmlinux) {
		addUsedFile(vmlinux)
	}
	if cfg.Image != "9p" {
		addUsedFile(cfg.Image)
	}
	for range time.NewTicker(30 * time.Second).C {
		for f, mod := range usedFiles {
			stat, err := os.Stat(f)
			if err != nil {
				log.Fatalf("failed to stat %v: %v", f, err)
			}
			if mod != stat.ModTime() {
				log.Fatalf("file %v that syz-manager uses has been modified by an external program\n"+
					"this can lead to arbitrary syz-manager misbehavior\n"+
					"modification time has changed: %v -> %v\n"+
					"don't modify files that syz-manager uses. exiting to prevent harm",
					f, mod, stat.ModTime())
			}
		}
	}
}

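// dashboardReporter uploads manager stats to the dashboard once a minute, sending deltas for the cumulative counters.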
func (mgr *Manager) dashboardReporter() {
	webAddr := publicWebAddr(mgr.cfg.HTTP)
	triageInfoSent := false
	var lastFuzzingTime time.Duration
	var lastCrashes, lastSuppressedCrashes, lastExecs uint64
	for range time.NewTicker(time.Minute).C {
		mgr.mu.Lock()
		corpus := mgr.corpus
		mgr.mu.Unlock()
		if corpus == nil {
			continue
		}
		mgr.mu.Lock()
		req := &dashapi.ManagerStatsReq{
			Name:              mgr.cfg.Name,
			Addr:              webAddr,
			UpTime:            time.Duration(mgr.statUptime.Val()) * time.Second,
			Corpus:            uint64(corpus.StatProgs.Val()),
			PCs:               uint64(corpus.StatCover.Val()),
			Cover:             uint64(corpus.StatSignal.Val()),
			CrashTypes:        uint64(mgr.statCrashTypes.Val()),
			FuzzingTime:       time.Duration(mgr.statFuzzingTime.Val()) - lastFuzzingTime,
			Crashes:           uint64(mgr.statCrashes.Val()) - lastCrashes,
			SuppressedCrashes: uint64(mgr.statSuppressed.Val()) - lastSuppressedCrashes,
			Execs:             uint64(mgr.servStats.StatExecs.Val()) - lastExecs,
		}
		if mgr.phase >= phaseTriagedCorpus && !triageInfoSent {
			triageInfoSent = true
			req.TriagedCoverage = uint64(corpus.StatSignal.Val())
			req.TriagedPCs = uint64(corpus.StatCover.Val())
		}
		mgr.mu.Unlock()

		if err := mgr.dash.UploadManagerStats(req); err != nil {
			log.Logf(0, "failed to upload dashboard stats: %v", err)
			continue
		}
		mgr.mu.Lock()
		lastFuzzingTime += req.FuzzingTime
		lastCrashes += req.Crashes
		lastSuppressedCrashes += req.SuppressedCrashes
		lastExecs += req.Execs
		mgr.mu.Unlock()
	}
}

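// dashboardReproTasks polls the dashboard for crash logs that still need a reproducer and queues them for reproduction.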
func (mgr *Manager) dashboardReproTasks() {
	for range time.NewTicker(20 * time.Minute).C {
		if !mgr.reproLoop.CanReproMore() {
			// We don't need reproducers at the moment.
			continue
		}
		resp, err := mgr.dash.LogToRepro(&dashapi.LogToReproReq{BuildID: mgr.cfg.Tag})
		if err != nil {
			log.Logf(0, "failed to query logs to reproduce: %v", err)
			continue
		}
		if len(resp.CrashLog) > 0 {
			mgr.externalReproQueue <- &manager.Crash{
				FromDashboard: true,
				Manual:        resp.Type == dashapi.ManualLog,
				Report: &report.Report{
					Title:  resp.Title,
					Output: resp.CrashLog,
				},
			}
		}
	}
}

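// CoverageFilter initializes coverage filtering for the given kernel modules and returns the PCs the executor should cover.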
func (mgr *Manager) CoverageFilter(modules []*vminfo.KernelModule) ([]uint64, error) {
	mgr.reportGenerator.Init(modules)
	filters, err := manager.PrepareCoverageFilters(mgr.reportGenerator, mgr.cfg, true)
	if err != nil {
		return nil, fmt.Errorf("failed to init coverage filter: %w", err)
	}
	mgr.coverFilters = filters
	mgr.http.Cover.Store(&manager.CoverageInfo{
		Modules:         modules,
		ReportGenerator: mgr.reportGenerator,
		CoverFilter:     filters.ExecutorFilter,
	})
	var pcs []uint64
	for pc := range filters.ExecutorFilter {
		pcs = append(pcs, pc)
	}
	return pcs, nil
}

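// publicWebAddr converts the HTTP listen address into an externally reachable URL,
// preferring the GCE external IP when running on GCE.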
func publicWebAddr(addr string) string {
	if addr == "" {
		return ""
	}
	_, port, err := net.SplitHostPort(addr)
	if err == nil && port != "" {
		if host, err := os.Hostname(); err == nil {
			addr = net.JoinHostPort(host, port)
		}
		if GCE, err := gce.NewContext(""); err == nil {
			addr = net.JoinHostPort(GCE.ExternalIP, port)
		}
	}
	return "http://" + addr
}