github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/syz-manager/manager.go

     1  // Copyright 2015 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package main
     5  
     6  import (
     7  	"bytes"
     8  	"context"
     9  	"encoding/json"
    10  	"flag"
    11  	"fmt"
    12  	"io"
    13  	"math/rand"
    14  	"net"
    15  	"os"
    16  	"os/exec"
    17  	"path/filepath"
    18  	"sync"
    19  	"sync/atomic"
    20  	"time"
    21  
    22  	"github.com/google/syzkaller/dashboard/dashapi"
    23  	"github.com/google/syzkaller/pkg/asset"
    24  	"github.com/google/syzkaller/pkg/corpus"
    25  	"github.com/google/syzkaller/pkg/csource"
    26  	"github.com/google/syzkaller/pkg/db"
    27  	"github.com/google/syzkaller/pkg/flatrpc"
    28  	"github.com/google/syzkaller/pkg/fuzzer"
    29  	"github.com/google/syzkaller/pkg/fuzzer/queue"
    30  	"github.com/google/syzkaller/pkg/gce"
    31  	"github.com/google/syzkaller/pkg/hash"
    32  	"github.com/google/syzkaller/pkg/instance"
    33  	"github.com/google/syzkaller/pkg/ipc"
    34  	"github.com/google/syzkaller/pkg/log"
    35  	"github.com/google/syzkaller/pkg/mgrconfig"
    36  	"github.com/google/syzkaller/pkg/osutil"
    37  	"github.com/google/syzkaller/pkg/report"
    38  	crash_pkg "github.com/google/syzkaller/pkg/report/crash"
    39  	"github.com/google/syzkaller/pkg/repro"
    40  	"github.com/google/syzkaller/pkg/signal"
    41  	"github.com/google/syzkaller/pkg/stats"
    42  	"github.com/google/syzkaller/prog"
    43  	"github.com/google/syzkaller/sys/targets"
    44  	"github.com/google/syzkaller/vm"
    45  )
    46  
    47  var (
    48  	flagConfig = flag.String("config", "", "configuration file")
    49  	flagDebug  = flag.Bool("debug", false, "dump all VM output to console")
    50  	flagBench  = flag.String("bench", "", "write execution statistics into this file periodically")
    51  )
    52  
    53  type Manager struct {
    54  	cfg             *mgrconfig.Config
    55  	vmPool          *vm.Pool
    56  	target          *prog.Target
    57  	sysTarget       *targets.Target
    58  	reporter        *report.Reporter
    59  	crashdir        string
    60  	serv            *RPCServer
    61  	corpus          *corpus.Corpus
    62  	corpusDB        *db.DB
    63  	corpusDBMu      sync.Mutex // for concurrent operations on corpusDB
    64  	corpusPreloaded chan bool
    65  	firstConnect    atomic.Int64 // unix time, or 0 if not connected
    66  	crashTypes      map[string]bool
    67  	vmStop          chan bool
    68  	enabledFeatures flatrpc.Feature
    69  	checkDone       bool
    70  	fresh           bool
    71  	expertMode      bool
    72  	nextInstanceID  atomic.Uint64
    73  
    74  	dash *dashapi.Dashboard
    75  
    76  	mu                    sync.Mutex
    77  	fuzzer                atomic.Pointer[fuzzer.Fuzzer]
    78  	phase                 int
    79  	targetEnabledSyscalls map[*prog.Syscall]bool
    80  
    81  	disabledHashes   map[string]struct{}
    82  	seeds            [][]byte
    83  	newRepros        [][]byte
    84  	lastMinCorpus    int
    85  	memoryLeakFrames map[string]bool
    86  	dataRaceFrames   map[string]bool
    87  	saturatedCalls   map[string]bool
    88  
    89  	needMoreRepros     chan chan bool
    90  	externalReproQueue chan *Crash
    91  	reproRequest       chan chan map[string]bool
    92  
    93  	// For checking that the files we are using are not changed under us.
    94  	// Maps file name to modification time.
    95  	usedFiles map[string]time.Time
    96  
    97  	assetStorage *asset.Storage
    98  
    99  	bootTime stats.AverageValue[time.Duration]
   100  
   101  	Stats
   102  }
   103  
   104  const (
   105  	// Just started, nothing done yet.
   106  	phaseInit = iota
   107  	// Corpus is loaded and machine is checked.
   108  	phaseLoadedCorpus
   109  	// Triaged all inputs from corpus.
   110  	// This is when we start querying hub and minimizing persistent corpus.
   111  	phaseTriagedCorpus
   112  	// Made the first request to the hub.
   113  	phaseQueriedHub
   114  	// Triaged all new inputs from hub.
   115  	// This is when we start reproducing crashes.
   116  	phaseTriagedHub
   117  )
   118  
   119  const currentDBVersion = 4
   120  
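        // Crash is a single crash report, together with the name of the VM
        // instance that produced it and a snapshot of its machine info.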
   121  type Crash struct {
   122  	instanceName  string
   123  	fromHub       bool // this crash was created based on a repro from syz-hub
   124  	fromDashboard bool // ... or on a crash log from the dashboard
   125  	*report.Report
   126  	machineInfo []byte
   127  }
   128  
   129  func main() {
   130  	if prog.GitRevision == "" {
   131  		log.Fatalf("bad syz-manager build: build with make, run bin/syz-manager")
   132  	}
   133  	flag.Parse()
   134  	log.EnableLogCaching(1000, 1<<20)
   135  	cfg, err := mgrconfig.LoadFile(*flagConfig)
   136  	if err != nil {
   137  		log.Fatalf("%v", err)
   138  	}
   139  	if cfg.DashboardAddr != "" {
   140  		// This makes it easier to distinguish logs of individual syz-manager instances.
   141  		log.SetName(cfg.Name)
   142  	}
   143  	RunManager(cfg)
   144  }
   145  
   146  func RunManager(cfg *mgrconfig.Config) {
   147  	var vmPool *vm.Pool
   148  	// Type "none" is a special case for debugging/development: the manager
   149  	// does not start any VMs; instead, you start them manually
   150  	// and run syz-fuzzer there.
   151  	if cfg.Type != "none" {
   152  		var err error
   153  		vmPool, err = vm.Create(cfg, *flagDebug)
   154  		if err != nil {
   155  			log.Fatalf("%v", err)
   156  		}
   157  	}
   158  
   159  	crashdir := filepath.Join(cfg.Workdir, "crashes")
   160  	osutil.MkdirAll(crashdir)
   161  
   162  	reporter, err := report.NewReporter(cfg)
   163  	if err != nil {
   164  		log.Fatalf("%v", err)
   165  	}
   166  
   167  	corpusUpdates := make(chan corpus.NewItemEvent, 32)
   168  	mgr := &Manager{
   169  		cfg:                cfg,
   170  		vmPool:             vmPool,
   171  		corpus:             corpus.NewMonitoredCorpus(context.Background(), corpusUpdates),
   172  		corpusPreloaded:    make(chan bool),
   173  		target:             cfg.Target,
   174  		sysTarget:          cfg.SysTarget,
   175  		reporter:           reporter,
   176  		crashdir:           crashdir,
   177  		crashTypes:         make(map[string]bool),
   178  		disabledHashes:     make(map[string]struct{}),
   179  		memoryLeakFrames:   make(map[string]bool),
   180  		dataRaceFrames:     make(map[string]bool),
   181  		fresh:              true,
   182  		vmStop:             make(chan bool),
   183  		externalReproQueue: make(chan *Crash, 10),
   184  		needMoreRepros:     make(chan chan bool),
   185  		reproRequest:       make(chan chan map[string]bool),
   186  		usedFiles:          make(map[string]time.Time),
   187  		saturatedCalls:     make(map[string]bool),
   188  	}
   189  
   190  	mgr.initStats()
   191  	go mgr.preloadCorpus()
   192  	mgr.initHTTP() // Creates HTTP server.
   193  	mgr.collectUsedFiles()
   194  	go mgr.corpusInputHandler(corpusUpdates)
   195  
   196  	// Create RPC server for fuzzers.
   197  	mgr.serv, err = startRPCServer(mgr)
   198  	if err != nil {
   199  		log.Fatalf("failed to create rpc server: %v", err)
   200  	}
   201  
   202  	if cfg.DashboardAddr != "" {
   203  		mgr.dash, err = dashapi.New(cfg.DashboardClient, cfg.DashboardAddr, cfg.DashboardKey)
   204  		if err != nil {
   205  			log.Fatalf("failed to create dashapi connection: %v", err)
   206  		}
   207  	}
   208  
   209  	if !cfg.AssetStorage.IsEmpty() {
   210  		mgr.assetStorage, err = asset.StorageFromConfig(cfg.AssetStorage, mgr.dash)
   211  		if err != nil {
   212  			log.Fatalf("failed to init asset storage: %v", err)
   213  		}
   214  	}
   215  
   216  	if *flagBench != "" {
   217  		mgr.initBench()
   218  	}
   219  
   220  	go mgr.heartbeatLoop()
   221  	osutil.HandleInterrupts(vm.Shutdown)
   222  	if mgr.vmPool == nil {
   223  		log.Logf(0, "no VMs started (type=none)")
   224  		log.Logf(0, "you are supposed to start syz-fuzzer manually as:")
   225  		log.Logf(0, "syz-fuzzer -manager=manager.ip:%v [other flags as necessary]", mgr.serv.port)
   226  		<-vm.Shutdown
   227  		return
   228  	}
   229  	mgr.vmLoop()
   230  }
   231  
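        // heartbeatLoop accounts the total fuzzing time and dumps the console
        // stats to the log every 10 seconds once the first fuzzer has connected.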
   232  func (mgr *Manager) heartbeatLoop() {
   233  	lastTime := time.Now()
   234  	for now := range time.NewTicker(10 * time.Second).C {
   235  		diff := int(now.Sub(lastTime))
   236  		lastTime = now
   237  		if mgr.firstConnect.Load() == 0 {
   238  			continue
   239  		}
   240  		mgr.statFuzzingTime.Add(diff * mgr.statNumFuzzing.Val())
   241  		buf := new(bytes.Buffer)
   242  		for _, stat := range stats.Collect(stats.Console) {
   243  			fmt.Fprintf(buf, "%v=%v ", stat.Name, stat.Value)
   244  		}
   245  		log.Logf(0, "%s", buf.String())
   246  	}
   247  }
   248  
   249  func (mgr *Manager) initBench() {
   250  	f, err := os.OpenFile(*flagBench, os.O_WRONLY|os.O_CREATE|os.O_EXCL, osutil.DefaultFilePerm)
   251  	if err != nil {
   252  		log.Fatalf("failed to open bench file: %v", err)
   253  	}
   254  	go func() {
   255  		for range time.NewTicker(time.Minute).C {
   256  			vals := make(map[string]int)
   257  			for _, stat := range stats.Collect(stats.All) {
   258  				vals[stat.Name] = stat.V
   259  			}
   260  			data, err := json.MarshalIndent(vals, "", "  ")
   261  			if err != nil {
   262  				log.Fatalf("failed to serialize bench data")
   263  			}
   264  			if _, err := f.Write(append(data, '\n')); err != nil {
   265  				log.Fatalf("failed to write bench data")
   266  			}
   267  		}
   268  	}()
   269  }
   270  
   271  type RunResult struct {
   272  	idx   int
   273  	crash *Crash
   274  	err   error
   275  }
   276  
   277  type ReproResult struct {
   278  	instances     []int
   279  	report0       *report.Report // the original report we started reproducing
   280  	repro         *repro.Result
   281  	strace        *repro.StraceResult
   282  	stats         *repro.Stats
   283  	err           error
   284  	fromHub       bool
   285  	fromDashboard bool
   286  	originalTitle string // crash title before we started bug reproduction
   287  }
   288  
   289  // Manager needs to be refactored (#605).
   290  // nolint: gocyclo, gocognit, funlen
   291  func (mgr *Manager) vmLoop() {
   292  	log.Logf(0, "booting test machines...")
   293  	log.Logf(0, "waiting for the connection from a test machine...")
   294  	instancesPerRepro := 3
   295  	vmCount := mgr.vmPool.Count()
   296  	maxReproVMs := vmCount - mgr.cfg.FuzzingVMs
   297  	if instancesPerRepro > maxReproVMs && maxReproVMs > 0 {
   298  		instancesPerRepro = maxReproVMs
   299  	}
   300  	instances := SequentialResourcePool(vmCount, 5*time.Second)
   301  	runDone := make(chan *RunResult, 1)
   302  	pendingRepro := make(map[*Crash]bool)
   303  	reproducing := make(map[string]bool)
   304  	var reproQueue []*Crash
   305  	reproDone := make(chan *ReproResult, 1)
   306  	stopPending := false
   307  	shutdown := vm.Shutdown
   308  	for shutdown != nil || instances.Len() != vmCount {
   309  		mgr.mu.Lock()
   310  		phase := mgr.phase
   311  		mgr.mu.Unlock()
   312  
   313  		for crash := range pendingRepro {
   314  			if reproducing[crash.Title] {
   315  				continue
   316  			}
   317  			delete(pendingRepro, crash)
   318  			if !mgr.needRepro(crash) {
   319  				continue
   320  			}
   321  			log.Logf(1, "loop: add to repro queue '%v'", crash.Title)
   322  			reproducing[crash.Title] = true
   323  			reproQueue = append(reproQueue, crash)
   324  		}
   325  
   326  		log.Logf(1, "loop: phase=%v shutdown=%v instances=%v/%v %+v repro: pending=%v reproducing=%v queued=%v",
   327  			phase, shutdown == nil, instances.Len(), vmCount, instances.Snapshot(),
   328  			len(pendingRepro), len(reproducing), len(reproQueue))
   329  
   330  		canRepro := func() bool {
   331  			return phase >= phaseTriagedHub && len(reproQueue) != 0 &&
   332  				(mgr.statNumReproducing.Val()+1)*instancesPerRepro <= maxReproVMs
   333  		}
   334  
   335  		if shutdown != nil {
   336  			for canRepro() {
   337  				vmIndexes := instances.Take(instancesPerRepro)
   338  				if vmIndexes == nil {
   339  					break
   340  				}
   341  				last := len(reproQueue) - 1
   342  				crash := reproQueue[last]
   343  				reproQueue[last] = nil
   344  				reproQueue = reproQueue[:last]
   345  				mgr.statNumReproducing.Add(1)
   346  				log.Logf(0, "loop: starting repro of '%v' on instances %+v", crash.Title, vmIndexes)
   347  				go func() {
   348  					reproDone <- mgr.runRepro(crash, vmIndexes, instances.Put)
   349  				}()
   350  			}
   351  			for !canRepro() {
   352  				idx := instances.TakeOne()
   353  				if idx == nil {
   354  					break
   355  				}
   356  				log.Logf(1, "loop: starting instance %v", *idx)
   357  				go func() {
   358  					crash, err := mgr.runInstance(*idx)
   359  					runDone <- &RunResult{*idx, crash, err}
   360  				}()
   361  			}
   362  		}
   363  
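        		// Sending to a nil channel blocks forever, so the stop-request case in
        		// the select below can only fire while a repro is actually waiting for VMs.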
   364  		var stopRequest chan bool
   365  		if !stopPending && canRepro() {
   366  			stopRequest = mgr.vmStop
   367  		}
   368  
   369  	wait:
   370  		select {
   371  		case <-instances.Freed:
   372  			// An instance has been released.
   373  		case stopRequest <- true:
   374  			log.Logf(1, "loop: issued stop request")
   375  			stopPending = true
   376  		case res := <-runDone:
   377  			log.Logf(1, "loop: instance %v finished, crash=%v", res.idx, res.crash != nil)
   378  			if res.err != nil && shutdown != nil {
   379  				log.Logf(0, "%v", res.err)
   380  			}
   381  			stopPending = false
   382  			instances.Put(res.idx)
   383  			// On shutdown qemu crashes with "qemu: terminating on signal 2",
   384  			// which we detect as "lost connection". Don't save that as a crash.
   385  			if shutdown != nil && res.crash != nil {
   386  				needRepro := mgr.saveCrash(res.crash)
   387  				if needRepro {
   388  					log.Logf(1, "loop: add pending repro for '%v'", res.crash.Title)
   389  					pendingRepro[res.crash] = true
   390  				}
   391  			}
   392  		case res := <-reproDone:
   393  			mgr.statNumReproducing.Add(-1)
   394  			crepro := false
   395  			title := ""
   396  			if res.repro != nil {
   397  				crepro = res.repro.CRepro
   398  				title = res.repro.Report.Title
   399  			}
   400  			log.Logf(0, "loop: repro on %+v finished '%v', repro=%v crepro=%v desc='%v'"+
   401  				" hub=%v from_dashboard=%v",
   402  				res.instances, res.report0.Title, res.repro != nil, crepro, title,
   403  				res.fromHub, res.fromDashboard,
   404  			)
   405  			if res.err != nil {
   406  				reportReproError(res.err)
   407  			}
   408  			delete(reproducing, res.report0.Title)
   409  			if res.repro == nil {
   410  				if res.fromHub {
   411  					log.Logf(1, "repro '%v' came from syz-hub, not reporting the failure",
   412  						res.report0.Title)
   413  				} else {
   414  					log.Logf(1, "report repro failure of '%v'", res.report0.Title)
   415  					mgr.saveFailedRepro(res.report0, res.stats)
   416  				}
   417  			} else {
   418  				mgr.saveRepro(res)
   419  			}
   420  		case <-shutdown:
   421  			log.Logf(1, "loop: shutting down...")
   422  			shutdown = nil
   423  		case crash := <-mgr.externalReproQueue:
   424  			log.Logf(1, "loop: got repro request")
   425  			pendingRepro[crash] = true
   426  		case reply := <-mgr.needMoreRepros:
   427  			reply <- phase >= phaseTriagedHub &&
   428  				len(reproQueue)+len(pendingRepro)+len(reproducing) == 0
   429  			goto wait
   430  		case reply := <-mgr.reproRequest:
   431  			repros := make(map[string]bool)
   432  			for title := range reproducing {
   433  				repros[title] = true
   434  			}
   435  			reply <- repros
   436  			goto wait
   437  		}
   438  	}
   439  }
   440  
   441  func reportReproError(err error) {
   442  	shutdown := false
   443  	select {
   444  	case <-vm.Shutdown:
   445  		shutdown = true
   446  	default:
   447  	}
   448  
   449  	switch err {
   450  	case repro.ErrNoPrograms:
   451  		// This is not extraordinary as programs are collected via SSH.
   452  		log.Logf(0, "repro failed: %v", err)
   453  		return
   454  	case repro.ErrNoVMs:
   455  		// This error is to be expected if we're shutting down.
   456  		if shutdown {
   457  			return
   458  		}
   459  	}
   460  	// Report everything else as errors.
   461  	log.Errorf("repro failed: %v", err)
   462  }
   463  
   464  func (mgr *Manager) runRepro(crash *Crash, vmIndexes []int, putInstances func(...int)) *ReproResult {
   465  	res, stats, err := repro.Run(crash.Output, mgr.cfg, mgr.enabledFeatures, mgr.reporter, mgr.vmPool, vmIndexes)
   466  	ret := &ReproResult{
   467  		instances:     vmIndexes,
   468  		report0:       crash.Report,
   469  		repro:         res,
   470  		stats:         stats,
   471  		err:           err,
   472  		fromHub:       crash.fromHub,
   473  		fromDashboard: crash.fromDashboard,
   474  		originalTitle: crash.Title,
   475  	}
   476  	if err == nil && res != nil && mgr.cfg.StraceBin != "" {
   477  		// We need only one instance to get the strace output; release the rest.
   478  		putInstances(vmIndexes[1:]...)
   479  		defer putInstances(vmIndexes[0])
   480  
   481  		const straceAttempts = 2
   482  		for i := 1; i <= straceAttempts; i++ {
   483  			strace := repro.RunStrace(res, mgr.cfg, mgr.reporter, mgr.vmPool, vmIndexes[0])
   484  			sameBug := strace.IsSameBug(res)
   485  			log.Logf(0, "strace run attempt %d/%d for '%s': same bug %v, error %v",
   486  				i, straceAttempts, res.Report.Title, sameBug, strace.Error)
   487  			// We only want to save strace output if it resulted in the same bug.
   488  			// Otherwise, it will be hard to reproduce on syzbot and will confuse users.
   489  			if sameBug {
   490  				ret.strace = strace
   491  				break
   492  			}
   493  		}
   494  	} else {
   495  		putInstances(vmIndexes...)
   496  	}
   497  	return ret
   498  }
   499  
   500  type ResourcePool struct {
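        // ResourcePool tracks the indexes of free VM instances.
        // Freed is signaled whenever instances are returned to the pool.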
   501  	ids   []int
   502  	mu    sync.RWMutex
   503  	Freed chan interface{}
   504  }
   505  
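        // SequentialResourcePool hands out the VM indexes one by one with a delay
        // in between, presumably to stagger VM creation and avoid boot-time load spikes.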
   506  func SequentialResourcePool(count int, delay time.Duration) *ResourcePool {
   507  	ret := &ResourcePool{Freed: make(chan interface{}, 1)}
   508  	go func() {
   509  		for i := 0; i < count; i++ {
   510  			ret.Put(i)
   511  			time.Sleep(delay)
   512  		}
   513  	}()
   514  	return ret
   515  }
   516  
   517  func (pool *ResourcePool) Put(ids ...int) {
   518  	pool.mu.Lock()
   519  	defer pool.mu.Unlock()
   520  	pool.ids = append(pool.ids, ids...)
   521  	// Notify the listener.
   522  	select {
   523  	case pool.Freed <- true:
   524  	default:
   525  	}
   526  }
   527  
   528  func (pool *ResourcePool) Len() int {
   529  	pool.mu.RLock()
   530  	defer pool.mu.RUnlock()
   531  	return len(pool.ids)
   532  }
   533  
   534  func (pool *ResourcePool) Snapshot() []int {
   535  	pool.mu.RLock()
   536  	defer pool.mu.RUnlock()
   537  	return append([]int{}, pool.ids...)
   538  }
   539  
   540  func (pool *ResourcePool) Take(cnt int) []int {
   541  	pool.mu.Lock()
   542  	defer pool.mu.Unlock()
   543  	totalItems := len(pool.ids)
   544  	if totalItems < cnt {
   545  		return nil
   546  	}
   547  	ret := append([]int{}, pool.ids[totalItems-cnt:]...)
   548  	pool.ids = pool.ids[:totalItems-cnt]
   549  	return ret
   550  }
   551  
   552  func (pool *ResourcePool) TakeOne() *int {
   553  	ret := pool.Take(1)
   554  	if ret == nil {
   555  		return nil
   556  	}
   557  	return &ret[0]
   558  }
   559  
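        // preloadCorpus reads corpus.db and the seed programs from disk in the
        // background; loadCorpus later waits for it via the corpusPreloaded channel.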
   560  func (mgr *Manager) preloadCorpus() {
   561  	corpusDB, err := db.Open(filepath.Join(mgr.cfg.Workdir, "corpus.db"), true)
   562  	if err != nil {
   563  		if corpusDB == nil {
   564  			log.Fatalf("failed to open corpus database: %v", err)
   565  		}
   566  		log.Errorf("read %v inputs from corpus and got error: %v", len(corpusDB.Records), err)
   567  	}
   568  	mgr.corpusDB = corpusDB
   569  
   570  	if seedDir := filepath.Join(mgr.cfg.Syzkaller, "sys", mgr.cfg.TargetOS, "test"); osutil.IsExist(seedDir) {
   571  		seeds, err := os.ReadDir(seedDir)
   572  		if err != nil {
   573  			log.Fatalf("failed to read seeds dir: %v", err)
   574  		}
   575  		for _, seed := range seeds {
   576  			data, err := os.ReadFile(filepath.Join(seedDir, seed.Name()))
   577  			if err != nil {
   578  				log.Fatalf("failed to read seed %v: %v", seed.Name(), err)
   579  			}
   580  			mgr.seeds = append(mgr.seeds, data)
   581  		}
   582  	}
   583  	close(mgr.corpusPreloaded)
   584  }
   585  
   586  func (mgr *Manager) loadCorpus() {
   587  	<-mgr.corpusPreloaded
   588  	// By default we don't re-minimize/re-smash programs from the corpus:
   589  	// it takes a lot of time on start and is unnecessary.
   590  	// However, on version bumps we can selectively re-minimize/re-smash.
   591  	minimized, smashed := true, true
   592  	switch mgr.corpusDB.Version {
   593  	case 0:
   594  		// Version 0 had broken minimization, so we need to re-minimize.
   595  		minimized = false
   596  		fallthrough
   597  	case 1:
   598  		// Version 1->2: memory is preallocated so lots of mmaps become unnecessary.
   599  		minimized = false
   600  		fallthrough
   601  	case 2:
   602  		// Version 2->3: big-endian hints.
   603  		smashed = false
   604  		fallthrough
   605  	case 3:
   606  		// Version 3->4: to shake things up.
   607  		minimized = false
   608  		fallthrough
   609  	case currentDBVersion:
   610  	}
   611  	var candidates []fuzzer.Candidate
   612  	broken := 0
   613  	for key, rec := range mgr.corpusDB.Records {
   614  		drop, item := mgr.loadProg(rec.Val, minimized, smashed)
   615  		if drop {
   616  			mgr.corpusDB.Delete(key)
   617  			broken++
   618  		}
   619  		if item != nil {
   620  			candidates = append(candidates, *item)
   621  		}
   622  	}
   623  	mgr.fresh = len(mgr.corpusDB.Records) == 0
   624  	seeds := 0
   625  	for _, seed := range mgr.seeds {
   626  		_, item := mgr.loadProg(seed, true, false)
   627  		if item != nil {
   628  			candidates = append(candidates, *item)
   629  			seeds++
   630  		}
   631  	}
   632  	log.Logf(0, "%-24v: %v (%v broken, %v seeds)", "corpus", len(candidates), broken, seeds)
   633  	mgr.seeds = nil
   634  
   635  	// We duplicate all inputs in the corpus and shuffle the second half.
   636  	// This solves the following problem. A fuzzer can crash while triaging candidates,
   637  	// in which case it will also lose all cached candidates. Or, an input can be somewhat flaky
   638  	// and not produce coverage on the first try. So we give each input a second chance.
   639  	// Shuffling should prevent deterministically losing the same inputs when a fuzzer crashes.
   640  	candidates = append(candidates, candidates...)
   641  	shuffle := candidates[len(candidates)/2:]
   642  	rand.Shuffle(len(shuffle), func(i, j int) {
   643  		shuffle[i], shuffle[j] = shuffle[j], shuffle[i]
   644  	})
   645  	if mgr.phase != phaseInit {
   646  		panic(fmt.Sprintf("loadCorpus: bad phase %v", mgr.phase))
   647  	}
   648  	mgr.phase = phaseLoadedCorpus
   649  	mgr.fuzzer.Load().AddCandidates(candidates)
   650  }
   651  
   652  // Returns (delete item from the corpus, a fuzzer.Candidate object).
   653  func (mgr *Manager) loadProg(data []byte, minimized, smashed bool) (drop bool, candidate *fuzzer.Candidate) {
   654  	p, disabled, bad := parseProgram(mgr.target, mgr.targetEnabledSyscalls, data)
   655  	if bad != nil {
   656  		return true, nil
   657  	}
   658  	if disabled {
   659  		if mgr.cfg.PreserveCorpus {
   660  			// This program contains a disabled syscall.
   661  			// We won't execute it, but remember its hash so
   662  			// it is not deleted during minimization.
   663  			mgr.disabledHashes[hash.String(data)] = struct{}{}
   664  		} else {
   665  			// We cut out the disabled syscalls and let syz-fuzzer retriage and
   666  			// minimize what remains from the prog. The original prog will be
   667  			// deleted from the corpus.
   668  			leftover := programLeftover(mgr.target, mgr.targetEnabledSyscalls, data)
   669  			if leftover != nil {
   670  				candidate = &fuzzer.Candidate{
   671  					Prog:      leftover,
   672  					Minimized: false,
   673  					Smashed:   smashed,
   674  				}
   675  			}
   676  		}
   677  		return false, candidate
   678  	}
   679  	return false, &fuzzer.Candidate{
   680  		Prog:      p,
   681  		Minimized: minimized,
   682  		Smashed:   smashed,
   683  	}
   684  }
   685  
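        // programLeftover strips all disabled calls from a program and returns
        // whatever remains; the caller re-triages it as a non-minimized candidate.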
   686  func programLeftover(target *prog.Target, enabled map[*prog.Syscall]bool, data []byte) *prog.Prog {
   687  	p, err := target.Deserialize(data, prog.NonStrict)
   688  	if err != nil {
   689  		panic(fmt.Sprintf("subsequent deserialization failed: %s", data))
   690  	}
   691  	for i := 0; i < len(p.Calls); {
   692  		c := p.Calls[i]
   693  		if !enabled[c.Meta] {
   694  			p.RemoveCall(i)
   695  			continue
   696  		}
   697  		i++
   698  	}
   699  	return p
   700  }
   701  
   702  func parseProgram(target *prog.Target, enabled map[*prog.Syscall]bool, data []byte) (
   703  	p *prog.Prog, disabled bool, err error) {
   704  	p, err = target.Deserialize(data, prog.NonStrict)
   705  	if err != nil {
   706  		return
   707  	}
   708  	if len(p.Calls) > prog.MaxCalls {
   709  		return nil, false, fmt.Errorf("longer than %d calls", prog.MaxCalls)
   710  	}
   711  	// For some yet unknown reason, programs with fail_nth > 0 may sneak in. Ignore them.
   712  	for _, call := range p.Calls {
   713  		if call.Props.FailNth > 0 {
   714  			return nil, false, fmt.Errorf("input has fail_nth > 0")
   715  		}
   716  	}
   717  	for _, c := range p.Calls {
   718  		if !enabled[c.Meta] {
   719  			return p, true, nil
   720  		}
   721  	}
   722  	return p, false, nil
   723  }
   724  
   725  func (mgr *Manager) runInstance(index int) (*Crash, error) {
   726  	mgr.checkUsedFiles()
   727  	var maxSignal signal.Signal
   728  	if fuzzer := mgr.fuzzer.Load(); fuzzer != nil {
   729  		maxSignal = fuzzer.Cover.CopyMaxSignal()
   730  	}
   731  	// Use unique instance names to prevent name collisions in case of untimely RPC messages.
   732  	instanceName := fmt.Sprintf("vm-%d", mgr.nextInstanceID.Add(1))
   733  	injectLog := make(chan []byte, 10)
   734  	mgr.serv.createInstance(instanceName, maxSignal, injectLog)
   735  
   736  	rep, vmInfo, err := mgr.runInstanceInner(index, instanceName, injectLog)
   737  	machineInfo := mgr.serv.shutdownInstance(instanceName, rep != nil)
   738  	if len(vmInfo) != 0 {
   739  		machineInfo = append(append(vmInfo, '\n'), machineInfo...)
   740  	}
   741  
   742  	// Error that is not a VM crash.
   743  	if err != nil {
   744  		return nil, err
   745  	}
   746  	// No crash.
   747  	if rep == nil {
   748  		return nil, nil
   749  	}
   750  	crash := &Crash{
   751  		instanceName: instanceName,
   752  		Report:       rep,
   753  		machineInfo:  machineInfo,
   754  	}
   755  	return crash, nil
   756  }
   757  
   758  func (mgr *Manager) runInstanceInner(index int, instanceName string, injectLog <-chan []byte) (
   759  	*report.Report, []byte, error) {
   760  	start := time.Now()
   761  
   762  	inst, err := mgr.vmPool.Create(index)
   763  	if err != nil {
   764  		return nil, nil, fmt.Errorf("failed to create instance: %w", err)
   765  	}
   766  	defer inst.Close()
   767  
   768  	fwdAddr, err := inst.Forward(mgr.serv.port)
   769  	if err != nil {
   770  		return nil, nil, fmt.Errorf("failed to setup port forwarding: %w", err)
   771  	}
   772  
   773  	fuzzerBin, err := inst.Copy(mgr.cfg.FuzzerBin)
   774  	if err != nil {
   775  		return nil, nil, fmt.Errorf("failed to copy binary: %w", err)
   776  	}
   777  
   778  	// If ExecutorBin is provided, it means that syz-executor is already in the image,
   779  	// so no need to copy it.
   780  	executorBin := mgr.sysTarget.ExecutorBin
   781  	if executorBin == "" {
   782  		executorBin, err = inst.Copy(mgr.cfg.ExecutorBin)
   783  		if err != nil {
   784  			return nil, nil, fmt.Errorf("failed to copy binary: %w", err)
   785  		}
   786  	}
   787  
   788  	fuzzerV := 0
   789  	procs := mgr.cfg.Procs
   790  	if *flagDebug {
   791  		fuzzerV = 100
   792  		procs = 1
   793  	}
   794  
   795  	// Run the fuzzer binary.
   796  	mgr.bootTime.Save(time.Since(start))
   797  	start = time.Now()
   798  	mgr.statNumFuzzing.Add(1)
   799  	defer mgr.statNumFuzzing.Add(-1)
   800  
   801  	args := &instance.FuzzerCmdArgs{
   802  		Fuzzer:    fuzzerBin,
   803  		Executor:  executorBin,
   804  		Name:      instanceName,
   805  		OS:        mgr.cfg.TargetOS,
   806  		Arch:      mgr.cfg.TargetArch,
   807  		FwdAddr:   fwdAddr,
   808  		Sandbox:   mgr.cfg.Sandbox,
   809  		Procs:     procs,
   810  		Verbosity: fuzzerV,
   811  		Cover:     mgr.cfg.Cover,
   812  		Debug:     *flagDebug,
   813  		Test:      false,
   814  		Optional: &instance.OptionalFuzzerArgs{
   815  			Slowdown:   mgr.cfg.Timeouts.Slowdown,
   816  			SandboxArg: mgr.cfg.SandboxArg,
   817  			PprofPort:  inst.PprofPort(),
   818  		},
   819  	}
   820  	cmd := instance.FuzzerCmd(args)
   821  	_, rep, err := inst.Run(mgr.cfg.Timeouts.VMRunningTime, mgr.reporter, cmd,
   822  		vm.ExitTimeout, vm.StopChan(mgr.vmStop), vm.InjectOutput(injectLog),
   823  		vm.EarlyFinishCb(func() {
   824  			// Depending on the crash type and kernel config, fuzzing may continue
   825  			// running for several seconds even after the kernel has printed a crash report.
   826  			// This litters the log and we want to prevent it.
   827  			mgr.serv.stopFuzzing(instanceName)
   828  		}),
   829  	)
   830  	if err != nil {
   831  		return nil, nil, fmt.Errorf("failed to run fuzzer: %w", err)
   832  	}
   833  	if rep == nil {
   834  		// This is the only "OK" outcome.
   835  		log.Logf(0, "%s: running for %v, restarting", instanceName, time.Since(start))
   836  		return nil, nil, nil
   837  	}
   838  	vmInfo, err := inst.Info()
   839  	if err != nil {
   840  		vmInfo = []byte(fmt.Sprintf("error getting VM info: %v\n", err))
   841  	}
   842  	return rep, vmInfo, nil
   843  }
   844  
   845  func (mgr *Manager) emailCrash(crash *Crash) {
   846  	if len(mgr.cfg.EmailAddrs) == 0 {
   847  		return
   848  	}
   849  	args := []string{"-s", "syzkaller: " + crash.Title}
   850  	args = append(args, mgr.cfg.EmailAddrs...)
   851  	log.Logf(0, "sending email to %v", mgr.cfg.EmailAddrs)
   852  
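        	// This assumes a mailx binary is available in PATH on the manager host.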
   853  	cmd := exec.Command("mailx", args...)
   854  	cmd.Stdin = bytes.NewReader(crash.Report.Report)
   855  	if _, err := osutil.Run(10*time.Minute, cmd); err != nil {
   856  		log.Logf(0, "failed to send email: %v", err)
   857  	}
   858  }
   859  
   860  func (mgr *Manager) saveCrash(crash *Crash) bool {
   861  	if err := mgr.reporter.Symbolize(crash.Report); err != nil {
   862  		log.Errorf("failed to symbolize report: %v", err)
   863  	}
   864  	if crash.Type == crash_pkg.MemoryLeak {
   865  		mgr.mu.Lock()
   866  		mgr.memoryLeakFrames[crash.Frame] = true
   867  		mgr.mu.Unlock()
   868  	}
   869  	if crash.Type == crash_pkg.DataRace {
   870  		mgr.mu.Lock()
   871  		mgr.dataRaceFrames[crash.Frame] = true
   872  		mgr.mu.Unlock()
   873  	}
   874  	flags := ""
   875  	if crash.Corrupted {
   876  		flags += " [corrupted]"
   877  	}
   878  	if crash.Suppressed {
   879  		flags += " [suppressed]"
   880  	}
   881  	log.Logf(0, "%s: crash: %v%v", crash.instanceName, crash.Title, flags)
   882  
   883  	if crash.Suppressed {
   884  		// Collect all of them into a single bucket so that it's possible to monitor and assess them,
   885  		// e.g. to notice spikes in suppressed reports.
   886  		crash.Title = "suppressed report"
   887  		mgr.statSuppressed.Add(1)
   888  	}
   889  
   890  	mgr.statCrashes.Add(1)
   891  	mgr.mu.Lock()
   892  	if !mgr.crashTypes[crash.Title] {
   893  		mgr.crashTypes[crash.Title] = true
   894  		mgr.statCrashTypes.Add(1)
   895  	}
   896  	mgr.mu.Unlock()
   897  
   898  	if mgr.dash != nil {
   899  		if crash.Type == crash_pkg.MemoryLeak {
   900  			return true
   901  		}
   902  		dc := &dashapi.Crash{
   903  			BuildID:     mgr.cfg.Tag,
   904  			Title:       crash.Title,
   905  			AltTitles:   crash.AltTitles,
   906  			Corrupted:   crash.Corrupted,
   907  			Suppressed:  crash.Suppressed,
   908  			Recipients:  crash.Recipients.ToDash(),
   909  			Log:         crash.Output,
   910  			Report:      crash.Report.Report,
   911  			MachineInfo: crash.machineInfo,
   912  		}
   913  		setGuiltyFiles(dc, crash.Report)
   914  		resp, err := mgr.dash.ReportCrash(dc)
   915  		if err != nil {
   916  			log.Logf(0, "failed to report crash to dashboard: %v", err)
   917  		} else {
   918  			// Don't store the crash locally if we've successfully
   919  			// uploaded it to the dashboard. It would just eat disk space.
   920  			return resp.NeedRepro
   921  		}
   922  	}
   923  
   924  	sig := hash.Hash([]byte(crash.Title))
   925  	id := sig.String()
   926  	dir := filepath.Join(mgr.crashdir, id)
   927  	osutil.MkdirAll(dir)
   928  	if err := osutil.WriteFile(filepath.Join(dir, "description"), []byte(crash.Title+"\n")); err != nil {
   929  		log.Logf(0, "failed to write crash: %v", err)
   930  	}
   931  
   932  	// Save up to mgr.cfg.MaxCrashLogs reports, overwriting the oldest once we've reached that number.
   933  	// Newer reports are generally more useful. Overwriting is also needed
   934  	// to be able to tell whether a particular bug still happens or is already fixed.
   935  	oldestI := 0
   936  	var oldestTime time.Time
   937  	for i := 0; i < mgr.cfg.MaxCrashLogs; i++ {
   938  		info, err := os.Stat(filepath.Join(dir, fmt.Sprintf("log%v", i)))
   939  		if err != nil {
   940  			oldestI = i
   941  			if i == 0 {
   942  				go mgr.emailCrash(crash)
   943  			}
   944  			break
   945  		}
   946  		if oldestTime.IsZero() || info.ModTime().Before(oldestTime) {
   947  			oldestI = i
   948  			oldestTime = info.ModTime()
   949  		}
   950  	}
   951  	writeOrRemove := func(name string, data []byte) {
   952  		filename := filepath.Join(dir, name+fmt.Sprint(oldestI))
   953  		if len(data) == 0 {
   954  			os.Remove(filename)
   955  			return
   956  		}
   957  		osutil.WriteFile(filename, data)
   958  	}
   959  	writeOrRemove("log", crash.Output)
   960  	writeOrRemove("tag", []byte(mgr.cfg.Tag))
   961  	writeOrRemove("report", crash.Report.Report)
   962  	writeOrRemove("machineInfo", crash.machineInfo)
   963  	return mgr.needLocalRepro(crash)
   964  }
   965  
   966  const maxReproAttempts = 3
   967  
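        // needLocalRepro reports whether a local repro run is worthwhile: reproduction
        // must be enabled, the report must be usable, and there must not already be a
        // reproducer or maxReproAttempts failed repro logs for this crash title.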
   968  func (mgr *Manager) needLocalRepro(crash *Crash) bool {
   969  	if !mgr.cfg.Reproduce || crash.Corrupted || crash.Suppressed {
   970  		return false
   971  	}
   972  	sig := hash.Hash([]byte(crash.Title))
   973  	dir := filepath.Join(mgr.crashdir, sig.String())
   974  	if osutil.IsExist(filepath.Join(dir, "repro.prog")) {
   975  		return false
   976  	}
   977  	for i := 0; i < maxReproAttempts; i++ {
   978  		if !osutil.IsExist(filepath.Join(dir, fmt.Sprintf("repro%v", i))) {
   979  			return true
   980  		}
   981  	}
   982  	return false
   983  }
   984  
   985  func (mgr *Manager) needRepro(crash *Crash) bool {
   986  	if crash.fromHub || crash.fromDashboard {
   987  		return true
   988  	}
   989  	if !mgr.checkDone || (mgr.enabledFeatures&flatrpc.FeatureLeak != 0 &&
   990  		crash.Type != crash_pkg.MemoryLeak) {
   991  		// Leak checking is very slow; don't bother reproducing other crashes on a leak-checking instance.
   992  		return false
   993  	}
   994  	if mgr.dash == nil {
   995  		return mgr.needLocalRepro(crash)
   996  	}
   997  	cid := &dashapi.CrashID{
   998  		BuildID:      mgr.cfg.Tag,
   999  		Title:        crash.Title,
  1000  		Corrupted:    crash.Corrupted,
  1001  		Suppressed:   crash.Suppressed,
  1002  		MayBeMissing: crash.Type == crash_pkg.MemoryLeak, // we did not send the original crash w/o repro
  1003  	}
  1004  	needRepro, err := mgr.dash.NeedRepro(cid)
  1005  	if err != nil {
  1006  		log.Logf(0, "dashboard.NeedRepro failed: %v", err)
  1007  	}
  1008  	return needRepro
  1009  }
  1010  
  1011  func truncateReproLog(log []byte) []byte {
  1012  	// Repro logs can get quite large and we have trouble sending large API requests (see #4495).
  1013  	// Let's truncate the log to a 512KB prefix and 512KB suffix.
  1014  	return report.Truncate(log, 512000, 512000)
  1015  }
  1016  
  1017  func (mgr *Manager) saveFailedRepro(rep *report.Report, stats *repro.Stats) {
  1018  	reproLog := fullReproLog(stats)
  1019  	if mgr.dash != nil {
  1020  		if rep.Type == crash_pkg.MemoryLeak {
  1021  			// Don't send failed leak repro attempts to dashboard
  1022  			// as we did not send the crash itself.
  1023  			log.Logf(1, "failed repro of '%v': not sending because of the memleak type", rep.Title)
  1024  			return
  1025  		}
  1026  		cid := &dashapi.CrashID{
  1027  			BuildID:      mgr.cfg.Tag,
  1028  			Title:        rep.Title,
  1029  			Corrupted:    rep.Corrupted,
  1030  			Suppressed:   rep.Suppressed,
  1031  			MayBeMissing: rep.Type == crash_pkg.MemoryLeak,
  1032  			ReproLog:     truncateReproLog(reproLog),
  1033  		}
  1034  		if err := mgr.dash.ReportFailedRepro(cid); err != nil {
  1035  			log.Logf(0, "failed to report failed repro to dashboard (log size %d): %v",
  1036  				len(reproLog), err)
  1037  		} else {
  1038  			return
  1039  		}
  1040  	}
  1041  	dir := filepath.Join(mgr.crashdir, hash.String([]byte(rep.Title)))
  1042  	osutil.MkdirAll(dir)
  1043  	for i := 0; i < maxReproAttempts; i++ {
  1044  		name := filepath.Join(dir, fmt.Sprintf("repro%v", i))
  1045  		if !osutil.IsExist(name) && len(reproLog) > 0 {
  1046  			osutil.WriteFile(name, reproLog)
  1047  			break
  1048  		}
  1049  	}
  1050  }
  1051  
  1052  func (mgr *Manager) saveRepro(res *ReproResult) {
  1053  	repro := res.repro
  1054  	opts := fmt.Sprintf("# %+v\n", repro.Opts)
  1055  	progText := repro.Prog.Serialize()
  1056  
  1057  	// Append this repro to the list of repros to send to the hub, if it didn't come from the hub originally.
  1058  	if !res.fromHub {
  1059  		progForHub := []byte(fmt.Sprintf("# %+v\n# %v\n# %v\n%s",
  1060  			repro.Opts, repro.Report.Title, mgr.cfg.Tag, progText))
  1061  		mgr.mu.Lock()
  1062  		mgr.newRepros = append(mgr.newRepros, progForHub)
  1063  		mgr.mu.Unlock()
  1064  	}
  1065  
  1066  	var cprogText []byte
  1067  	if repro.CRepro {
  1068  		cprog, err := csource.Write(repro.Prog, repro.Opts)
  1069  		if err == nil {
  1070  			formatted, err := csource.Format(cprog)
  1071  			if err == nil {
  1072  				cprog = formatted
  1073  			}
  1074  			cprogText = cprog
  1075  		} else {
  1076  			log.Logf(0, "failed to write C source: %v", err)
  1077  		}
  1078  	}
  1079  
  1080  	if mgr.dash != nil {
  1081  		// Note: we intentionally don't set Corrupted for reproducers:
  1082  		// 1. The crash is reproducible, so it can be debugged even with a corrupted report.
  1083  		// 2. The repro was re-tried 3 times and we still got a corrupted report at the end,
  1084  		//    so maybe corrupted-report detection is broken.
  1085  		// 3. Reproduction is expensive, so it's good to persist the result.
  1086  
  1087  		report := repro.Report
  1088  		output := report.Output
  1089  
  1090  		var crashFlags dashapi.CrashFlags
  1091  		if res.strace != nil {
  1092  			// If syzkaller managed to successfully run the repro with strace, send
  1093  			// the report and the output generated under strace.
  1094  			report = res.strace.Report
  1095  			output = res.strace.Output
  1096  			crashFlags = dashapi.CrashUnderStrace
  1097  		}
  1098  
  1099  		dc := &dashapi.Crash{
  1100  			BuildID:       mgr.cfg.Tag,
  1101  			Title:         report.Title,
  1102  			AltTitles:     report.AltTitles,
  1103  			Suppressed:    report.Suppressed,
  1104  			Recipients:    report.Recipients.ToDash(),
  1105  			Log:           output,
  1106  			Flags:         crashFlags,
  1107  			Report:        report.Report,
  1108  			ReproOpts:     repro.Opts.Serialize(),
  1109  			ReproSyz:      progText,
  1110  			ReproC:        cprogText,
  1111  			ReproLog:      truncateReproLog(fullReproLog(res.stats)),
  1112  			Assets:        mgr.uploadReproAssets(repro),
  1113  			OriginalTitle: res.originalTitle,
  1114  		}
  1115  		setGuiltyFiles(dc, report)
  1116  		if _, err := mgr.dash.ReportCrash(dc); err != nil {
  1117  			log.Logf(0, "failed to report repro to dashboard: %v", err)
  1118  		} else {
  1119  			// Don't store the crash locally if we've successfully
  1120  			// uploaded it to the dashboard. It would just eat disk space.
  1121  			return
  1122  		}
  1123  	}
  1124  
  1125  	rep := repro.Report
  1126  	dir := filepath.Join(mgr.crashdir, hash.String([]byte(rep.Title)))
  1127  	osutil.MkdirAll(dir)
  1128  
  1129  	if err := osutil.WriteFile(filepath.Join(dir, "description"), []byte(rep.Title+"\n")); err != nil {
  1130  		log.Logf(0, "failed to write crash: %v", err)
  1131  	}
  1132  	osutil.WriteFile(filepath.Join(dir, "repro.prog"), append([]byte(opts), progText...))
  1133  	if mgr.cfg.Tag != "" {
  1134  		osutil.WriteFile(filepath.Join(dir, "repro.tag"), []byte(mgr.cfg.Tag))
  1135  	}
  1136  	if len(rep.Output) > 0 {
  1137  		osutil.WriteFile(filepath.Join(dir, "repro.log"), rep.Output)
  1138  	}
  1139  	if len(rep.Report) > 0 {
  1140  		osutil.WriteFile(filepath.Join(dir, "repro.report"), rep.Report)
  1141  	}
  1142  	if len(cprogText) > 0 {
  1143  		osutil.WriteFile(filepath.Join(dir, "repro.cprog"), cprogText)
  1144  	}
  1145  	repro.Prog.ForEachAsset(func(name string, typ prog.AssetType, r io.Reader) {
  1146  		fileName := filepath.Join(dir, name+".gz")
  1147  		if err := osutil.WriteGzipStream(fileName, r); err != nil {
  1148  			log.Logf(0, "failed to write crash asset: type %d, write error %v", typ, err)
  1149  		}
  1150  	})
  1151  	if res.strace != nil {
  1152  		// Unlike dashboard reporting, we save strace output separately from the original log.
  1153  		if res.strace.Error != nil {
  1154  			osutil.WriteFile(filepath.Join(dir, "strace.error"),
  1155  				[]byte(fmt.Sprintf("%v", res.strace.Error)))
  1156  		}
  1157  		if len(res.strace.Output) > 0 {
  1158  			osutil.WriteFile(filepath.Join(dir, "strace.log"), res.strace.Output)
  1159  		}
  1160  	}
  1161  	if reproLog := fullReproLog(res.stats); len(reproLog) > 0 {
  1162  		osutil.WriteFile(filepath.Join(dir, "repro.stats"), reproLog)
  1163  	}
  1164  }
  1165  
  1166  func (mgr *Manager) uploadReproAssets(repro *repro.Result) []dashapi.NewAsset {
  1167  	if mgr.assetStorage == nil {
  1168  		return nil
  1169  	}
  1170  
  1171  	ret := []dashapi.NewAsset{}
  1172  	repro.Prog.ForEachAsset(func(name string, typ prog.AssetType, r io.Reader) {
  1173  		dashTyp, ok := map[prog.AssetType]dashapi.AssetType{
  1174  			prog.MountInRepro: dashapi.MountInRepro,
  1175  		}[typ]
  1176  		if !ok {
  1177  			panic("unknown extracted prog asset")
  1178  		}
  1179  		asset, err := mgr.assetStorage.UploadCrashAsset(r, name, dashTyp, nil)
  1180  		if err != nil {
  1181  			log.Logf(1, "processing of the asset %v (%v) failed: %v", name, typ, err)
  1182  			return
  1183  		}
  1184  		ret = append(ret, asset)
  1185  	})
  1186  	return ret
  1187  }
  1188  
  1189  func fullReproLog(stats *repro.Stats) []byte {
  1190  	if stats == nil {
  1191  		return nil
  1192  	}
  1193  	return []byte(fmt.Sprintf("Extracting prog: %v\nMinimizing prog: %v\n"+
  1194  		"Simplifying prog options: %v\nExtracting C: %v\nSimplifying C: %v\n\n\n%s",
  1195  		stats.ExtractProgTime, stats.MinimizeProgTime,
  1196  		stats.SimplifyProgTime, stats.ExtractCTime, stats.SimplifyCTime, stats.Log))
  1197  }
  1198  
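        // corpusInputHandler persists every newly added corpus item into corpus.db
        // and feeds its new coverage into the cover filter of running instances.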
  1199  func (mgr *Manager) corpusInputHandler(updates <-chan corpus.NewItemEvent) {
  1200  	for update := range updates {
  1201  		mgr.serv.updateCoverFilter(update.NewCover)
  1202  		if update.Exists {
  1203  			// We only save new progs into the corpus.db file.
  1204  			continue
  1205  		}
  1206  		mgr.corpusDBMu.Lock()
  1207  		mgr.corpusDB.Save(update.Sig, update.ProgData, 0)
  1208  		if err := mgr.corpusDB.Flush(); err != nil {
  1209  			log.Errorf("failed to save corpus database: %v", err)
  1210  		}
  1211  		mgr.corpusDBMu.Unlock()
  1212  	}
  1213  }
  1214  
  1215  func (mgr *Manager) getMinimizedCorpus() (corpus, repros [][]byte) {
  1216  	mgr.mu.Lock()
  1217  	defer mgr.mu.Unlock()
  1218  	mgr.minimizeCorpusLocked()
  1219  	items := mgr.corpus.Items()
  1220  	corpus = make([][]byte, 0, len(items))
  1221  	for _, inp := range items {
  1222  		corpus = append(corpus, inp.ProgData)
  1223  	}
  1224  	repros = mgr.newRepros
  1225  	mgr.newRepros = nil
  1226  	return
  1227  }
  1228  
  1229  func (mgr *Manager) addNewCandidates(candidates []fuzzer.Candidate) {
  1230  	if mgr.cfg.Experimental.ResetAccState {
  1231  		// Don't accept new candidates -- the execution is already very slow,
  1232  		// syz-hub will just overwhelm us.
  1233  		return
  1234  	}
  1235  	mgr.fuzzer.Load().AddCandidates(candidates)
  1236  	mgr.mu.Lock()
  1237  	defer mgr.mu.Unlock()
  1238  	if mgr.phase == phaseTriagedCorpus {
  1239  		mgr.phase = phaseQueriedHub
  1240  	}
  1241  }
  1242  
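        // minimizeCorpusLocked re-minimizes the corpus once it has grown by more than
        // 3% since the last minimization, and prunes corpus.db after corpus triage.
        // The caller must hold mgr.mu.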
  1243  func (mgr *Manager) minimizeCorpusLocked() {
  1244  	currSize := mgr.corpus.StatProgs.Val()
  1245  	if currSize <= mgr.lastMinCorpus*103/100 {
  1246  		return
  1247  	}
  1248  	mgr.corpus.Minimize(mgr.cfg.Cover)
  1249  	newSize := mgr.corpus.StatProgs.Val()
  1250  
  1251  	log.Logf(1, "minimized corpus: %v -> %v", currSize, newSize)
  1252  	mgr.lastMinCorpus = newSize
  1253  
  1254  	// From time to time we get corpus explosion for different reasons:
  1255  	// generic bugs, per-OS bugs, problems with fallback coverage, kcov bugs, etc.
  1256  	// This has a bad effect on the instance, and especially on instances
  1257  	// connected via the hub. Do some per-syscall sanity checking to prevent this.
  1258  	for call, info := range mgr.corpus.CallCover() {
  1259  		if mgr.cfg.Cover {
  1260  			// If we have fewer than 1K inputs for this call,
  1261  			// accept all new inputs unconditionally.
  1262  			if info.Count < 1000 {
  1263  				continue
  1264  			}
  1265  			// If we have more than 3K already, don't accept any more.
  1266  			// Between 1K and 3K, look at the amount of coverage we are getting from these programs.
  1267  			// Empirically, real coverage for the most saturated syscalls is ~30-60
  1268  			// per program (even when we have a thousand of them). In the explosion
  1269  			// case, coverage tends to be much lower (~0.3-5 per program).
  1270  			if info.Count < 3000 && len(info.Cover)/info.Count >= 10 {
  1271  				continue
  1272  			}
  1273  		} else {
  1274  			// If we don't have real coverage, the signal is weak.
  1275  			// If we have more than several hundred, something is wrong.
  1276  			if info.Count < 300 {
  1277  				continue
  1278  			}
  1279  		}
  1280  		if mgr.saturatedCalls[call] {
  1281  			continue
  1282  		}
  1283  		mgr.saturatedCalls[call] = true
  1284  		log.Logf(0, "coverage for %v has saturated, not accepting more inputs", call)
  1285  	}
  1286  
  1287  	// Don't minimize persistent corpus until fuzzers have triaged all inputs from it.
  1288  	if mgr.phase < phaseTriagedCorpus {
  1289  		return
  1290  	}
  1291  	mgr.corpusDBMu.Lock()
  1292  	defer mgr.corpusDBMu.Unlock()
  1293  	for key := range mgr.corpusDB.Records {
  1294  		ok1 := mgr.corpus.Item(key) != nil
  1295  		_, ok2 := mgr.disabledHashes[key]
  1296  		if !ok1 && !ok2 {
  1297  			mgr.corpusDB.Delete(key)
  1298  		}
  1299  	}
  1300  	mgr.corpusDB.BumpVersion(currentDBVersion)
  1301  }
  1302  
  1303  func setGuiltyFiles(crash *dashapi.Crash, report *report.Report) {
  1304  	if report.GuiltyFile != "" {
  1305  		crash.GuiltyFiles = []string{report.GuiltyFile}
  1306  	}
  1307  }
  1308  
  1309  func (mgr *Manager) collectSyscallInfo() map[string]*corpus.CallCov {
  1310  	mgr.mu.Lock()
  1311  	enabledSyscalls := mgr.targetEnabledSyscalls
  1312  	mgr.mu.Unlock()
  1313  
  1314  	if enabledSyscalls == nil {
  1315  		return nil
  1316  	}
  1317  	calls := mgr.corpus.CallCover()
  1318  	// Add enabled but not yet covered calls.
  1319  	for call := range enabledSyscalls {
  1320  		if calls[call.Name] == nil {
  1321  			calls[call.Name] = new(corpus.CallCov)
  1322  		}
  1323  	}
  1324  	return calls
  1325  }
  1326  
  1327  func (mgr *Manager) currentBugFrames() BugFrames {
  1328  	mgr.mu.Lock()
  1329  	defer mgr.mu.Unlock()
  1330  	frames := BugFrames{
  1331  		memoryLeaks: make([]string, 0, len(mgr.memoryLeakFrames)),
  1332  		dataRaces:   make([]string, 0, len(mgr.dataRaceFrames)),
  1333  	}
  1334  	for frame := range mgr.memoryLeakFrames {
  1335  		frames.memoryLeaks = append(frames.memoryLeaks, frame)
  1336  	}
  1337  	for frame := range mgr.dataRaceFrames {
  1338  		frames.dataRaces = append(frames.dataRaces, frame)
  1339  	}
  1340  	return frames
  1341  }
  1342  
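        // machineChecked runs once the machine check has passed: it records the
        // enabled features and syscalls, creates the fuzzer object, loads the corpus
        // and starts the background loops. The returned fuzzer serves execution requests.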
  1343  func (mgr *Manager) machineChecked(features flatrpc.Feature, enabledSyscalls map[*prog.Syscall]bool,
  1344  	opts ipc.ExecOpts) queue.Source {
  1345  	mgr.mu.Lock()
  1346  	defer mgr.mu.Unlock()
  1347  	if mgr.checkDone {
  1348  		panic("machineChecked() called twice")
  1349  	}
  1350  	mgr.checkDone = true
  1351  	mgr.enabledFeatures = features
  1352  	mgr.targetEnabledSyscalls = enabledSyscalls
  1353  	statSyscalls := stats.Create("syscalls", "Number of enabled syscalls",
  1354  		stats.Simple, stats.NoGraph, stats.Link("/syscalls"))
  1355  	statSyscalls.Add(len(enabledSyscalls))
  1356  
  1357  	rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
  1358  	fuzzerObj := fuzzer.NewFuzzer(context.Background(), &fuzzer.Config{
  1359  		Corpus:         mgr.corpus,
  1360  		BaseOpts:       opts,
  1361  		Coverage:       mgr.cfg.Cover,
  1362  		FaultInjection: features&flatrpc.FeatureFault != 0,
  1363  		Comparisons:    features&flatrpc.FeatureComparisons != 0,
  1364  		Collide:        true,
  1365  		EnabledCalls:   enabledSyscalls,
  1366  		NoMutateCalls:  mgr.cfg.NoMutateCalls,
  1367  		FetchRawCover:  mgr.cfg.RawCover,
  1368  		Logf: func(level int, msg string, args ...interface{}) {
  1369  			if level != 0 {
  1370  				return
  1371  			}
  1372  			log.Logf(level, msg, args...)
  1373  		},
  1374  		NewInputFilter: func(call string) bool {
  1375  			mgr.mu.Lock()
  1376  			defer mgr.mu.Unlock()
  1377  			return !mgr.saturatedCalls[call]
  1378  		},
  1379  	}, rnd, mgr.target)
  1380  	mgr.fuzzer.Store(fuzzerObj)
  1381  
  1382  	mgr.loadCorpus()
  1383  	mgr.firstConnect.Store(time.Now().Unix())
  1384  	go mgr.corpusMinimization()
  1385  	go mgr.fuzzerLoop(fuzzerObj)
  1386  	if mgr.dash != nil {
  1387  		go mgr.dashboardReporter()
  1388  		if mgr.cfg.Reproduce {
  1389  			go mgr.dashboardReproTasks()
  1390  		}
  1391  	}
  1392  	return fuzzerObj
  1393  }
  1394  
  1395  func (mgr *Manager) corpusMinimization() {
  1396  	for range time.NewTicker(time.Minute).C {
  1397  		mgr.mu.Lock()
  1398  		mgr.minimizeCorpusLocked()
  1399  		mgr.mu.Unlock()
  1400  	}
  1401  }
  1402  
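        // fuzzerSignalRotation periodically drops a portion of the max signal; the
        // constants below bound how often this happens, since every dropped signal
        // may cause extra triage executions.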
  1403  func (mgr *Manager) fuzzerSignalRotation() {
  1404  	const (
  1405  		rotateSignals      = 1000
  1406  		timeBetweenRotates = 15 * time.Minute
  1407  		// Every X dropped signals may in the worst case lead to up to 3 * X
  1408  		// additional triage executions, which in this case constitutes
  1409  		// 3000/60000 = 5%.
  1410  		execsBetweenRotates = 60000
  1411  	)
  1412  	lastExecTotal := 0
  1413  	lastRotation := time.Now()
  1414  	for range time.NewTicker(5 * time.Minute).C {
  1415  		if mgr.statExecs.Val()-lastExecTotal < execsBetweenRotates {
  1416  			continue
  1417  		}
  1418  		if time.Since(lastRotation) < timeBetweenRotates {
  1419  			continue
  1420  		}
  1421  		mgr.fuzzer.Load().RotateMaxSignal(rotateSignals)
  1422  		lastRotation = time.Now()
  1423  		lastExecTotal = mgr.statExecs.Val()
  1424  	}
  1425  }
  1426  
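        // fuzzerLoop periodically distributes max-signal deltas to all instances and
        // advances the manager phase once all candidates have been triaged.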
  1427  func (mgr *Manager) fuzzerLoop(fuzzer *fuzzer.Fuzzer) {
  1428  	for ; ; time.Sleep(time.Second / 2) {
  1429  		// Distribute new max signal over all instances.
  1430  		newSignal, dropSignal := fuzzer.Cover.GrabSignalDelta()
  1431  		log.Logf(2, "distributing %d new signal, %d dropped signal",
  1432  			len(newSignal), len(dropSignal))
  1433  		if len(newSignal)+len(dropSignal) != 0 {
  1434  			mgr.serv.distributeSignalDelta(newSignal, dropSignal)
  1435  		}
  1436  
  1437  		// Update the state machine.
  1438  		if fuzzer.StatCandidates.Val() == 0 {
  1439  			mgr.mu.Lock()
  1440  			if mgr.phase == phaseLoadedCorpus {
  1441  				go mgr.fuzzerSignalRotation()
  1442  				if mgr.cfg.HubClient != "" {
  1443  					mgr.phase = phaseTriagedCorpus
  1444  					go mgr.hubSyncLoop(pickGetter(mgr.cfg.HubKey))
  1445  				} else {
  1446  					mgr.phase = phaseTriagedHub
  1447  				}
  1448  			} else if mgr.phase == phaseQueriedHub {
  1449  				mgr.phase = phaseTriagedHub
  1450  			}
  1451  			mgr.mu.Unlock()
  1452  		}
  1453  	}
  1454  }
  1455  
  1456  func (mgr *Manager) hubIsUnreachable() {
  1457  	var dash *dashapi.Dashboard
  1458  	mgr.mu.Lock()
  1459  	if mgr.phase == phaseTriagedCorpus {
  1460  		dash = mgr.dash
  1461  		mgr.phase = phaseTriagedHub
  1462  		log.Errorf("did not manage to connect to syz-hub; moving forward")
  1463  	}
  1464  	mgr.mu.Unlock()
  1465  	if dash != nil {
  1466  		mgr.dash.LogError(mgr.cfg.Name, "did not manage to connect to syz-hub")
  1467  	}
  1468  }
  1469  
  1470  func (mgr *Manager) collectUsedFiles() {
  1471  	if mgr.vmPool == nil {
  1472  		return
  1473  	}
  1474  	addUsedFile := func(f string) {
  1475  		if f == "" {
  1476  			return
  1477  		}
  1478  		stat, err := os.Stat(f)
  1479  		if err != nil {
  1480  			log.Fatalf("failed to stat %v: %v", f, err)
  1481  		}
  1482  		mgr.usedFiles[f] = stat.ModTime()
  1483  	}
  1484  	cfg := mgr.cfg
  1485  	addUsedFile(cfg.FuzzerBin)
  1486  	addUsedFile(cfg.ExecprogBin)
  1487  	addUsedFile(cfg.ExecutorBin)
  1488  	addUsedFile(cfg.SSHKey)
  1489  	if vmlinux := filepath.Join(cfg.KernelObj, mgr.sysTarget.KernelObject); osutil.IsExist(vmlinux) {
  1490  		addUsedFile(vmlinux)
  1491  	}
  1492  	if cfg.Image != "9p" {
  1493  		addUsedFile(cfg.Image)
  1494  	}
  1495  }
  1496  
  1497  func (mgr *Manager) checkUsedFiles() {
  1498  	for f, mod := range mgr.usedFiles {
  1499  		stat, err := os.Stat(f)
  1500  		if err != nil {
  1501  			log.Fatalf("failed to stat %v: %v", f, err)
  1502  		}
  1503  		if mod != stat.ModTime() {
  1504  			log.Fatalf("file %v that syz-manager uses has been modified by an external program\n"+
  1505  				"this can lead to arbitrary syz-manager misbehavior\n"+
  1506  				"modification time has changed: %v -> %v\n"+
  1507  				"don't modify files that syz-manager uses. exiting to prevent harm",
  1508  				f, mod, stat.ModTime())
  1509  		}
  1510  	}
  1511  }
  1512  
  1513  func (mgr *Manager) dashboardReporter() {
  1514  	webAddr := publicWebAddr(mgr.cfg.HTTP)
  1515  	triageInfoSent := false
  1516  	var lastFuzzingTime time.Duration
  1517  	var lastCrashes, lastSuppressedCrashes, lastExecs uint64
  1518  	for range time.NewTicker(time.Minute).C {
  1519  		mgr.mu.Lock()
  1520  		req := &dashapi.ManagerStatsReq{
  1521  			Name:              mgr.cfg.Name,
  1522  			Addr:              webAddr,
  1523  			UpTime:            time.Duration(mgr.statUptime.Val()) * time.Second,
  1524  			Corpus:            uint64(mgr.corpus.StatProgs.Val()),
  1525  			PCs:               uint64(mgr.corpus.StatCover.Val()),
  1526  			Cover:             uint64(mgr.corpus.StatSignal.Val()),
  1527  			CrashTypes:        uint64(mgr.statCrashTypes.Val()),
  1528  			FuzzingTime:       time.Duration(mgr.statFuzzingTime.Val()) - lastFuzzingTime,
  1529  			Crashes:           uint64(mgr.statCrashes.Val()) - lastCrashes,
  1530  			SuppressedCrashes: uint64(mgr.statSuppressed.Val()) - lastSuppressedCrashes,
  1531  			Execs:             uint64(mgr.statExecs.Val()) - lastExecs,
  1532  		}
  1533  		if mgr.phase >= phaseTriagedCorpus && !triageInfoSent {
  1534  			triageInfoSent = true
  1535  			req.TriagedCoverage = uint64(mgr.corpus.StatSignal.Val())
  1536  			req.TriagedPCs = uint64(mgr.corpus.StatCover.Val())
  1537  		}
  1538  		mgr.mu.Unlock()
  1539  
  1540  		if err := mgr.dash.UploadManagerStats(req); err != nil {
  1541  			log.Logf(0, "failed to upload dashboard stats: %v", err)
  1542  			continue
  1543  		}
  1544  		mgr.mu.Lock()
  1545  		lastFuzzingTime += req.FuzzingTime
  1546  		lastCrashes += req.Crashes
  1547  		lastSuppressedCrashes += req.SuppressedCrashes
  1548  		lastExecs += req.Execs
  1549  		mgr.mu.Unlock()
  1550  	}
  1551  }
  1552  
  1553  func (mgr *Manager) dashboardReproTasks() {
  1554  	for range time.NewTicker(20 * time.Minute).C {
  1555  		needReproReply := make(chan bool)
  1556  		mgr.needMoreRepros <- needReproReply
  1557  		if !<-needReproReply {
  1558  			// We don't need reproducers at the moment.
  1559  			continue
  1560  		}
  1561  		resp, err := mgr.dash.LogToRepro(&dashapi.LogToReproReq{BuildID: mgr.cfg.Tag})
  1562  		if err != nil {
  1563  			log.Logf(0, "failed to query logs to reproduce: %v", err)
  1564  			continue
  1565  		}
  1566  		if len(resp.CrashLog) > 0 {
  1567  			mgr.externalReproQueue <- &Crash{
  1568  				fromDashboard: true,
  1569  				Report: &report.Report{
  1570  					Title:  resp.Title,
  1571  					Output: resp.CrashLog,
  1572  				},
  1573  			}
  1574  		}
  1575  	}
  1576  }
  1577  
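        // publicWebAddr converts the configured HTTP address into an externally
        // reachable URL, preferring the GCE external IP when running on GCE.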
  1578  func publicWebAddr(addr string) string {
  1579  	_, port, err := net.SplitHostPort(addr)
  1580  	if err == nil && port != "" {
  1581  		if host, err := os.Hostname(); err == nil {
  1582  			addr = net.JoinHostPort(host, port)
  1583  		}
  1584  		if GCE, err := gce.NewContext(""); err == nil {
  1585  			addr = net.JoinHostPort(GCE.ExternalIP, port)
  1586  		}
  1587  	}
  1588  	return "http://" + addr
  1589  }