github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/syz-manager/manager.go

// Copyright 2015 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package main

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"flag"
	"fmt"
	"io"
	"math/rand"
	"net"
	"os"
	"os/exec"
	"path"
	"path/filepath"
	"sort"
	"sync"
	"sync/atomic"
	"time"

	"github.com/google/syzkaller/dashboard/dashapi"
	"github.com/google/syzkaller/pkg/asset"
	"github.com/google/syzkaller/pkg/corpus"
	"github.com/google/syzkaller/pkg/db"
	"github.com/google/syzkaller/pkg/flatrpc"
	"github.com/google/syzkaller/pkg/fuzzer"
	"github.com/google/syzkaller/pkg/fuzzer/queue"
	"github.com/google/syzkaller/pkg/gce"
	"github.com/google/syzkaller/pkg/ifaceprobe"
	"github.com/google/syzkaller/pkg/image"
	"github.com/google/syzkaller/pkg/kfuzztest"
	"github.com/google/syzkaller/pkg/log"
	"github.com/google/syzkaller/pkg/manager"
	"github.com/google/syzkaller/pkg/mgrconfig"
	"github.com/google/syzkaller/pkg/osutil"
	"github.com/google/syzkaller/pkg/report"
	crash_pkg "github.com/google/syzkaller/pkg/report/crash"
	"github.com/google/syzkaller/pkg/repro"
	"github.com/google/syzkaller/pkg/rpcserver"
	"github.com/google/syzkaller/pkg/runtest"
	"github.com/google/syzkaller/pkg/signal"
	"github.com/google/syzkaller/pkg/stat"
	"github.com/google/syzkaller/pkg/vminfo"
	"github.com/google/syzkaller/prog"
	"github.com/google/syzkaller/sys/targets"
	"github.com/google/syzkaller/vm"
	"github.com/google/syzkaller/vm/dispatcher"
)

var (
	flagConfig = flag.String("config", "", "configuration file")
	flagDebug  = flag.Bool("debug", false, "dump all VM output to console")
	flagBench  = flag.String("bench", "", "write execution statistics into this file periodically")
	flagMode   = flag.String("mode", ModeFuzzing.Name, modesDescription())
	flagTests  = flag.String("tests", "", "prefix to match test file names (for -mode run-tests)")
)

type Manager struct {
	cfg             *mgrconfig.Config
	mode            *Mode
	vmPool          *vm.Pool
	pool            *vm.Dispatcher
	target          *prog.Target
	sysTarget       *targets.Target
	reporter        *report.Reporter
	crashStore      *manager.CrashStore
	serv            rpcserver.Server
	http            *manager.HTTPServer
	servStats       rpcserver.Stats
	corpus          *corpus.Corpus
	corpusDB        *db.DB
	corpusDBMu      sync.Mutex // for concurrent operations on corpusDB
	corpusPreload   chan []fuzzer.Candidate
	firstConnect    atomic.Int64 // unix time, or 0 if not connected
	crashTypes      map[string]bool
	enabledFeatures flatrpc.Feature
	checkDone       atomic.Bool
	reportGenerator *manager.ReportGeneratorWrapper
	fresh           bool
	coverFilters    manager.CoverageFilters

	dash *dashapi.Dashboard
	// This is specifically separated from dash, so that we can keep dash = nil when
	// cfg.DashboardOnlyRepro is set, so that we don't accidentally use dash for anything.
	dashRepro *dashapi.Dashboard

	mu             sync.Mutex
	fuzzer         atomic.Pointer[fuzzer.Fuzzer]
	snapshotSource *queue.Distributor
	phase          int

	disabledHashes   map[string]struct{}
	newRepros        [][]byte
	lastMinCorpus    int
	memoryLeakFrames map[string]bool
	dataRaceFrames   map[string]bool
	saturatedCalls   map[string]bool

	externalReproQueue chan *manager.Crash
	crashes            chan *manager.Crash

	benchMu   sync.Mutex
	benchFile *os.File

	assetStorage *asset.Storage
	fsckChecker  image.FsckChecker

	reproLoop *manager.ReproLoop

	Stats
}

type Mode struct {
	Name                  string
	Description           string
	UseDashboard          bool // the mode connects to dashboard/hub
	LoadCorpus            bool // the mode needs to load the corpus
	ExitAfterMachineCheck bool // exit with 0 status when machine check is done
	// Exit with non-zero status and save the report to workdir/report.json if any kernel crash happens.
	FailOnCrashes bool
	CheckConfig   func(cfg *mgrconfig.Config) error
}

var (
	ModeFuzzing = &Mode{
		Name:         "fuzzing",
		Description:  `the default continuous fuzzing mode`,
		UseDashboard: true,
		LoadCorpus:   true,
	}
	ModeSmokeTest = &Mode{
		Name: "smoke-test",
		Description: `run smoke test for syzkaller+kernel
The test consists of booting VMs and running some simple test programs
to ensure that fuzzing can proceed in general. After completing the test
the process exits and the exit status indicates success/failure.
If the kernel oopses during testing, the report is saved to workdir/report.json.`,
		ExitAfterMachineCheck: true,
		FailOnCrashes:         true,
	}
	ModeCorpusTriage = &Mode{
		Name: "corpus-triage",
		Description: `triage corpus and exit
This is useful mostly for benchmarking with testbed.`,
		LoadCorpus: true,
	}
	ModeCorpusRun = &Mode{
		Name:        "corpus-run",
		Description: `continuously run the corpus programs`,
		LoadCorpus:  true,
	}
	ModeRunTests = &Mode{
		Name: "run-tests",
		Description: `run unit tests
Run sys/os/test/* tests in various modes and print results.`,
	}
	ModeIfaceProbe = &Mode{
		Name: "iface-probe",
		Description: `run dynamic part of kernel interface auto-extraction
When the probe is finished, manager writes the result to workdir/interfaces.json file and exits.`,
		CheckConfig: func(cfg *mgrconfig.Config) error {
			if cfg.Snapshot {
				return fmt.Errorf("snapshot mode is not supported")
			}
			if cfg.Sandbox != "none" {
				return fmt.Errorf("sandbox \"%v\" is not supported (only \"none\")", cfg.Sandbox)
			}
			if !cfg.Cover {
				return fmt.Errorf("coverage is required")
			}
			return nil
		},
	}

	modes = []*Mode{
		ModeFuzzing,
		ModeSmokeTest,
		ModeCorpusTriage,
		ModeCorpusRun,
		ModeRunTests,
		ModeIfaceProbe,
	}
)

func modesDescription() string {
	desc := "mode of operation, one of:\n"
	for _, mode := range modes {
		desc += fmt.Sprintf(" - %v: %v\n", mode.Name, mode.Description)
	}
	return desc
}

const (
	// Just started, nothing done yet.
	phaseInit = iota
	// Corpus is loaded and machine is checked.
	phaseLoadedCorpus
	// Triaged all inputs from corpus.
	// This is when we start querying hub and minimizing persistent corpus.
	phaseTriagedCorpus
	// Made the first request to hub.
	phaseQueriedHub
	// Triaged all new inputs from hub.
	// This is when we start reproducing crashes.
	phaseTriagedHub
)
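// Example invocations (illustrative only; "manager.cfg" is a placeholder config file,
// the mode names come from the Mode list above):
//
//	syz-manager -config=manager.cfg                  // continuous fuzzing (default mode)
//	syz-manager -config=manager.cfg -mode=smoke-test // boot VMs, run simple programs, exit
//	syz-manager -config=manager.cfg -mode=run-tests -tests=<prefix>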
func main() {
	flag.Parse()
	if !prog.GitRevisionKnown() {
		log.Fatalf("bad syz-manager build: build with make, run bin/syz-manager")
	}
	log.EnableLogCaching(1000, 1<<20)
	cfg, err := mgrconfig.LoadFile(*flagConfig)
	if err != nil {
		log.Fatalf("%v", err)
	}
	if cfg.DashboardAddr != "" {
		// This lets us better distinguish logs of individual syz-manager instances.
		log.SetName(cfg.Name)
	}
	var mode *Mode
	for _, m := range modes {
		if *flagMode == m.Name {
			mode = m
			break
		}
	}
	if mode == nil {
		flag.PrintDefaults()
		log.Fatalf("unknown mode: %v", *flagMode)
	}
	if mode.CheckConfig != nil {
		if err := mode.CheckConfig(cfg); err != nil {
			log.Fatalf("%v mode: %v", mode.Name, err)
		}
	}
	if !mode.UseDashboard {
		cfg.DashboardClient = ""
		cfg.HubClient = ""
	}
	if cfg.Experimental.EnableKFuzzTest {
		vmLinuxPath := path.Join(cfg.KernelObj, cfg.SysTarget.KernelObject)
		log.Log(0, "enabling KFuzzTest targets")
		_, err := kfuzztest.ActivateKFuzzTargets(cfg.Target, vmLinuxPath)
		if err != nil {
			log.Fatalf("failed to enable KFuzzTest targets: %v", err)
		}
	}
	RunManager(mode, cfg)
}

func RunManager(mode *Mode, cfg *mgrconfig.Config) {
	var vmPool *vm.Pool
	if !cfg.VMLess {
		var err error
		vmPool, err = vm.Create(cfg, *flagDebug)
		if err != nil {
			log.Fatalf("%v", err)
		}
		defer vmPool.Close()
	}

	osutil.MkdirAll(cfg.Workdir)

	reporter, err := report.NewReporter(cfg)
	if err != nil {
		log.Fatalf("%v", err)
	}

	mgr := &Manager{
		cfg:                cfg,
		mode:               mode,
		vmPool:             vmPool,
		corpusPreload:      make(chan []fuzzer.Candidate),
		target:             cfg.Target,
		sysTarget:          cfg.SysTarget,
		reporter:           reporter,
		crashStore:         manager.NewCrashStore(cfg),
		crashTypes:         make(map[string]bool),
		disabledHashes:     make(map[string]struct{}),
		memoryLeakFrames:   make(map[string]bool),
		dataRaceFrames:     make(map[string]bool),
		fresh:              true,
		externalReproQueue: make(chan *manager.Crash, 10),
		crashes:            make(chan *manager.Crash, 10),
		saturatedCalls:     make(map[string]bool),
		reportGenerator:    manager.ReportGeneratorCache(cfg),
	}
	if *flagDebug {
		mgr.cfg.Procs = 1
	}
	mgr.http = &manager.HTTPServer{
		// Note that if cfg.HTTP == "", we don't start the server.
		Cfg:        cfg,
		StartTime:  time.Now(),
		CrashStore: mgr.crashStore,
	}

	mgr.initStats()
	if mgr.mode.LoadCorpus {
		go mgr.preloadCorpus()
	} else {
		close(mgr.corpusPreload)
	}

	// Create RPC server for fuzzers.
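	// (Descriptive note: this is the endpoint that syz-executor "runner" processes in the
	// VMs connect back to; runInstanceInner below forwards mgr.serv.Port() into the guest
	// and starts "syz-executor runner <index> <host> <port>".)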
	mgr.servStats = rpcserver.NewStats()
	rpcCfg := &rpcserver.RemoteConfig{
		Config:  mgr.cfg,
		Manager: mgr,
		Stats:   mgr.servStats,
		Debug:   *flagDebug,
	}
	mgr.serv, err = rpcserver.New(rpcCfg)
	if err != nil {
		log.Fatalf("failed to create rpc server: %v", err)
	}
	if err := mgr.serv.Listen(); err != nil {
		log.Fatalf("failed to start rpc server: %v", err)
	}
	ctx := vm.ShutdownCtx()
	go func() {
		err := mgr.serv.Serve(ctx)
		if err != nil {
			log.Fatalf("%s", err)
		}
	}()
	log.Logf(0, "serving rpc on tcp://%v", mgr.serv.Port())

	if cfg.DashboardAddr != "" {
		opts := []dashapi.DashboardOpts{}
		if cfg.DashboardUserAgent != "" {
			opts = append(opts, dashapi.UserAgent(cfg.DashboardUserAgent))
		}
		dash, err := dashapi.New(cfg.DashboardClient, cfg.DashboardAddr, cfg.DashboardKey, opts...)
		if err != nil {
			log.Fatalf("failed to create dashapi connection: %v", err)
		}
		mgr.dashRepro = dash
		if !cfg.DashboardOnlyRepro {
			mgr.dash = dash
		}
	}

	if !cfg.AssetStorage.IsEmpty() {
		mgr.assetStorage, err = asset.StorageFromConfig(cfg.AssetStorage, mgr.dash)
		if err != nil {
			log.Fatalf("failed to init asset storage: %v", err)
		}
	}

	if *flagBench != "" {
		mgr.initBench()
	}

	go mgr.heartbeatLoop()
	if mgr.mode != ModeSmokeTest {
		osutil.HandleInterrupts(vm.Shutdown)
	}
	if mgr.vmPool == nil {
		log.Logf(0, "no VMs started (type=none)")
		log.Logf(0, "you are supposed to start syz-executor manually as:")
		log.Logf(0, "syz-executor runner local manager.ip %v", mgr.serv.Port())
		<-vm.Shutdown
		return
	}
	mgr.pool = vm.NewDispatcher(mgr.vmPool, mgr.fuzzerInstance)
	mgr.http.Pool = mgr.pool
	reproVMs := max(0, mgr.vmPool.Count()-mgr.cfg.FuzzingVMs)
	mgr.reproLoop = manager.NewReproLoop(mgr, reproVMs, mgr.cfg.DashboardOnlyRepro)
	mgr.http.ReproLoop = mgr.reproLoop
	mgr.http.TogglePause = mgr.pool.TogglePause

	if mgr.cfg.HTTP != "" {
		go func() {
			err := mgr.http.Serve(ctx)
			if err != nil {
				log.Fatalf("failed to serve HTTP: %v", err)
			}
		}()
	}
	go mgr.trackUsedFiles()
	go mgr.processFuzzingResults(ctx)
	mgr.pool.Loop(ctx)
}
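// Rough runtime flow once pool.Loop starts above (descriptive summary of the code below):
// fuzzerInstance runs on every VM and feeds crash reports into mgr.crashes;
// processFuzzingResults saves them and may enqueue them into reproLoop;
// reproLoop calls back into RunRepro, and processRepro reports or records the result.
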
// Exit successfully in special operation modes.
func (mgr *Manager) exit(reason string) {
	log.Logf(0, "%v finished, shutting down...", reason)
	mgr.writeBench()
	close(vm.Shutdown)
	time.Sleep(10 * time.Second)
	os.Exit(0)
}

func (mgr *Manager) heartbeatLoop() {
	lastTime := time.Now()
	for now := range time.NewTicker(10 * time.Second).C {
		diff := int(now.Sub(lastTime))
		lastTime = now
		if mgr.firstConnect.Load() == 0 {
			continue
		}
		mgr.statFuzzingTime.Add(diff * mgr.servStats.StatNumFuzzing.Val())
		buf := new(bytes.Buffer)
		for _, stat := range stat.Collect(stat.Console) {
			fmt.Fprintf(buf, "%v=%v ", stat.Name, stat.Value)
		}
		log.Logf(0, "%s", buf.String())
	}
}

func (mgr *Manager) initBench() {
	f, err := os.OpenFile(*flagBench, os.O_WRONLY|os.O_CREATE|os.O_EXCL, osutil.DefaultFilePerm)
	if err != nil {
		log.Fatalf("failed to open bench file: %v", err)
	}
	mgr.benchFile = f
	go func() {
		for range time.NewTicker(time.Minute).C {
			mgr.writeBench()
		}
	}()
}

func (mgr *Manager) writeBench() {
	if mgr.benchFile == nil {
		return
	}
	mgr.benchMu.Lock()
	defer mgr.benchMu.Unlock()
	vals := make(map[string]int)
	for _, stat := range stat.Collect(stat.All) {
		vals[stat.Name] = stat.V
	}
	data, err := json.MarshalIndent(vals, "", " ")
	if err != nil {
		log.Fatalf("failed to serialize bench data")
	}
	if _, err := mgr.benchFile.Write(append(data, '\n')); err != nil {
		log.Fatalf("failed to write bench data")
	}
}

func (mgr *Manager) processFuzzingResults(ctx context.Context) {
	for {
		select {
		case <-ctx.Done():
			return
		case crash := <-mgr.crashes:
			needRepro := mgr.saveCrash(crash)
			if mgr.cfg.Reproduce && needRepro {
				mgr.reproLoop.Enqueue(crash)
			}
		case err := <-mgr.pool.BootErrors:
			crash := mgr.convertBootError(err)
			if crash != nil {
				mgr.saveCrash(crash)
			}
		case crash := <-mgr.externalReproQueue:
			if mgr.NeedRepro(crash) {
				mgr.reproLoop.Enqueue(crash)
			}
		}
	}
}

func (mgr *Manager) convertBootError(err error) *manager.Crash {
	var bootErr vm.BootErrorer
	if errors.As(err, &bootErr) {
		title, output := bootErr.BootError()
		rep := mgr.reporter.Parse(output)
		if rep != nil && rep.Type == crash_pkg.UnexpectedReboot {
			// Avoid detecting any boot crash as "unexpected kernel reboot".
			rep = mgr.reporter.ParseFrom(output, rep.SkipPos)
		}
		if rep == nil {
			rep = &report.Report{
				Title:  title,
				Output: output,
			}
		}
		return &manager.Crash{
			Report: rep,
		}
	}
	return nil
}

func reportReproError(err error) {
	shutdown := false
	select {
	case <-vm.Shutdown:
		shutdown = true
	default:
	}

	if errors.Is(err, repro.ErrEmptyCrashLog) {
		// The kernel could have crashed before we executed any programs.
		log.Logf(0, "repro failed: %v", err)
		return
	} else if errors.Is(err, repro.ErrNoVMs) || errors.Is(err, context.Canceled) {
		// This error is to be expected if we're shutting down.
		if shutdown {
			return
		}
	}
	// Report everything else as errors.
	log.Errorf("repro failed: %v", err)
}
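// RunRepro attempts to reproduce the given crash. If a reproducer is found and
// cfg.StraceBin is set, the reproducer is also rerun under strace, and the strace
// output is kept only when it triggers the same bug. The result is handed to
// processRepro, which reports it or records the failed attempt.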
func (mgr *Manager) RunRepro(ctx context.Context, crash *manager.Crash) *manager.ReproResult {
	res, stats, err := repro.Run(ctx, crash.Output, repro.Environment{
		Config:   mgr.cfg,
		Features: mgr.enabledFeatures,
		Reporter: mgr.reporter,
		Pool:     mgr.pool,
	})
	ret := &manager.ReproResult{
		Crash: crash,
		Repro: res,
		Stats: stats,
		Err:   err,
	}
	if err == nil && res != nil && mgr.cfg.StraceBin != "" {
		const straceAttempts = 2
		for i := 1; i <= straceAttempts; i++ {
			strace := repro.RunStrace(res, mgr.cfg, mgr.reporter, mgr.pool)
			sameBug := strace.IsSameBug(res)
			log.Logf(0, "strace run attempt %d/%d for '%s': same bug %v, error %v",
				i, straceAttempts, res.Report.Title, sameBug, strace.Error)
			// We only want to save strace output if it resulted in the same bug.
			// Otherwise, it will be hard to reproduce on syzbot and will confuse users.
			if sameBug {
				ret.Strace = strace
				break
			}
		}
	}

	mgr.processRepro(ret)

	return ret
}

func (mgr *Manager) processRepro(res *manager.ReproResult) {
	if res.Err != nil {
		reportReproError(res.Err)
	}
	if res.Repro == nil {
		if res.Crash.Title == "" {
			log.Logf(1, "repro '%v' not from dashboard, so not reporting the failure",
				res.Crash.FullTitle())
		} else {
			log.Logf(1, "report repro failure of '%v'", res.Crash.Title)
			mgr.saveFailedRepro(res.Crash.Report, res.Stats)
		}
	} else {
		mgr.saveRepro(res)
	}
}

func (mgr *Manager) preloadCorpus() {
	info, err := manager.LoadSeeds(mgr.cfg, false)
	if err != nil {
		log.Fatalf("failed to load corpus: %v", err)
	}
	mgr.fresh = info.Fresh
	mgr.corpusDB = info.CorpusDB
	mgr.corpusPreload <- info.Candidates
}

func (mgr *Manager) loadCorpus(enabledSyscalls map[*prog.Syscall]bool) []fuzzer.Candidate {
	ret := manager.FilterCandidates(<-mgr.corpusPreload, enabledSyscalls, true)
	if mgr.cfg.PreserveCorpus {
		for _, hash := range ret.ModifiedHashes {
			// This program contains a disabled syscall.
			// We won't execute it, but remember its hash so
			// it is not deleted during minimization.
			mgr.disabledHashes[hash] = struct{}{}
		}
	}
	// Let's favor smaller programs, otherwise the poorly minimized ones may overshadow the rest.
	sort.SliceStable(ret.Candidates, func(i, j int) bool {
		return len(ret.Candidates[i].Prog.Calls) < len(ret.Candidates[j].Prog.Calls)
	})
	reminimized := ret.ReminimizeSubset()
	resmashed := ret.ResmashSubset()
	log.Logf(0, "%-24v: %v (%v seeds), %d to be reminimized, %d to be resmashed",
		"corpus", len(ret.Candidates), ret.SeedCount, reminimized, resmashed)
	return ret.Candidates
}

func (mgr *Manager) fuzzerInstance(ctx context.Context, inst *vm.Instance, updInfo dispatcher.UpdateInfo) {
	mgr.mu.Lock()
	serv := mgr.serv
	mgr.mu.Unlock()
	if serv == nil {
		// We're in the process of switching off the RPCServer.
		return
	}
	injectExec := make(chan bool, 10)
	serv.CreateInstance(inst.Index(), injectExec, updInfo)

	reps, vmInfo, err := mgr.runInstanceInner(ctx, inst,
		vm.WithExitCondition(vm.ExitTimeout),
		vm.WithInjectExecuting(injectExec),
		vm.WithEarlyFinishCb(func() {
			// Depending on the crash type and kernel config, fuzzing may continue
			// running for several seconds even after kernel has printed a crash report.
			// This litters the log, and we want to prevent it.
			serv.StopFuzzing(inst.Index())
		}))
	var extraExecs []report.ExecutorInfo
	var rep *report.Report
	if len(reps) != 0 {
		rep = reps[0]
	}
	if rep != nil && rep.Executor != nil {
		extraExecs = []report.ExecutorInfo{*rep.Executor}
	}
	lastExec, machineInfo := serv.ShutdownInstance(inst.Index(), rep != nil, extraExecs...)
	if rep != nil {
		rpcserver.PrependExecuting(rep, lastExec)
		if len(vmInfo) != 0 {
			machineInfo = append(append(vmInfo, '\n'), machineInfo...)
		}
		rep.MachineInfo = machineInfo
	}
	if err == nil && rep != nil {
		mgr.crashes <- &manager.Crash{
			InstanceIndex: inst.Index(),
			Report:        rep,
			TailReports:   reps[1:],
		}
	}
	if err != nil {
		log.Logf(1, "VM %v: failed with error: %v", inst.Index(), err)
	}
}

func (mgr *Manager) runInstanceInner(ctx context.Context, inst *vm.Instance, opts ...func(*vm.RunOptions),
) ([]*report.Report, []byte, error) {
	fwdAddr, err := inst.Forward(mgr.serv.Port())
	if err != nil {
		return nil, nil, fmt.Errorf("failed to setup port forwarding: %w", err)
	}

	// If ExecutorBin is provided, it means that syz-executor is already in the image,
	// so no need to copy it.
	executorBin := mgr.sysTarget.ExecutorBin
	if executorBin == "" {
		executorBin, err = inst.Copy(mgr.cfg.ExecutorBin)
		if err != nil {
			return nil, nil, fmt.Errorf("failed to copy binary: %w", err)
		}
	}

	// Run the fuzzer binary.
	start := time.Now()

	host, port, err := net.SplitHostPort(fwdAddr)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to parse manager's address")
	}
	cmd := fmt.Sprintf("%v runner %v %v %v", executorBin, inst.Index(), host, port)
	ctxTimeout, cancel := context.WithTimeout(ctx, mgr.cfg.Timeouts.VMRunningTime)
	defer cancel()
	_, reps, err := inst.Run(ctxTimeout, mgr.reporter, cmd, opts...)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to run fuzzer: %w", err)
	}
	if len(reps) == 0 {
		// This is the only "OK" outcome.
		log.Logf(0, "VM %v: running for %v, restarting", inst.Index(), time.Since(start))
		return nil, nil, nil
	}
	vmInfo, err := inst.Info()
	if err != nil {
		vmInfo = []byte(fmt.Sprintf("error getting VM info: %v\n", err))
	}
	return reps, vmInfo, nil
}

func (mgr *Manager) emailCrash(crash *manager.Crash) {
	if len(mgr.cfg.EmailAddrs) == 0 {
		return
	}
	args := []string{"-s", "syzkaller: " + crash.Title}
	args = append(args, mgr.cfg.EmailAddrs...)
	log.Logf(0, "sending email to %v", mgr.cfg.EmailAddrs)

	cmd := exec.Command("mailx", args...)
	cmd.Stdin = bytes.NewReader(crash.Report.Report)
	if _, err := osutil.Run(10*time.Minute, cmd); err != nil {
		log.Logf(0, "failed to send email: %v", err)
	}
}
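// saveCrash symbolizes and records a crash report: it updates crash statistics and
// per-frame bookkeeping (memory-leak and KCSAN data-race frames), uploads the report
// to the dashboard when one is configured, and otherwise stores it in the local crash
// store (emailing the first occurrence if configured). It returns whether the crash
// should be reproduced.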
func (mgr *Manager) saveCrash(crash *manager.Crash) bool {
	if err := mgr.reporter.Symbolize(crash.Report); err != nil {
		log.Errorf("failed to symbolize report: %v", err)
	}
	if crash.Type == crash_pkg.MemoryLeak {
		mgr.mu.Lock()
		mgr.memoryLeakFrames[crash.Frame] = true
		mgr.mu.Unlock()
	}
	if crash.Type == crash_pkg.KCSANDataRace {
		mgr.mu.Lock()
		mgr.dataRaceFrames[crash.Frame] = true
		mgr.mu.Unlock()
	}
	flags := ""
	if crash.Corrupted {
		flags += " [corrupted]"
	}
	if crash.Suppressed {
		flags += " [suppressed]"
	}
	log.Logf(0, "VM %v: crash: %v%v", crash.InstanceIndex, crash.Report.Title, flags)
	for i, report := range crash.TailReports {
		log.Logf(0, "VM %v: crash(tail%d): %v%v", crash.InstanceIndex, i, report.Title, flags)
	}

	if mgr.mode.FailOnCrashes {
		path := filepath.Join(mgr.cfg.Workdir, "report.json")
		if err := osutil.WriteJSON(path, crash.Report); err != nil {
			log.Fatal(err)
		}
		log.Fatalf("kernel crashed in smoke testing mode, exiting")
	}

	if crash.Suppressed {
		// Collect all of them into a single bucket so that it's possible to control and assess them,
		// e.g. if there are some spikes in suppressed reports.
		crash.Title = "suppressed report"
		mgr.statSuppressed.Add(1)
	}

	mgr.statCrashes.Add(1)
	mgr.mu.Lock()
	if !mgr.crashTypes[crash.Title] {
		mgr.crashTypes[crash.Title] = true
		mgr.statCrashTypes.Add(1)
	}
	mgr.mu.Unlock()

	if mgr.dash != nil {
		if crash.Type == crash_pkg.MemoryLeak {
			return true
		}
		dc := &dashapi.Crash{
			BuildID:     mgr.cfg.Tag,
			Title:       crash.Title,
			AltTitles:   crash.AltTitles,
			Corrupted:   crash.Corrupted,
			Suppressed:  crash.Suppressed,
			Recipients:  crash.Recipients.ToDash(),
			Log:         crash.Output,
			Report:      report.SplitReportBytes(crash.Report.Report)[0],
			MachineInfo: crash.MachineInfo,
		}
		setGuiltyFiles(dc, crash.Report)
		resp, err := mgr.dash.ReportCrash(dc)
		if err != nil {
			log.Logf(0, "failed to report crash to dashboard: %v", err)
		}
		// Don't store the crash locally even if we failed to upload it.
		// There is 0 chance that one will ever look in the crashes/ folder of those instances.
		return mgr.cfg.Reproduce && resp.NeedRepro
	}
	first, err := mgr.crashStore.SaveCrash(crash)
	if err != nil {
		log.Logf(0, "failed to save the crash: %v", err)
		return false
	}
	if first {
		go mgr.emailCrash(crash)
	}
	return mgr.NeedRepro(crash)
}

func (mgr *Manager) needLocalRepro(crash *manager.Crash) bool {
	if !mgr.cfg.Reproduce || crash.Corrupted || crash.Suppressed {
		return false
	}
	if mgr.crashStore.HasRepro(crash.Title) {
		return false
	}
	return mgr.crashStore.MoreReproAttempts(crash.Title)
}

func (mgr *Manager) NeedRepro(crash *manager.Crash) bool {
	if !mgr.cfg.Reproduce {
		return false
	}
	if crash.FromHub || crash.FromDashboard {
		return true
	}
	mgr.mu.Lock()
	phase, features := mgr.phase, mgr.enabledFeatures
	mgr.mu.Unlock()
	if phase < phaseLoadedCorpus || (features&flatrpc.FeatureLeak != 0 &&
		crash.Type != crash_pkg.MemoryLeak) {
		// Leak checking is very slow, don't bother reproducing other crashes on leak instances.
		return false
	}
	if mgr.dashRepro == nil {
		return mgr.needLocalRepro(crash)
	}
	cid := &dashapi.CrashID{
		BuildID:    mgr.cfg.Tag,
		Title:      crash.Title,
		Corrupted:  crash.Corrupted,
		Suppressed: crash.Suppressed,
		// When cfg.DashboardOnlyRepro is enabled, we don't send any reports to dashboard.
		// We also don't send leak reports w/o reproducers to dashboard, so they may be missing.
		MayBeMissing: mgr.dash == nil || crash.Type == crash_pkg.MemoryLeak,
	}
	needRepro, err := mgr.dashRepro.NeedRepro(cid)
	if err != nil {
		log.Logf(0, "dashboard.NeedRepro failed: %v", err)
	}
	return needRepro
}

func truncateReproLog(log []byte) []byte {
	// Repro logs can get quite large and we have trouble sending large API requests (see #4495).
	// Let's truncate the log to a 512KB prefix and 512KB suffix.
	return report.Truncate(log, 512000, 512000)
}

func (mgr *Manager) saveFailedRepro(rep *report.Report, stats *repro.Stats) {
	reproLog := stats.FullLog()
	if mgr.dash != nil {
		if rep.Type == crash_pkg.MemoryLeak {
			// Don't send failed leak repro attempts to dashboard
			// as we did not send the crash itself.
			log.Logf(1, "failed repro of '%v': not sending because of the memleak type", rep.Title)
			return
		}
		cid := &dashapi.CrashID{
			BuildID:      mgr.cfg.Tag,
			Title:        rep.Title,
			Corrupted:    rep.Corrupted,
			Suppressed:   rep.Suppressed,
			MayBeMissing: rep.Type == crash_pkg.MemoryLeak,
			ReproLog:     truncateReproLog(reproLog),
		}
		if err := mgr.dash.ReportFailedRepro(cid); err != nil {
			log.Logf(0, "failed to report failed repro to dashboard (log size %d): %v",
				len(reproLog), err)
		}
		return
	}
	err := mgr.crashStore.SaveFailedRepro(rep.Title, reproLog)
	if err != nil {
		log.Logf(0, "failed to save repro log for %q: %v", rep.Title, err)
	}
}

func (mgr *Manager) saveRepro(res *manager.ReproResult) {
	repro := res.Repro
	opts := fmt.Sprintf("# %+v\n", repro.Opts)
	progText := repro.Prog.Serialize()

	// Append this repro to repro list to send to hub if it didn't come from hub originally.
	if !res.Crash.FromHub {
		progForHub := []byte(fmt.Sprintf("# %+v\n# %v\n# %v\n%s",
			repro.Opts, repro.Report.Title, mgr.cfg.Tag, progText))
		mgr.mu.Lock()
		mgr.newRepros = append(mgr.newRepros, progForHub)
		mgr.mu.Unlock()
	}

	var cprogText []byte
	if repro.CRepro {
		var err error
		cprogText, err = repro.CProgram()
		if err != nil {
			log.Logf(0, "failed to write C source: %v", err)
		}
	}

	if mgr.dash != nil {
		// Note: we intentionally don't set Corrupted for reproducers:
		// 1. This is reproducible so can be debugged even with corrupted report.
		// 2. Repro re-tried 3 times and still got corrupted report at the end,
		//    so maybe corrupted report detection is broken.
		// 3. Reproduction is expensive so it's good to persist the result.

		reproReport := repro.Report
		output := reproReport.Output

		var crashFlags dashapi.CrashFlags
		if res.Strace != nil {
			// If syzkaller managed to successfully run the repro with strace, send
			// the report and the output generated under strace.
			reproReport = res.Strace.Report
			output = res.Strace.Output
			crashFlags = dashapi.CrashUnderStrace
		}

		dc := &dashapi.Crash{
			BuildID:       mgr.cfg.Tag,
			Title:         reproReport.Title,
			AltTitles:     reproReport.AltTitles,
			Suppressed:    reproReport.Suppressed,
			Recipients:    reproReport.Recipients.ToDash(),
			Log:           output,
			Flags:         crashFlags,
			Report:        report.SplitReportBytes(reproReport.Report)[0],
			ReproOpts:     repro.Opts.Serialize(),
			ReproSyz:      progText,
			ReproC:        cprogText,
			ReproLog:      truncateReproLog(res.Stats.FullLog()),
			Assets:        mgr.uploadReproAssets(repro),
			OriginalTitle: res.Crash.Title,
		}
		setGuiltyFiles(dc, reproReport)
		if _, err := mgr.dash.ReportCrash(dc); err != nil {
			log.Logf(0, "failed to report repro to dashboard: %v", err)
		} else {
			// Don't store the crash locally if we've successfully
			// uploaded it to the dashboard. These would just eat disk space.
			return
		}
	}
	err := mgr.crashStore.SaveRepro(res, append([]byte(opts), progText...), cprogText)
	if err != nil {
		log.Logf(0, "%s", err)
	}
}

func (mgr *Manager) ResizeReproPool(size int) {
	mgr.pool.ReserveForRun(size)
}

func (mgr *Manager) uploadReproAssets(repro *repro.Result) []dashapi.NewAsset {
	if mgr.assetStorage == nil {
		return nil
	}

	ret := []dashapi.NewAsset{}
	repro.Prog.ForEachAsset(func(name string, typ prog.AssetType, r io.Reader, c *prog.Call) {
		dashTyp, ok := map[prog.AssetType]dashapi.AssetType{
			prog.MountInRepro: dashapi.MountInRepro,
		}[typ]
		if !ok {
			panic("unknown extracted prog asset")
		}
		r2 := &bytes.Buffer{}
		r1 := io.TeeReader(r, r2)
		asset, err := mgr.assetStorage.UploadCrashAsset(r1, name, dashTyp, nil)
		if err != nil {
			log.Logf(1, "processing of the asset %v (%v) failed: %v", name, typ, err)
			return
		}
		// Report file systems that fail fsck with a separate tag.
		if mgr.cfg.RunFsck && dashTyp == dashapi.MountInRepro &&
			c.Meta.Attrs.Fsck != "" && mgr.fsckChecker.Exists(c.Meta.Attrs.Fsck) {
			logs, isClean, err := image.Fsck(r2, c.Meta.Attrs.Fsck)
			if err != nil {
				log.Errorf("fsck of the asset %v failed: %v", name, err)
			} else {
				asset.FsckLog = logs
				asset.FsIsClean = isClean
			}
		}
		ret = append(ret, asset)
	})
	return ret
}

func (mgr *Manager) corpusInputHandler(updates <-chan corpus.NewItemEvent) {
	for update := range updates {
		if len(update.NewCover) != 0 && mgr.coverFilters.ExecutorFilter != nil {
			filtered := 0
			for _, pc := range update.NewCover {
				if _, ok := mgr.coverFilters.ExecutorFilter[pc]; ok {
					filtered++
				}
			}
			mgr.statCoverFiltered.Add(filtered)
		}
		if update.Exists {
			// We only save new progs into the corpus.db file.
			continue
		}
		mgr.corpusDBMu.Lock()
		mgr.corpusDB.Save(update.Sig, update.ProgData, 0)
		if err := mgr.corpusDB.Flush(); err != nil {
			log.Errorf("failed to save corpus database: %v", err)
		}
		mgr.corpusDBMu.Unlock()
	}
}

func (mgr *Manager) getMinimizedCorpus() []*corpus.Item {
	mgr.mu.Lock()
	defer mgr.mu.Unlock()
	mgr.minimizeCorpusLocked()
	return mgr.corpus.Items()
}

func (mgr *Manager) getNewRepros() [][]byte {
	mgr.mu.Lock()
	defer mgr.mu.Unlock()
	repros := mgr.newRepros
	mgr.newRepros = nil
	return repros
}

func (mgr *Manager) addNewCandidates(candidates []fuzzer.Candidate) {
	mgr.mu.Lock()
	if mgr.phase == phaseTriagedCorpus {
		mgr.setPhaseLocked(phaseQueriedHub)
	}
	mgr.mu.Unlock()
	if mgr.cfg.Experimental.ResetAccState {
		// Don't accept new candidates -- the execution is already very slow,
		// syz-hub will just overwhelm us.
		return
	}
	mgr.fuzzer.Load().AddCandidates(candidates)
}

func (mgr *Manager) minimizeCorpusLocked() {
	// Don't minimize corpus until we have triaged all inputs from it.
	// During corpus triage it would happen very often since we are actively adding inputs,
	// and presumably the persistent corpus was reasonably minimal, and we don't use it for fuzzing yet.
	if mgr.phase < phaseTriagedCorpus {
		return
	}
	currSize := mgr.corpus.StatProgs.Val()
	if currSize <= mgr.lastMinCorpus*103/100 {
		return
	}
	mgr.corpus.Minimize(mgr.cfg.Cover)
	newSize := mgr.corpus.StatProgs.Val()

	log.Logf(1, "minimized corpus: %v -> %v", currSize, newSize)
	mgr.lastMinCorpus = newSize

	// From time to time we get corpus explosion due to different reasons:
	// generic bugs, per-OS bugs, problems with fallback coverage, kcov bugs, etc.
	// This has a bad effect on the instance and especially on instances
	// connected via hub. Do some per-syscall sanity checking to prevent this.
	for call, info := range mgr.corpus.CallCover() {
		if mgr.cfg.Cover {
			// If we have less than 1K inputs per this call,
			// accept all new inputs unconditionally.
			if info.Count < 1000 {
				continue
			}
			// If we have more than 3K already, don't accept any more.
			// Between 1K and 3K look at the amount of coverage we are getting from these programs.
			// Empirically, real coverage for the most saturated syscalls is ~30-60
			// per program (even when we have a thousand of them). For the explosion
			// case, coverage tends to be much lower (~0.3-5 per program).
			if info.Count < 3000 && len(info.Cover)/info.Count >= 10 {
				continue
			}
		} else {
			// If we don't have real coverage, signal is weak.
			// If we have more than several hundred, there is something wrong.
			if info.Count < 300 {
				continue
			}
		}
		if mgr.saturatedCalls[call] {
			continue
		}
		mgr.saturatedCalls[call] = true
		log.Logf(0, "coverage for %v has saturated, not accepting more inputs", call)
	}

	mgr.corpusDBMu.Lock()
	defer mgr.corpusDBMu.Unlock()
	for key := range mgr.corpusDB.Records {
		ok1 := mgr.corpus.Item(key) != nil
		_, ok2 := mgr.disabledHashes[key]
		if !ok1 && !ok2 {
			mgr.corpusDB.Delete(key)
		}
	}
	if err := mgr.corpusDB.Flush(); err != nil {
		log.Fatalf("failed to save corpus database: %v", err)
	}
	mgr.corpusDB.BumpVersion(manager.CurrentDBVersion)
}

func setGuiltyFiles(crash *dashapi.Crash, report *report.Report) {
	if report.GuiltyFile != "" {
		crash.GuiltyFiles = []string{report.GuiltyFile}
	}
}

func (mgr *Manager) BugFrames() (leaks, races []string) {
	mgr.mu.Lock()
	defer mgr.mu.Unlock()
	for frame := range mgr.memoryLeakFrames {
		leaks = append(leaks, frame)
	}
	for frame := range mgr.dataRaceFrames {
		races = append(races, frame)
	}
	return
}

func (mgr *Manager) MachineChecked(features flatrpc.Feature,
	enabledSyscalls map[*prog.Syscall]bool) (queue.Source, error) {
	if len(enabledSyscalls) == 0 {
		return nil, fmt.Errorf("all system calls are disabled")
	}
	if mgr.mode.ExitAfterMachineCheck {
		mgr.exit(mgr.mode.Name)
	}

	// If KFuzzTest is enabled, we exclusively fuzz KFuzzTest targets - so
	// delete any existing entries in enabled syscalls, and enable all
	// discovered KFuzzTest targets explicitly.
	if mgr.cfg.Experimental.EnableKFuzzTest {
		for call := range enabledSyscalls {
			delete(enabledSyscalls, call)
		}
		data, err := kfuzztest.ExtractData(path.Join(mgr.cfg.KernelObj, "vmlinux"))
		if err != nil {
			return nil, err
		}
		for _, call := range data.Calls {
			enabledSyscalls[call] = true
		}
	}

	mgr.mu.Lock()
	defer mgr.mu.Unlock()
	if mgr.phase != phaseInit {
		panic("machineChecked() called not during phaseInit")
	}
	if mgr.checkDone.Swap(true) {
		panic("MachineChecked called twice")
	}
	mgr.enabledFeatures = features
	mgr.http.EnabledSyscalls.Store(enabledSyscalls)
	mgr.firstConnect.Store(time.Now().Unix())
	statSyscalls := stat.New("syscalls", "Number of enabled syscalls",
		stat.Simple, stat.NoGraph, stat.Link("/syscalls"))
	statSyscalls.Add(len(enabledSyscalls))
	candidates := mgr.loadCorpus(enabledSyscalls)
	mgr.setPhaseLocked(phaseLoadedCorpus)
	opts := fuzzer.DefaultExecOpts(mgr.cfg, features, *flagDebug)

	switch mgr.mode {
	case ModeFuzzing, ModeCorpusTriage:
		corpusUpdates := make(chan corpus.NewItemEvent, 128)
		mgr.corpus = corpus.NewFocusedCorpus(context.Background(),
			corpusUpdates, mgr.coverFilters.Areas)
		mgr.http.Corpus.Store(mgr.corpus)

		rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
		fuzzerObj := fuzzer.NewFuzzer(context.Background(), &fuzzer.Config{
			Corpus:         mgr.corpus,
			Snapshot:       mgr.cfg.Snapshot,
			Coverage:       mgr.cfg.Cover,
			FaultInjection: features&flatrpc.FeatureFault != 0,
			Comparisons:    features&flatrpc.FeatureComparisons != 0,
			Collide:        true,
			EnabledCalls:   enabledSyscalls,
			NoMutateCalls:  mgr.cfg.NoMutateCalls,
			FetchRawCover:  mgr.cfg.RawCover,
			Logf: func(level int, msg string, args ...interface{}) {
				if level != 0 {
					return
				}
				log.Logf(level, msg, args...)
			},
			NewInputFilter: func(call string) bool {
				mgr.mu.Lock()
				defer mgr.mu.Unlock()
				return !mgr.saturatedCalls[call]
			},
			ModeKFuzzTest: mgr.cfg.Experimental.EnableKFuzzTest,
		}, rnd, mgr.target)
		fuzzerObj.AddCandidates(candidates)
		mgr.fuzzer.Store(fuzzerObj)
		mgr.http.Fuzzer.Store(fuzzerObj)

		go mgr.corpusInputHandler(corpusUpdates)
		go mgr.corpusMinimization()
		go mgr.fuzzerLoop(fuzzerObj)
		if mgr.dash != nil {
			go mgr.dashboardReporter()
			if mgr.cfg.Reproduce {
				go mgr.dashboardReproTasks()
			}
		}
		source := queue.DefaultOpts(fuzzerObj, opts)
		if mgr.cfg.Snapshot {
			log.Logf(0, "restarting VMs for snapshot mode")
			mgr.snapshotSource = queue.Distribute(source)
			mgr.pool.SetDefault(mgr.snapshotInstance)
			mgr.serv.Close()
			mgr.serv = nil
			return queue.Callback(func() *queue.Request {
				return nil
			}), nil
		}
		return source, nil
	case ModeCorpusRun:
		ctx := &corpusRunner{
			candidates: candidates,
			rnd:        rand.New(rand.NewSource(time.Now().UnixNano())),
		}
		return queue.DefaultOpts(ctx, opts), nil
	case ModeRunTests:
		ctx := &runtest.Context{
			Dir:      filepath.Join(mgr.cfg.Syzkaller, "sys", mgr.cfg.Target.OS, "test"),
			Target:   mgr.cfg.Target,
			Features: features,
			EnabledCalls: map[string]map[*prog.Syscall]bool{
				mgr.cfg.Sandbox: enabledSyscalls,
			},
			LogFunc: func(text string) { fmt.Println(text) },
			Verbose: true,
			Debug:   *flagDebug,
			Tests:   *flagTests,
		}
		ctx.Init()
		go func() {
			err := ctx.Run(context.Background())
			if err != nil {
				log.Fatal(err)
			}
			mgr.exit("tests")
		}()
		return ctx, nil
	case ModeIfaceProbe:
		exec := queue.Plain()
		go func() {
			res, err := ifaceprobe.Run(vm.ShutdownCtx(), mgr.cfg, features, exec)
			if err != nil {
				log.Fatalf("interface probing failed: %v", err)
			}
			path := filepath.Join(mgr.cfg.Workdir, "interfaces.json")
			if err := osutil.WriteJSON(path, res); err != nil {
				log.Fatal(err)
			}
			mgr.exit("interface probe")
		}()
		return exec, nil
	}
	panic(fmt.Sprintf("unexpected mode %q", mgr.mode.Name))
}

type corpusRunner struct {
	candidates []fuzzer.Candidate
	mu         sync.Mutex
	rnd        *rand.Rand
	seq        int
}

func (cr *corpusRunner) Next() *queue.Request {
	cr.mu.Lock()
	defer cr.mu.Unlock()

	var p *prog.Prog
	if cr.seq < len(cr.candidates) {
		// First run all candidates sequentially.
		p = cr.candidates[cr.seq].Prog
		cr.seq++
	} else {
		// Then pick random progs.
		p = cr.candidates[cr.rnd.Intn(len(cr.candidates))].Prog
	}
	return &queue.Request{
		Prog:      p,
		Important: true,
	}
}

func (mgr *Manager) corpusMinimization() {
	for range time.NewTicker(time.Minute).C {
		mgr.mu.Lock()
		mgr.minimizeCorpusLocked()
		mgr.mu.Unlock()
	}
}

func (mgr *Manager) MaxSignal() signal.Signal {
	if fuzzer := mgr.fuzzer.Load(); fuzzer != nil {
		return fuzzer.Cover.CopyMaxSignal()
	}
	return nil
}
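// fuzzerLoop periodically pushes newly discovered max signal to the connected VMs
// and advances the manager phase state machine once candidate triage finishes
// (optionally starting syz-hub syncing).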
func (mgr *Manager) fuzzerLoop(fuzzer *fuzzer.Fuzzer) {
	for ; ; time.Sleep(time.Second / 2) {
		if mgr.cfg.Cover && !mgr.cfg.Snapshot {
			// Distribute new max signal over all instances.
			newSignal := fuzzer.Cover.GrabSignalDelta()
			if len(newSignal) != 0 {
				log.Logf(3, "distributing %d new signal", len(newSignal))
			}
			if len(newSignal) != 0 {
				mgr.serv.DistributeSignalDelta(newSignal)
			}
		}

		// Update the state machine.
		if fuzzer.CandidateTriageFinished() {
			if mgr.mode == ModeCorpusTriage {
				mgr.exit("corpus triage")
			}
			mgr.mu.Lock()
			switch mgr.phase {
			case phaseLoadedCorpus:
				if !mgr.cfg.Snapshot {
					mgr.serv.TriagedCorpus()
				}
				if mgr.cfg.HubClient != "" {
					mgr.setPhaseLocked(phaseTriagedCorpus)
					go mgr.hubSyncLoop(pickGetter(mgr.cfg.HubKey),
						fuzzer.Config.EnabledCalls)
				} else {
					mgr.setPhaseLocked(phaseTriagedHub)
				}
			case phaseQueriedHub:
				mgr.setPhaseLocked(phaseTriagedHub)
			}
			mgr.mu.Unlock()
		}
	}
}

func (mgr *Manager) setPhaseLocked(newPhase int) {
	if mgr.phase == newPhase {
		panic("repeated phase update")
	}
	// In VMLess mode, mgr.reproLoop is nil.
	if newPhase == phaseTriagedHub && mgr.reproLoop != nil {
		// Start reproductions.
		go mgr.reproLoop.Loop(vm.ShutdownCtx())
	}
	mgr.phase = newPhase
}

func (mgr *Manager) needMoreCandidates() bool {
	return mgr.fuzzer.Load().CandidateTriageFinished()
}

func (mgr *Manager) hubIsUnreachable() {
	var dash *dashapi.Dashboard
	mgr.mu.Lock()
	if mgr.phase == phaseTriagedCorpus {
		dash = mgr.dash
		mgr.setPhaseLocked(phaseTriagedHub)
		log.Errorf("did not manage to connect to syz-hub; moving forward")
	}
	mgr.mu.Unlock()
	if dash != nil {
		mgr.dash.LogError(mgr.cfg.Name, "did not manage to connect to syz-hub")
	}
}

// trackUsedFiles checks that the files syz-manager needs are not modified while it is running.
func (mgr *Manager) trackUsedFiles() {
	usedFiles := make(map[string]time.Time) // file name to modification time
	addUsedFile := func(f string) {
		if f == "" {
			return
		}
		stat, err := os.Stat(f)
		if err != nil {
			log.Fatalf("failed to stat %v: %v", f, err)
		}
		usedFiles[f] = stat.ModTime()
	}
	cfg := mgr.cfg
	addUsedFile(cfg.ExecprogBin)
	addUsedFile(cfg.ExecutorBin)
	addUsedFile(cfg.SSHKey)
	if vmlinux := filepath.Join(cfg.KernelObj, mgr.sysTarget.KernelObject); osutil.IsExist(vmlinux) {
		addUsedFile(vmlinux)
	}
	if cfg.Image != "9p" {
		addUsedFile(cfg.Image)
	}
	for range time.NewTicker(30 * time.Second).C {
		for f, mod := range usedFiles {
			stat, err := os.Stat(f)
			if err != nil {
				log.Fatalf("failed to stat %v: %v", f, err)
			}
			if mod != stat.ModTime() {
				log.Fatalf("file %v that syz-manager uses has been modified by an external program\n"+
					"this can lead to arbitrary syz-manager misbehavior\n"+
					"modification time has changed: %v -> %v\n"+
					"don't modify files that syz-manager uses. exiting to prevent harm",
					f, mod, stat.ModTime())
			}
		}
	}
}

func (mgr *Manager) dashboardReporter() {
	webAddr := publicWebAddr(mgr.cfg.HTTP)
	triageInfoSent := false
	var lastFuzzingTime time.Duration
	var lastCrashes, lastSuppressedCrashes, lastExecs uint64
	for range time.NewTicker(time.Minute).C {
		mgr.mu.Lock()
		corpus := mgr.corpus
		mgr.mu.Unlock()
		if corpus == nil {
			continue
		}
		mgr.mu.Lock()
		req := &dashapi.ManagerStatsReq{
			Name:              mgr.cfg.Name,
			Addr:              webAddr,
			UpTime:            time.Duration(mgr.statUptime.Val()) * time.Second,
			Corpus:            uint64(corpus.StatProgs.Val()),
			PCs:               uint64(corpus.StatCover.Val()),
			Cover:             uint64(corpus.StatSignal.Val()),
			CrashTypes:        uint64(mgr.statCrashTypes.Val()),
			FuzzingTime:       time.Duration(mgr.statFuzzingTime.Val()) - lastFuzzingTime,
			Crashes:           uint64(mgr.statCrashes.Val()) - lastCrashes,
			SuppressedCrashes: uint64(mgr.statSuppressed.Val()) - lastSuppressedCrashes,
			Execs:             uint64(mgr.servStats.StatExecs.Val()) - lastExecs,
		}
		if mgr.phase >= phaseTriagedCorpus && !triageInfoSent {
			triageInfoSent = true
			req.TriagedCoverage = uint64(corpus.StatSignal.Val())
			req.TriagedPCs = uint64(corpus.StatCover.Val())
		}
		mgr.mu.Unlock()

		if err := mgr.dash.UploadManagerStats(req); err != nil {
			log.Logf(0, "failed to upload dashboard stats: %v", err)
			continue
		}
		mgr.mu.Lock()
		lastFuzzingTime += req.FuzzingTime
		lastCrashes += req.Crashes
		lastSuppressedCrashes += req.SuppressedCrashes
		lastExecs += req.Execs
		mgr.mu.Unlock()
	}
}

func (mgr *Manager) dashboardReproTasks() {
	for range time.NewTicker(20 * time.Minute).C {
		if !mgr.reproLoop.CanReproMore() {
			// We don't need reproducers at the moment.
			continue
		}
		resp, err := mgr.dash.LogToRepro(&dashapi.LogToReproReq{BuildID: mgr.cfg.Tag})
		if err != nil {
			log.Logf(0, "failed to query logs to reproduce: %v", err)
			continue
		}
		if len(resp.CrashLog) > 0 {
			mgr.externalReproQueue <- &manager.Crash{
				FromDashboard: true,
				Manual:        resp.Type == dashapi.ManualLog,
				Report: &report.Report{
					Title:  resp.Title,
					Output: resp.CrashLog,
				},
			}
		}
	}
}

func (mgr *Manager) CoverageFilter(modules []*vminfo.KernelModule) ([]uint64, error) {
	mgr.reportGenerator.Init(modules)
	filters, err := manager.PrepareCoverageFilters(mgr.reportGenerator, mgr.cfg, true)
	if err != nil {
		return nil, fmt.Errorf("failed to init coverage filter: %w", err)
	}
	mgr.coverFilters = filters
	mgr.http.Cover.Store(&manager.CoverageInfo{
		Modules:         modules,
		ReportGenerator: mgr.reportGenerator,
		CoverFilter:     filters.ExecutorFilter,
	})
	var pcs []uint64
	for pc := range filters.ExecutorFilter {
		pcs = append(pcs, pc)
	}
	return pcs, nil
}

func publicWebAddr(addr string) string {
	if addr == "" {
		return ""
	}
	_, port, err := net.SplitHostPort(addr)
	if err == nil && port != "" {
		if host, err := os.Hostname(); err == nil {
			addr = net.JoinHostPort(host, port)
		}
		if GCE, err := gce.NewContext(""); err == nil {
			addr = net.JoinHostPort(GCE.ExternalIP, port)
		}
	}
	return "http://" + addr
}