github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/manager/diff.go (about) 1 // Copyright 2024 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package manager 5 6 import ( 7 "context" 8 "encoding/json" 9 "errors" 10 "fmt" 11 "math/rand" 12 "net" 13 "sort" 14 "strings" 15 "sync" 16 "sync/atomic" 17 "time" 18 19 "github.com/google/syzkaller/pkg/corpus" 20 "github.com/google/syzkaller/pkg/flatrpc" 21 "github.com/google/syzkaller/pkg/fuzzer" 22 "github.com/google/syzkaller/pkg/fuzzer/queue" 23 "github.com/google/syzkaller/pkg/instance" 24 "github.com/google/syzkaller/pkg/log" 25 "github.com/google/syzkaller/pkg/mgrconfig" 26 "github.com/google/syzkaller/pkg/osutil" 27 "github.com/google/syzkaller/pkg/report" 28 "github.com/google/syzkaller/pkg/repro" 29 "github.com/google/syzkaller/pkg/rpcserver" 30 "github.com/google/syzkaller/pkg/signal" 31 "github.com/google/syzkaller/pkg/stat" 32 "github.com/google/syzkaller/pkg/vcs" 33 "github.com/google/syzkaller/pkg/vminfo" 34 "github.com/google/syzkaller/prog" 35 "github.com/google/syzkaller/vm" 36 "github.com/google/syzkaller/vm/dispatcher" 37 "golang.org/x/sync/errgroup" 38 ) 39 40 type DiffFuzzerConfig struct { 41 Debug bool 42 PatchedOnly chan *UniqueBug 43 BaseCrashes chan string 44 Store *DiffFuzzerStore 45 ArtifactsDir string // Where to store the artifacts that supplement the logs. 46 // The fuzzer waits no more than MaxTriageTime time until it starts taking VMs away 47 // for bug reproduction. 48 // The option may help find a balance between spending too much time triaging 49 // the corpus and not reaching a proper kernel coverage. 50 MaxTriageTime time.Duration 51 // If non-empty, the fuzzer will spend no more than this amount of time 52 // trying to reach the modified code. The time is counted since the moment 53 // 99% of the corpus is triaged. 
54 FuzzToReachPatched time.Duration 55 // The callback may be used to consult external systems on whether 56 // the crash should be ignored. E.g. because it doesn't match the filter or 57 // the particular base kernel has already been seen to crash with the given title. 58 // It helps reduce the number of unnecessary reproductions. 59 IgnoreCrash func(context.Context, string) (bool, error) 60 } 61 62 func (cfg *DiffFuzzerConfig) TriageDeadline() <-chan time.Time { 63 if cfg.MaxTriageTime == 0 { 64 return nil 65 } 66 return time.After(cfg.MaxTriageTime) 67 } 68 69 type UniqueBug struct { 70 // The report from the patched kernel. 71 Report *report.Report 72 Repro *repro.Result 73 } 74 75 func RunDiffFuzzer(ctx context.Context, baseCfg, newCfg *mgrconfig.Config, cfg DiffFuzzerConfig) error { 76 if cfg.PatchedOnly == nil { 77 return fmt.Errorf("you must set up a patched only channel") 78 } 79 base, err := setup("base", baseCfg, cfg.Debug) 80 if err != nil { 81 return err 82 } 83 new, err := setup("new", newCfg, cfg.Debug) 84 if err != nil { 85 return err 86 } 87 eg, ctx := errgroup.WithContext(ctx) 88 eg.Go(func() error { 89 info, err := LoadSeeds(newCfg, true) 90 if err != nil { 91 return err 92 } 93 select { 94 case new.candidates <- info.Candidates: 95 case <-ctx.Done(): 96 } 97 return nil 98 }) 99 100 stream := queue.NewRandomQueue(4096, rand.New(rand.NewSource(time.Now().UnixNano()))) 101 base.source = stream 102 new.duplicateInto = stream 103 104 diffCtx := &diffContext{ 105 cfg: cfg, 106 doneRepro: make(chan *ReproResult), 107 base: base, 108 new: new, 109 store: cfg.Store, 110 reproAttempts: map[string]int{}, 111 patchedOnly: cfg.PatchedOnly, 112 } 113 if newCfg.HTTP != "" { 114 diffCtx.http = &HTTPServer{ 115 Cfg: newCfg, 116 StartTime: time.Now(), 117 DiffStore: cfg.Store, 118 Pools: map[string]*vm.Dispatcher{ 119 new.name: new.pool, 120 base.name: base.pool, 121 }, 122 } 123 new.http = diffCtx.http 124 } 125 eg.Go(func() error { 126 return diffCtx.Loop(ctx) 
127 }) 128 return eg.Wait() 129 } 130 131 type diffContext struct { 132 cfg DiffFuzzerConfig 133 store *DiffFuzzerStore 134 http *HTTPServer 135 136 doneRepro chan *ReproResult 137 base *kernelContext 138 new *kernelContext 139 patchedOnly chan *UniqueBug 140 141 mu sync.Mutex 142 reproAttempts map[string]int 143 } 144 145 const ( 146 // Don't start reproductions until 90% of the corpus has been triaged. 147 corpusTriageToRepro = 0.9 148 // Start to monitor whether we reached the modified files only after triaging 99%. 149 corpusTriageToMonitor = 0.99 150 ) 151 152 func (dc *diffContext) Loop(baseCtx context.Context) error { 153 g, ctx := errgroup.WithContext(baseCtx) 154 reproLoop := NewReproLoop(dc, dc.new.pool.Total()-dc.new.cfg.FuzzingVMs, false) 155 if dc.http != nil { 156 dc.http.ReproLoop = reproLoop 157 g.Go(func() error { 158 return dc.http.Serve(ctx) 159 }) 160 } 161 162 g.Go(func() error { 163 select { 164 case <-ctx.Done(): 165 return nil 166 case <-dc.waitCorpusTriage(ctx, corpusTriageToRepro): 167 case <-dc.cfg.TriageDeadline(): 168 log.Logf(0, "timed out waiting for coprus triage") 169 } 170 log.Logf(0, "starting bug reproductions") 171 reproLoop.Loop(ctx) 172 return nil 173 }) 174 175 g.Go(func() error { return dc.monitorPatchedCoverage(ctx) }) 176 g.Go(func() error { return dc.base.Loop(ctx) }) 177 g.Go(func() error { return dc.new.Loop(ctx) }) 178 179 runner := &reproRunner{done: make(chan reproRunnerResult, 2), kernel: dc.base} 180 statTimer := time.NewTicker(5 * time.Minute) 181 loop: 182 for { 183 select { 184 case <-ctx.Done(): 185 break loop 186 case <-statTimer.C: 187 vals := make(map[string]int) 188 for _, stat := range stat.Collect(stat.All) { 189 vals[stat.Name] = stat.V 190 } 191 data, _ := json.MarshalIndent(vals, "", " ") 192 log.Logf(0, "STAT %s", data) 193 case rep := <-dc.base.crashes: 194 log.Logf(1, "base crash: %v", rep.Title) 195 dc.reportBaseCrash(ctx, rep) 196 case ret := <-runner.done: 197 // We have run the reproducer on the 
base instance. 198 199 // A sanity check: the base kernel might have crashed with the same title 200 // since the moment we have stared the reproduction / running on the repro base. 201 ignored := dc.ignoreCrash(ctx, ret.reproReport.Title) 202 if ret.crashReport == nil && ignored { 203 // Report it as error so that we could at least find it in the logs. 204 log.Errorf("resulting crash of an approved repro result is to be ignored: %s", 205 ret.reproReport.Title) 206 } else if ret.crashReport == nil { 207 dc.store.BaseNotCrashed(ret.reproReport.Title) 208 select { 209 case <-ctx.Done(): 210 case dc.patchedOnly <- &UniqueBug{ 211 Report: ret.reproReport, 212 Repro: ret.repro, 213 }: 214 } 215 log.Logf(0, "patched-only: %s", ret.reproReport.Title) 216 // Now that we know this bug only affects the patch kernel, we can spend more time 217 // generating a minimalistic repro and a C repro. 218 if !ret.fullRepro { 219 reproLoop.Enqueue(&Crash{ 220 Report: &report.Report{ 221 Title: ret.reproReport.Title, 222 Output: ret.repro.Prog.Serialize(), 223 }, 224 FullRepro: true, 225 }) 226 } 227 } else { 228 dc.reportBaseCrash(ctx, ret.crashReport) 229 log.Logf(0, "crashes both: %s / %s", ret.reproReport.Title, ret.crashReport.Title) 230 } 231 case ret := <-dc.doneRepro: 232 // We have finished reproducing a crash from the patched instance. 
233 if ret.Repro != nil && ret.Repro.Report != nil { 234 origTitle := ret.Crash.Report.Title 235 if ret.Repro.Report.Title == origTitle { 236 origTitle = "-SAME-" 237 } 238 log.Logf(1, "found repro for %q (orig title: %q, reliability: %2.f), took %.2f minutes", 239 ret.Repro.Report.Title, origTitle, ret.Repro.Reliability, ret.Stats.TotalTime.Minutes()) 240 g.Go(func() error { 241 runner.Run(ctx, ret.Repro, ret.Crash.FullRepro) 242 return nil 243 }) 244 } else { 245 origTitle := ret.Crash.Report.Title 246 log.Logf(1, "failed repro for %q, err=%s", origTitle, ret.Err) 247 } 248 dc.store.SaveRepro(ret) 249 case rep := <-dc.new.crashes: 250 // A new crash is found on the patched instance. 251 crash := &Crash{Report: rep} 252 need := dc.NeedRepro(crash) 253 log.Logf(0, "patched crashed: %v [need repro = %v]", 254 rep.Title, need) 255 dc.store.PatchedCrashed(rep.Title, rep.Report, rep.Output) 256 if need { 257 reproLoop.Enqueue(crash) 258 } 259 } 260 } 261 return g.Wait() 262 } 263 264 func (dc *diffContext) ignoreCrash(ctx context.Context, title string) bool { 265 if dc.store.EverCrashedBase(title) { 266 return true 267 } 268 // Let's try to ask the external systems about it as well. 
269 if dc.cfg.IgnoreCrash != nil { 270 ignore, err := dc.cfg.IgnoreCrash(ctx, title) 271 if err != nil { 272 log.Logf(0, "a call to IgnoreCrash failed: %v", err) 273 } else { 274 if ignore { 275 log.Logf(0, "base crash %q is to be ignored", title) 276 } 277 return ignore 278 } 279 } 280 return false 281 } 282 283 func (dc *diffContext) reportBaseCrash(ctx context.Context, rep *report.Report) { 284 dc.store.BaseCrashed(rep.Title, rep.Report) 285 if dc.cfg.BaseCrashes == nil { 286 return 287 } 288 select { 289 case dc.cfg.BaseCrashes <- rep.Title: 290 case <-ctx.Done(): 291 } 292 } 293 294 func (dc *diffContext) waitCorpusTriage(ctx context.Context, threshold float64) chan struct{} { 295 const backOffTime = 30 * time.Second 296 ret := make(chan struct{}) 297 go func() { 298 for { 299 select { 300 case <-time.After(backOffTime): 301 case <-ctx.Done(): 302 return 303 } 304 triaged := dc.new.triageProgress() 305 if triaged >= threshold { 306 log.Logf(0, "triaged %.1f%% of the corpus", triaged*100.0) 307 close(ret) 308 return 309 } 310 } 311 }() 312 return ret 313 } 314 315 var ErrPatchedAreaNotReached = errors.New("fuzzer has not reached the patched area") 316 317 func (dc *diffContext) monitorPatchedCoverage(ctx context.Context) error { 318 if dc.cfg.FuzzToReachPatched == 0 { 319 // The feature is disabled. 320 return nil 321 } 322 323 // First wait until we have almost triaged all of the corpus. 324 select { 325 case <-ctx.Done(): 326 return nil 327 case <-dc.waitCorpusTriage(ctx, corpusTriageToMonitor): 328 } 329 330 // By this moment, we must have coverage filters already filled out. 331 focusPCs := 0 332 // The last one is "everything else", so it's not of interest. 333 coverFilters := dc.new.coverFilters 334 for i := 0; i < len(coverFilters.Areas)-1; i++ { 335 focusPCs += len(coverFilters.Areas[i].CoverPCs) 336 } 337 if focusPCs == 0 { 338 // No areas were configured. 
339 log.Logf(1, "no PCs in the areas of focused fuzzing, skipping the zero patched coverage check") 340 return nil 341 } 342 343 // Then give the fuzzer some change to get through. 344 select { 345 case <-time.After(dc.cfg.FuzzToReachPatched): 346 case <-ctx.Done(): 347 return nil 348 } 349 focusAreaStats := dc.new.progsPerArea() 350 if focusAreaStats[symbolsArea]+focusAreaStats[filesArea]+focusAreaStats[includesArea] > 0 { 351 log.Logf(0, "fuzzer has reached the modified code (%d + %d + %d), continuing fuzzing", 352 focusAreaStats[symbolsArea], focusAreaStats[filesArea], focusAreaStats[includesArea]) 353 return nil 354 } 355 log.Logf(0, "fuzzer has not reached the modified code in %s, aborting", 356 dc.cfg.FuzzToReachPatched) 357 return ErrPatchedAreaNotReached 358 } 359 360 // TODO: instead of this limit, consider expotentially growing delays between reproduction attempts. 361 const maxReproAttempts = 6 362 363 func needReproForTitle(title string) bool { 364 if strings.Contains(title, "no output") || 365 strings.Contains(title, "lost connection") || 366 strings.Contains(title, "detected stall") || 367 strings.Contains(title, "SYZ") { 368 // Don't waste time reproducing these. 
369 return false 370 } 371 return true 372 } 373 374 func (dc *diffContext) NeedRepro(crash *Crash) bool { 375 if crash.FullRepro { 376 return true 377 } 378 if !needReproForTitle(crash.Title) { 379 return false 380 } 381 ctx, cancel := context.WithTimeout(context.Background(), time.Minute) 382 defer cancel() 383 if dc.ignoreCrash(ctx, crash.Title) { 384 return false 385 } 386 dc.mu.Lock() 387 defer dc.mu.Unlock() 388 return dc.reproAttempts[crash.Title] <= maxReproAttempts 389 } 390 391 func (dc *diffContext) RunRepro(ctx context.Context, crash *Crash) *ReproResult { 392 dc.mu.Lock() 393 dc.reproAttempts[crash.Title]++ 394 dc.mu.Unlock() 395 396 res, stats, err := repro.Run(ctx, crash.Output, repro.Environment{ 397 Config: dc.new.cfg, 398 Features: dc.new.features, 399 Reporter: dc.new.reporter, 400 Pool: dc.new.pool, 401 Fast: !crash.FullRepro, 402 }) 403 if res != nil && res.Report != nil { 404 dc.mu.Lock() 405 dc.reproAttempts[res.Report.Title] = maxReproAttempts 406 dc.mu.Unlock() 407 } 408 ret := &ReproResult{ 409 Crash: crash, 410 Repro: res, 411 Stats: stats, 412 Err: err, 413 } 414 select { 415 case dc.doneRepro <- ret: 416 case <-ctx.Done(): 417 // If the context is cancelled, no one may be listening on doneRepro. 418 } 419 return ret 420 } 421 422 func (dc *diffContext) ResizeReproPool(size int) { 423 dc.new.pool.ReserveForRun(size) 424 } 425 426 type kernelContext struct { 427 name string 428 ctx context.Context 429 debug bool 430 cfg *mgrconfig.Config 431 reporter *report.Reporter 432 fuzzer atomic.Pointer[fuzzer.Fuzzer] 433 serv rpcserver.Server 434 servStats rpcserver.Stats 435 crashes chan *report.Report 436 pool *vm.Dispatcher 437 features flatrpc.Feature 438 candidates chan []fuzzer.Candidate 439 // Once candidates is assigned, candidatesCount holds their original count. 
440 candidatesCount atomic.Int64 441 442 coverFilters CoverageFilters 443 reportGenerator *ReportGeneratorWrapper 444 445 http *HTTPServer 446 source queue.Source 447 duplicateInto queue.Executor 448 } 449 450 func setup(name string, cfg *mgrconfig.Config, debug bool) (*kernelContext, error) { 451 osutil.MkdirAll(cfg.Workdir) 452 453 kernelCtx := &kernelContext{ 454 name: name, 455 debug: debug, 456 cfg: cfg, 457 crashes: make(chan *report.Report, 128), 458 candidates: make(chan []fuzzer.Candidate), 459 servStats: rpcserver.NewNamedStats(name), 460 reportGenerator: ReportGeneratorCache(cfg), 461 } 462 463 var err error 464 kernelCtx.reporter, err = report.NewReporter(cfg) 465 if err != nil { 466 return nil, fmt.Errorf("failed to create reporter for %q: %w", name, err) 467 } 468 469 kernelCtx.serv, err = rpcserver.New(&rpcserver.RemoteConfig{ 470 Config: cfg, 471 Manager: kernelCtx, 472 Stats: kernelCtx.servStats, 473 Debug: debug, 474 }) 475 if err != nil { 476 return nil, fmt.Errorf("failed to create rpc server for %q: %w", name, err) 477 } 478 479 vmPool, err := vm.Create(cfg, debug) 480 if err != nil { 481 return nil, fmt.Errorf("failed to create vm.Pool for %q: %w", name, err) 482 } 483 484 kernelCtx.pool = vm.NewDispatcher(vmPool, kernelCtx.fuzzerInstance) 485 return kernelCtx, nil 486 } 487 488 func (kc *kernelContext) Loop(baseCtx context.Context) error { 489 defer log.Logf(1, "%s: kernel context loop terminated", kc.name) 490 491 if err := kc.serv.Listen(); err != nil { 492 return fmt.Errorf("failed to start rpc server: %w", err) 493 } 494 eg, ctx := errgroup.WithContext(baseCtx) 495 kc.ctx = ctx 496 eg.Go(func() error { 497 defer log.Logf(1, "%s: rpc server terminaled", kc.name) 498 return kc.serv.Serve(ctx) 499 }) 500 eg.Go(func() error { 501 defer log.Logf(1, "%s: pool terminated", kc.name) 502 kc.pool.Loop(ctx) 503 return nil 504 }) 505 eg.Go(func() error { 506 for { 507 select { 508 case <-ctx.Done(): 509 return nil 510 case err := 
<-kc.pool.BootErrors: 511 title := "unknown" 512 var bootErr vm.BootErrorer 513 if errors.As(err, &bootErr) { 514 title, _ = bootErr.BootError() 515 } 516 // Boot errors are not useful for patch fuzzing (at least yet). 517 // Fetch them to not block the channel and print them to the logs. 518 log.Logf(0, "%s: boot error: %s", kc.name, title) 519 } 520 } 521 }) 522 return eg.Wait() 523 } 524 525 func (kc *kernelContext) MaxSignal() signal.Signal { 526 if fuzzer := kc.fuzzer.Load(); fuzzer != nil { 527 return fuzzer.Cover.CopyMaxSignal() 528 } 529 return nil 530 } 531 532 func (kc *kernelContext) BugFrames() (leaks, races []string) { 533 return nil, nil 534 } 535 536 func (kc *kernelContext) MachineChecked(features flatrpc.Feature, 537 syscalls map[*prog.Syscall]bool) (queue.Source, error) { 538 if len(syscalls) == 0 { 539 return nil, fmt.Errorf("all system calls are disabled") 540 } 541 log.Logf(0, "%s: machine check complete", kc.name) 542 kc.features = features 543 544 var source queue.Source 545 if kc.source == nil { 546 source = queue.Tee(kc.setupFuzzer(features, syscalls), kc.duplicateInto) 547 } else { 548 source = kc.source 549 } 550 opts := fuzzer.DefaultExecOpts(kc.cfg, features, kc.debug) 551 return queue.DefaultOpts(source, opts), nil 552 } 553 554 func (kc *kernelContext) setupFuzzer(features flatrpc.Feature, syscalls map[*prog.Syscall]bool) queue.Source { 555 rnd := rand.New(rand.NewSource(time.Now().UnixNano())) 556 corpusObj := corpus.NewFocusedCorpus(kc.ctx, nil, kc.coverFilters.Areas) 557 fuzzerObj := fuzzer.NewFuzzer(kc.ctx, &fuzzer.Config{ 558 Corpus: corpusObj, 559 Coverage: kc.cfg.Cover, 560 // Fault injection may bring instaibility into bug reproducibility, which may lead to false positives. 
561 FaultInjection: false, 562 Comparisons: features&flatrpc.FeatureComparisons != 0, 563 Collide: true, 564 EnabledCalls: syscalls, 565 NoMutateCalls: kc.cfg.NoMutateCalls, 566 PatchTest: true, 567 Logf: func(level int, msg string, args ...interface{}) { 568 if level != 0 { 569 return 570 } 571 log.Logf(level, msg, args...) 572 }, 573 }, rnd, kc.cfg.Target) 574 575 if kc.http != nil { 576 kc.http.Fuzzer.Store(fuzzerObj) 577 kc.http.EnabledSyscalls.Store(syscalls) 578 kc.http.Corpus.Store(corpusObj) 579 } 580 581 var candidates []fuzzer.Candidate 582 select { 583 case candidates = <-kc.candidates: 584 case <-kc.ctx.Done(): 585 // The loop will be aborted later. 586 break 587 } 588 // We assign kc.fuzzer after kc.candidatesCount to simplify the triageProgress implementation. 589 kc.candidatesCount.Store(int64(len(candidates))) 590 kc.fuzzer.Store(fuzzerObj) 591 592 filtered := FilterCandidates(candidates, syscalls, false).Candidates 593 log.Logf(0, "%s: adding %d seeds", kc.name, len(filtered)) 594 fuzzerObj.AddCandidates(filtered) 595 596 go func() { 597 if !kc.cfg.Cover { 598 return 599 } 600 for { 601 select { 602 case <-time.After(time.Second): 603 case <-kc.ctx.Done(): 604 return 605 } 606 newSignal := fuzzerObj.Cover.GrabSignalDelta() 607 if len(newSignal) == 0 { 608 continue 609 } 610 kc.serv.DistributeSignalDelta(newSignal) 611 } 612 }() 613 return fuzzerObj 614 } 615 616 func (kc *kernelContext) CoverageFilter(modules []*vminfo.KernelModule) ([]uint64, error) { 617 kc.reportGenerator.Init(modules) 618 filters, err := PrepareCoverageFilters(kc.reportGenerator, kc.cfg, false) 619 if err != nil { 620 return nil, fmt.Errorf("failed to init coverage filter: %w", err) 621 } 622 kc.coverFilters = filters 623 for _, area := range filters.Areas { 624 log.Logf(0, "area %q: %d PCs in the cover filter", 625 area.Name, len(area.CoverPCs)) 626 } 627 log.Logf(0, "executor cover filter: %d PCs", len(filters.ExecutorFilter)) 628 if kc.http != nil { 629 
kc.http.Cover.Store(&CoverageInfo{ 630 Modules: modules, 631 ReportGenerator: kc.reportGenerator, 632 CoverFilter: filters.ExecutorFilter, 633 }) 634 } 635 var pcs []uint64 636 for pc := range filters.ExecutorFilter { 637 pcs = append(pcs, pc) 638 } 639 return pcs, nil 640 } 641 642 func (kc *kernelContext) fuzzerInstance(ctx context.Context, inst *vm.Instance, updInfo dispatcher.UpdateInfo) { 643 index := inst.Index() 644 injectExec := make(chan bool, 10) 645 kc.serv.CreateInstance(index, injectExec, updInfo) 646 rep, err := kc.runInstance(ctx, inst, injectExec) 647 lastExec, _ := kc.serv.ShutdownInstance(index, rep != nil) 648 if rep != nil { 649 rpcserver.PrependExecuting(rep, lastExec) 650 select { 651 case kc.crashes <- rep: 652 case <-ctx.Done(): 653 } 654 } 655 if err != nil { 656 log.Errorf("#%d run failed: %s", inst.Index(), err) 657 } 658 } 659 660 func (kc *kernelContext) runInstance(ctx context.Context, inst *vm.Instance, 661 injectExec <-chan bool) (*report.Report, error) { 662 fwdAddr, err := inst.Forward(kc.serv.Port()) 663 if err != nil { 664 return nil, fmt.Errorf("failed to setup port forwarding: %w", err) 665 } 666 executorBin, err := inst.Copy(kc.cfg.ExecutorBin) 667 if err != nil { 668 return nil, fmt.Errorf("failed to copy binary: %w", err) 669 } 670 host, port, err := net.SplitHostPort(fwdAddr) 671 if err != nil { 672 return nil, fmt.Errorf("failed to parse manager's address") 673 } 674 cmd := fmt.Sprintf("%v runner %v %v %v", executorBin, inst.Index(), host, port) 675 ctxTimeout, cancel := context.WithTimeout(ctx, kc.cfg.Timeouts.VMRunningTime) 676 defer cancel() 677 _, reps, err := inst.Run(ctxTimeout, kc.reporter, cmd, 678 vm.WithExitCondition(vm.ExitTimeout), 679 vm.WithInjectExecuting(injectExec), 680 vm.WithEarlyFinishCb(func() { 681 // Depending on the crash type and kernel config, fuzzing may continue 682 // running for several seconds even after kernel has printed a crash report. 683 // This litters the log and we want to prevent it. 
684 kc.serv.StopFuzzing(inst.Index()) 685 }), 686 ) 687 if len(reps) > 0 { 688 return reps[0], err 689 } 690 return nil, err 691 } 692 693 func (kc *kernelContext) triageProgress() float64 { 694 fuzzer := kc.fuzzer.Load() 695 if fuzzer == nil { 696 return 0 697 } 698 total := kc.candidatesCount.Load() 699 if total == 0.0 { 700 // There were no candidates in the first place. 701 return 1 702 } 703 return 1.0 - float64(fuzzer.CandidatesToTriage())/float64(total) 704 } 705 706 func (kc *kernelContext) progsPerArea() map[string]int { 707 fuzzer := kc.fuzzer.Load() 708 if fuzzer == nil { 709 return nil 710 } 711 return fuzzer.Config.Corpus.ProgsPerArea() 712 } 713 714 // reproRunner is used to run reproducers on the base kernel to determine whether it is affected. 715 type reproRunner struct { 716 done chan reproRunnerResult 717 running atomic.Int64 718 kernel *kernelContext 719 } 720 721 type reproRunnerResult struct { 722 reproReport *report.Report 723 crashReport *report.Report 724 repro *repro.Result 725 fullRepro bool // whether this was a full reproduction 726 } 727 728 const ( 729 // We want to avoid false positives as much as possible, so let's use 730 // a stricter relibability cut-off than what's used inside pkg/repro. 731 reliabilityCutOff = 0.4 732 // 80% reliability x 3 runs is a 0.8% chance of false positives. 733 // 6 runs at 40% reproducibility gives a ~4% false positive chance. 734 reliabilityThreshold = 0.8 735 ) 736 737 // Run executes the reproducer 3 times with slightly different options. 738 // The objective is to verify whether the bug triggered by the reproducer affects the base kernel. 739 // To avoid reporting false positives, the function does not require the kernel to crash with exactly 740 // the same crash title as in the original crash report. Any single crash is accepted. 741 // The result is sent back over the rr.done channel. 
func (rr *reproRunner) Run(ctx context.Context, r *repro.Result, fullRepro bool) {
	if r.Reliability < reliabilityCutOff {
		log.Logf(1, "%s: repro is too unreliable, skipping", r.Report.Title)
		return
	}
	// Less reliable repros get twice as many runs to keep the false positive
	// probability low (see the comment next to reliabilityThreshold).
	needRuns := 3
	if r.Reliability < reliabilityThreshold {
		needRuns = 6
	}

	// Reserve one base-kernel VM per concurrently running reproduction,
	// capped by the total pool size.
	pool := rr.kernel.pool
	cnt := int(rr.running.Add(1))
	pool.ReserveForRun(min(cnt, pool.Total()))
	defer func() {
		// Shrink the reservation back once this reproduction is finished.
		cnt := int(rr.running.Add(-1))
		rr.kernel.pool.ReserveForRun(min(cnt, pool.Total()))
	}()

	ret := reproRunnerResult{reproReport: r.Report, repro: r, fullRepro: fullRepro}
	for doneRuns := 0; doneRuns < needRuns; {
		if ctx.Err() != nil {
			return
		}
		opts := r.Opts
		opts.Repeat = true
		if doneRuns%3 != 2 {
			// Two times out of 3, test with Threaded=true.
			// The third time we leave it as it was in the reproducer (in case it was important).
			opts.Threaded = true
		}
		var err error
		var result *instance.RunResult
		runErr := pool.Run(ctx, func(ctx context.Context, inst *vm.Instance, updInfo dispatcher.UpdateInfo) {
			var ret *instance.ExecProgInstance
			ret, err = instance.SetupExecProg(inst, rr.kernel.cfg, rr.kernel.reporter, nil)
			if err != nil {
				return
			}
			// Give the reproducer at least a minute even if the original run was shorter.
			result, err = ret.RunSyzProg(instance.ExecParams{
				SyzProg:  r.Prog.Serialize(),
				Duration: max(r.Duration, time.Minute),
				Opts:     opts,
			})
		})
		logPrefix := fmt.Sprintf("attempt #%d to run %q on base", doneRuns, ret.reproReport.Title)
		if errors.Is(runErr, context.Canceled) {
			// Just exit without sending anything over the channel.
			log.Logf(1, "%s: aborting due to context cancelation", logPrefix)
			return
		} else if runErr != nil || err != nil {
			// Infrastructure errors don't count towards doneRuns; retry the attempt.
			log.Logf(1, "%s: skipping due to errors: %v / %v", logPrefix, runErr, err)
			continue
		}
		doneRuns++
		if result != nil && result.Report != nil {
			// Any crash on the base kernel disqualifies the bug as patched-only.
			log.Logf(1, "%s: crashed with %s", logPrefix, result.Report.Title)
			ret.crashReport = result.Report
			break
		} else {
			log.Logf(1, "%s: did not crash", logPrefix)
		}
	}
	select {
	case rr.done <- ret:
	case <-ctx.Done():
	}
}

// Names of the patch-related focus areas constructed by PatchFocusAreas.
const (
	symbolsArea  = "symbols"
	filesArea    = "files"
	includesArea = "included"
)

// PatchFocusAreas appends coverage focus areas to cfg for the code touched by
// the patches: modified symbols (weight 6), directly modified files (weight 3)
// and files transitively affected via modified headers (weight 2). If any area
// was added, a catch-all area with weight 1 keeps fuzzing the rest of the kernel.
func PatchFocusAreas(cfg *mgrconfig.Config, gitPatches [][]byte, baseHashes, patchedHashes map[string]string) {
	funcs := modifiedSymbols(baseHashes, patchedHashes)
	if len(funcs) > 0 {
		log.Logf(0, "adding modified_functions to focus areas: %q", funcs)
		cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas,
			mgrconfig.FocusArea{
				Name: symbolsArea,
				Filter: mgrconfig.CovFilterCfg{
					Functions: funcs,
				},
				Weight: 6.0,
			})
	}

	direct, transitive := affectedFiles(cfg, gitPatches)
	if len(direct) > 0 {
		sort.Strings(direct)
		log.Logf(0, "adding directly modified files to focus areas: %q", direct)
		cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas,
			mgrconfig.FocusArea{
				Name: filesArea,
				Filter: mgrconfig.CovFilterCfg{
					Files: direct,
				},
				Weight: 3.0,
			})
	}

	if len(transitive) > 0 {
		sort.Strings(transitive)
		log.Logf(0, "adding transitively affected to focus areas: %q", transitive)
		cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas,
			mgrconfig.FocusArea{
				Name: includesArea,
				Filter: mgrconfig.CovFilterCfg{
					Files: transitive,
				},
				Weight: 2.0,
			})
	}

	// Still fuzz the rest of the kernel.
	if len(cfg.Experimental.FocusAreas) > 0 {
		cfg.Experimental.FocusAreas = append(cfg.Experimental.FocusAreas,
			mgrconfig.FocusArea{
				Weight: 1.0,
			})
	}
}

// affectedFiles parses the git patches and returns the directly modified files
// plus the .c files that transitively include the modified headers.
// The two returned lists are disjoint.
func affectedFiles(cfg *mgrconfig.Config, gitPatches [][]byte) (direct, transitive []string) {
	// Headers included by more than this many files are too widespread to be useful.
	const maxAffectedByHeader = 50

	directMap := make(map[string]struct{})
	transitiveMap := make(map[string]struct{})
	var allFiles []string
	for _, patch := range gitPatches {
		allFiles = append(allFiles, vcs.ParseGitDiff(patch)...)
	}
	for _, file := range allFiles {
		directMap[file] = struct{}{}
		if strings.HasSuffix(file, ".h") && cfg.KernelSrc != "" {
			// For .h files, we want to determine all the .c files that include them.
			// Ideally, we should combine this with the recompilation process - then we know
			// exactly which files were affected by the patch.
			matching, err := osutil.GrepFiles(cfg.KernelSrc, `.c`,
				[]byte(`<`+strings.TrimPrefix(file, "include/")+`>`))
			if err != nil {
				log.Logf(0, "failed to grep for includes: %s", err)
				continue
			}
			if len(matching) >= maxAffectedByHeader {
				// It's too widespread. It won't help us focus on anything.
				log.Logf(0, "the header %q is included in too many files (%d)", file, len(matching))
				continue
			}
			for _, name := range matching {
				transitiveMap[name] = struct{}{}
			}
		}
	}
	for name := range directMap {
		direct = append(direct, name)
	}
	for name := range transitiveMap {
		// Files that were also directly modified belong to the direct list only.
		if _, ok := directMap[name]; ok {
			continue
		}
		transitive = append(transitive, name)
	}
	return
}

// If there are too many different symbols, they are no longer specific enough.
// Don't use them to focus the fuzzer.
const modifiedSymbolThreshold = 0.05

// modifiedSymbols returns the sorted names of the symbols whose hashes differ
// between the base and the patched kernel (symbols present only in the patched
// kernel count as modified). If the number of modified symbols exceeds
// modifiedSymbolThreshold of all patched symbols, nil is returned.
func modifiedSymbols(baseHashes, patchedHashes map[string]string) []string {
	// The cut-off only depends on the total symbol count, so compute it once.
	limit := float64(len(patchedHashes)) * modifiedSymbolThreshold
	var diff []string
	for symbol, patchedHash := range patchedHashes {
		baseHash, known := baseHashes[symbol]
		if known && baseHash == patchedHash {
			continue
		}
		diff = append(diff, symbol)
		if float64(len(diff)) > limit {
			// Too many modified symbols -- they are not specific enough to focus on.
			return nil
		}
	}
	sort.Strings(diff)
	return diff
}