github.com/bir3/gocompiler@v0.9.2202/src/internal/fuzz/fuzz.go (about) 1 // Copyright 2020 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package fuzz provides common fuzzing functionality for tests built with 6 // "go test" and for programs that use fuzzing functionality in the testing 7 // package. 8 package fuzz 9 10 import ( 11 "bytes" 12 "context" 13 "crypto/sha256" 14 "errors" 15 "fmt" 16 "github.com/bir3/gocompiler/src/internal/godebug" 17 "io" 18 "math/bits" 19 "os" 20 "path/filepath" 21 "reflect" 22 "runtime" 23 "strings" 24 "time" 25 ) 26 27 // CoordinateFuzzingOpts is a set of arguments for CoordinateFuzzing. 28 // The zero value is valid for each field unless specified otherwise. 29 type CoordinateFuzzingOpts struct { 30 // Log is a writer for logging progress messages and warnings. 31 // If nil, io.Discard will be used instead. 32 Log io.Writer 33 34 // Timeout is the amount of wall clock time to spend fuzzing after the corpus 35 // has loaded. If zero, there will be no time limit. 36 Timeout time.Duration 37 38 // Limit is the number of random values to generate and test. If zero, 39 // there will be no limit on the number of generated values. 40 Limit int64 41 42 // MinimizeTimeout is the amount of wall clock time to spend minimizing 43 // after discovering a crasher. If zero, there will be no time limit. If 44 // MinimizeTimeout and MinimizeLimit are both zero, then minimization will 45 // be disabled. 46 MinimizeTimeout time.Duration 47 48 // MinimizeLimit is the maximum number of calls to the fuzz function to be 49 // made while minimizing after finding a crash. If zero, there will be no 50 // limit. Calls to the fuzz function made when minimizing also count toward 51 // Limit. If MinimizeTimeout and MinimizeLimit are both zero, then 52 // minimization will be disabled. 53 MinimizeLimit int64 54 55 // parallel is the number of worker processes to run in parallel. If zero, 56 // CoordinateFuzzing will run GOMAXPROCS workers. 57 Parallel int 58 59 // Seed is a list of seed values added by the fuzz target with testing.F.Add 60 // and in testdata. 61 Seed []CorpusEntry 62 63 // Types is the list of types which make up a corpus entry. 64 // Types must be set and must match values in Seed. 65 Types []reflect.Type 66 67 // CorpusDir is a directory where files containing values that crash the 68 // code being tested may be written. CorpusDir must be set. 69 CorpusDir string 70 71 // CacheDir is a directory containing additional "interesting" values. 72 // The fuzzer may derive new values from these, and may write new values here. 73 CacheDir string 74 } 75 76 // CoordinateFuzzing creates several worker processes and communicates with 77 // them to test random inputs that could trigger crashes and expose bugs. 78 // The worker processes run the same binary in the same directory with the 79 // same environment variables as the coordinator process. Workers also run 80 // with the same arguments as the coordinator, except with the -test.fuzzworker 81 // flag prepended to the argument list. 82 // 83 // If a crash occurs, the function will return an error containing information 84 // about the crash, which can be reported to the user. 85 func CoordinateFuzzing(ctx context.Context, opts CoordinateFuzzingOpts) (err error) { 86 if err := ctx.Err(); err != nil { 87 return err 88 } 89 if opts.Log == nil { 90 opts.Log = io.Discard 91 } 92 if opts.Parallel == 0 { 93 opts.Parallel = runtime.GOMAXPROCS(0) 94 } 95 if opts.Limit > 0 && int64(opts.Parallel) > opts.Limit { 96 // Don't start more workers than we need. 97 opts.Parallel = int(opts.Limit) 98 } 99 100 c, err := newCoordinator(opts) 101 if err != nil { 102 return err 103 } 104 105 if opts.Timeout > 0 { 106 var cancel func() 107 ctx, cancel = context.WithTimeout(ctx, opts.Timeout) 108 defer cancel() 109 } 110 111 // fuzzCtx is used to stop workers, for example, after finding a crasher. 112 fuzzCtx, cancelWorkers := context.WithCancel(ctx) 113 defer cancelWorkers() 114 doneC := ctx.Done() 115 116 // stop is called when a worker encounters a fatal error. 117 var fuzzErr error 118 stopping := false 119 stop := func(err error) { 120 if shouldPrintDebugInfo() { 121 _, file, line, ok := runtime.Caller(1) 122 if ok { 123 c.debugLogf("stop called at %s:%d. stopping: %t", file, line, stopping) 124 } else { 125 c.debugLogf("stop called at unknown. stopping: %t", stopping) 126 } 127 } 128 129 if err == fuzzCtx.Err() || isInterruptError(err) { 130 // Suppress cancellation errors and terminations due to SIGINT. 131 // The messages are not helpful since either the user triggered the error 132 // (with ^C) or another more helpful message will be printed (a crasher). 133 err = nil 134 } 135 if err != nil && (fuzzErr == nil || fuzzErr == ctx.Err()) { 136 fuzzErr = err 137 } 138 if stopping { 139 return 140 } 141 stopping = true 142 cancelWorkers() 143 doneC = nil 144 } 145 146 // Ensure that any crash we find is written to the corpus, even if an error 147 // or interruption occurs while minimizing it. 148 crashWritten := false 149 defer func() { 150 if c.crashMinimizing == nil || crashWritten { 151 return 152 } 153 werr := writeToCorpus(&c.crashMinimizing.entry, opts.CorpusDir) 154 if werr != nil { 155 err = fmt.Errorf("%w\n%v", err, werr) 156 return 157 } 158 if err == nil { 159 err = &crashError{ 160 path: c.crashMinimizing.entry.Path, 161 err: errors.New(c.crashMinimizing.crasherMsg), 162 } 163 } 164 }() 165 166 // Start workers. 167 // TODO(jayconrod): do we want to support fuzzing different binaries? 168 dir := "" // same as self 169 binPath := os.Args[0] 170 args := append([]string{"-test.fuzzworker"}, os.Args[1:]...) 171 env := os.Environ() // same as self 172 173 errC := make(chan error) 174 workers := make([]*worker, opts.Parallel) 175 for i := range workers { 176 var err error 177 workers[i], err = newWorker(c, dir, binPath, args, env) 178 if err != nil { 179 return err 180 } 181 } 182 for i := range workers { 183 w := workers[i] 184 go func() { 185 err := w.coordinate(fuzzCtx) 186 if fuzzCtx.Err() != nil || isInterruptError(err) { 187 err = nil 188 } 189 cleanErr := w.cleanup() 190 if err == nil { 191 err = cleanErr 192 } 193 errC <- err 194 }() 195 } 196 197 // Main event loop. 198 // Do not return until all workers have terminated. We avoid a deadlock by 199 // receiving messages from workers even after ctx is cancelled. 200 activeWorkers := len(workers) 201 statTicker := time.NewTicker(3 * time.Second) 202 defer statTicker.Stop() 203 defer c.logStats() 204 205 c.logStats() 206 for { 207 // If there is an execution limit, and we've reached it, stop. 208 if c.opts.Limit > 0 && c.count >= c.opts.Limit { 209 stop(nil) 210 } 211 212 var inputC chan fuzzInput 213 input, ok := c.peekInput() 214 if ok && c.crashMinimizing == nil && !stopping { 215 inputC = c.inputC 216 } 217 218 var minimizeC chan fuzzMinimizeInput 219 minimizeInput, ok := c.peekMinimizeInput() 220 if ok && !stopping { 221 minimizeC = c.minimizeC 222 } 223 224 select { 225 case <-doneC: 226 // Interrupted, cancelled, or timed out. 227 // stop sets doneC to nil so we don't busy wait here. 228 stop(ctx.Err()) 229 230 case err := <-errC: 231 // A worker terminated, possibly after encountering a fatal error. 232 stop(err) 233 activeWorkers-- 234 if activeWorkers == 0 { 235 return fuzzErr 236 } 237 238 case result := <-c.resultC: 239 // Received response from worker. 240 if stopping { 241 break 242 } 243 c.updateStats(result) 244 245 if result.crasherMsg != "" { 246 if c.warmupRun() && result.entry.IsSeed { 247 target := filepath.Base(c.opts.CorpusDir) 248 fmt.Fprintf(c.opts.Log, "failure while testing seed corpus entry: %s/%s\n", target, testName(result.entry.Parent)) 249 stop(errors.New(result.crasherMsg)) 250 break 251 } 252 if c.canMinimize() && result.canMinimize { 253 if c.crashMinimizing != nil { 254 // This crash is not minimized, and another crash is being minimized. 255 // Ignore this one and wait for the other one to finish. 256 if shouldPrintDebugInfo() { 257 c.debugLogf("found unminimized crasher, skipping in favor of minimizable crasher") 258 } 259 break 260 } 261 // Found a crasher but haven't yet attempted to minimize it. 262 // Send it back to a worker for minimization. Disable inputC so 263 // other workers don't continue fuzzing. 264 c.crashMinimizing = &result 265 fmt.Fprintf(c.opts.Log, "fuzz: minimizing %d-byte failing input file\n", len(result.entry.Data)) 266 c.queueForMinimization(result, nil) 267 } else if !crashWritten { 268 // Found a crasher that's either minimized or not minimizable. 269 // Write to corpus and stop. 270 err := writeToCorpus(&result.entry, opts.CorpusDir) 271 if err == nil { 272 crashWritten = true 273 err = &crashError{ 274 path: result.entry.Path, 275 err: errors.New(result.crasherMsg), 276 } 277 } 278 if shouldPrintDebugInfo() { 279 c.debugLogf( 280 "found crasher, id: %s, parent: %s, gen: %d, size: %d, exec time: %s", 281 result.entry.Path, 282 result.entry.Parent, 283 result.entry.Generation, 284 len(result.entry.Data), 285 result.entryDuration, 286 ) 287 } 288 stop(err) 289 } 290 } else if result.coverageData != nil { 291 if c.warmupRun() { 292 if shouldPrintDebugInfo() { 293 c.debugLogf( 294 "processed an initial input, id: %s, new bits: %d, size: %d, exec time: %s", 295 result.entry.Parent, 296 countBits(diffCoverage(c.coverageMask, result.coverageData)), 297 len(result.entry.Data), 298 result.entryDuration, 299 ) 300 } 301 c.updateCoverage(result.coverageData) 302 c.warmupInputLeft-- 303 if c.warmupInputLeft == 0 { 304 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, gathering baseline coverage: %d/%d completed, now fuzzing with %d workers\n", c.elapsed(), c.warmupInputCount, c.warmupInputCount, c.opts.Parallel) 305 if shouldPrintDebugInfo() { 306 c.debugLogf( 307 "finished processing input corpus, entries: %d, initial coverage bits: %d", 308 len(c.corpus.entries), 309 countBits(c.coverageMask), 310 ) 311 } 312 } 313 } else if keepCoverage := diffCoverage(c.coverageMask, result.coverageData); keepCoverage != nil { 314 // Found a value that expanded coverage. 315 // It's not a crasher, but we may want to add it to the on-disk 316 // corpus and prioritize it for future fuzzing. 317 // TODO(jayconrod, katiehockman): Prioritize fuzzing these 318 // values which expanded coverage, perhaps based on the 319 // number of new edges that this result expanded. 320 // TODO(jayconrod, katiehockman): Don't write a value that's already 321 // in the corpus. 322 if c.canMinimize() && result.canMinimize && c.crashMinimizing == nil { 323 // Send back to workers to find a smaller value that preserves 324 // at least one new coverage bit. 325 c.queueForMinimization(result, keepCoverage) 326 } else { 327 // Update the coordinator's coverage mask and save the value. 328 inputSize := len(result.entry.Data) 329 entryNew, err := c.addCorpusEntries(true, result.entry) 330 if err != nil { 331 stop(err) 332 break 333 } 334 if !entryNew { 335 if shouldPrintDebugInfo() { 336 c.debugLogf( 337 "ignoring duplicate input which increased coverage, id: %s", 338 result.entry.Path, 339 ) 340 } 341 break 342 } 343 c.updateCoverage(keepCoverage) 344 c.inputQueue.enqueue(result.entry) 345 c.interestingCount++ 346 if shouldPrintDebugInfo() { 347 c.debugLogf( 348 "new interesting input, id: %s, parent: %s, gen: %d, new bits: %d, total bits: %d, size: %d, exec time: %s", 349 result.entry.Path, 350 result.entry.Parent, 351 result.entry.Generation, 352 countBits(keepCoverage), 353 countBits(c.coverageMask), 354 inputSize, 355 result.entryDuration, 356 ) 357 } 358 } 359 } else { 360 if shouldPrintDebugInfo() { 361 c.debugLogf( 362 "worker reported interesting input that doesn't expand coverage, id: %s, parent: %s, canMinimize: %t", 363 result.entry.Path, 364 result.entry.Parent, 365 result.canMinimize, 366 ) 367 } 368 } 369 } else if c.warmupRun() { 370 // No error or coverage data was reported for this input during 371 // warmup, so continue processing results. 372 c.warmupInputLeft-- 373 if c.warmupInputLeft == 0 { 374 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, testing seed corpus: %d/%d completed, now fuzzing with %d workers\n", c.elapsed(), c.warmupInputCount, c.warmupInputCount, c.opts.Parallel) 375 if shouldPrintDebugInfo() { 376 c.debugLogf( 377 "finished testing-only phase, entries: %d", 378 len(c.corpus.entries), 379 ) 380 } 381 } 382 } 383 384 case inputC <- input: 385 // Sent the next input to a worker. 386 c.sentInput(input) 387 388 case minimizeC <- minimizeInput: 389 // Sent the next input for minimization to a worker. 390 c.sentMinimizeInput(minimizeInput) 391 392 case <-statTicker.C: 393 c.logStats() 394 } 395 } 396 397 // TODO(jayconrod,katiehockman): if a crasher can't be written to the corpus, 398 // write to the cache instead. 399 } 400 401 // crashError wraps a crasher written to the seed corpus. It saves the name 402 // of the file where the input causing the crasher was saved. The testing 403 // framework uses this to report a command to re-run that specific input. 404 type crashError struct { 405 path string 406 err error 407 } 408 409 func (e *crashError) Error() string { 410 return e.err.Error() 411 } 412 413 func (e *crashError) Unwrap() error { 414 return e.err 415 } 416 417 func (e *crashError) CrashPath() string { 418 return e.path 419 } 420 421 type corpus struct { 422 entries []CorpusEntry 423 hashes map[[sha256.Size]byte]bool 424 } 425 426 // addCorpusEntries adds entries to the corpus, and optionally writes the entries 427 // to the cache directory. If an entry is already in the corpus it is skipped. If 428 // all of the entries are unique, addCorpusEntries returns true and a nil error, 429 // if at least one of the entries was a duplicate, it returns false and a nil error. 430 func (c *coordinator) addCorpusEntries(addToCache bool, entries ...CorpusEntry) (bool, error) { 431 noDupes := true 432 for _, e := range entries { 433 data, err := corpusEntryData(e) 434 if err != nil { 435 return false, err 436 } 437 h := sha256.Sum256(data) 438 if c.corpus.hashes[h] { 439 noDupes = false 440 continue 441 } 442 if addToCache { 443 if err := writeToCorpus(&e, c.opts.CacheDir); err != nil { 444 return false, err 445 } 446 // For entries written to disk, we don't hold onto the bytes, 447 // since the corpus would consume a significant amount of 448 // memory. 449 e.Data = nil 450 } 451 c.corpus.hashes[h] = true 452 c.corpus.entries = append(c.corpus.entries, e) 453 } 454 return noDupes, nil 455 } 456 457 // CorpusEntry represents an individual input for fuzzing. 458 // 459 // We must use an equivalent type in the testing and testing/internal/testdeps 460 // packages, but testing can't import this package directly, and we don't want 461 // to export this type from testing. Instead, we use the same struct type and 462 // use a type alias (not a defined type) for convenience. 463 type CorpusEntry = struct { 464 Parent string 465 466 // Path is the path of the corpus file, if the entry was loaded from disk. 467 // For other entries, including seed values provided by f.Add, Path is the 468 // name of the test, e.g. seed#0 or its hash. 469 Path string 470 471 // Data is the raw input data. Data should only be populated for seed 472 // values. For on-disk corpus files, Data will be nil, as it will be loaded 473 // from disk using Path. 474 Data []byte 475 476 // Values is the unmarshaled values from a corpus file. 477 Values []any 478 479 Generation int 480 481 // IsSeed indicates whether this entry is part of the seed corpus. 482 IsSeed bool 483 } 484 485 // corpusEntryData returns the raw input bytes, either from the data struct 486 // field, or from disk. 487 func corpusEntryData(ce CorpusEntry) ([]byte, error) { 488 if ce.Data != nil { 489 return ce.Data, nil 490 } 491 492 return os.ReadFile(ce.Path) 493 } 494 495 type fuzzInput struct { 496 // entry is the value to test initially. The worker will randomly mutate 497 // values from this starting point. 498 entry CorpusEntry 499 500 // timeout is the time to spend fuzzing variations of this input, 501 // not including starting or cleaning up. 502 timeout time.Duration 503 504 // limit is the maximum number of calls to the fuzz function the worker may 505 // make. The worker may make fewer calls, for example, if it finds an 506 // error early. If limit is zero, there is no limit on calls to the 507 // fuzz function. 508 limit int64 509 510 // warmup indicates whether this is a warmup input before fuzzing begins. If 511 // true, the input should not be fuzzed. 512 warmup bool 513 514 // coverageData reflects the coordinator's current coverageMask. 515 coverageData []byte 516 } 517 518 type fuzzResult struct { 519 // entry is an interesting value or a crasher. 520 entry CorpusEntry 521 522 // crasherMsg is an error message from a crash. It's "" if no crash was found. 523 crasherMsg string 524 525 // canMinimize is true if the worker should attempt to minimize this result. 526 // It may be false because an attempt has already been made. 527 canMinimize bool 528 529 // coverageData is set if the worker found new coverage. 530 coverageData []byte 531 532 // limit is the number of values the coordinator asked the worker 533 // to test. 0 if there was no limit. 534 limit int64 535 536 // count is the number of values the worker actually tested. 537 count int64 538 539 // totalDuration is the time the worker spent testing inputs. 540 totalDuration time.Duration 541 542 // entryDuration is the time the worker spent execution an interesting result 543 entryDuration time.Duration 544 } 545 546 type fuzzMinimizeInput struct { 547 // entry is an interesting value or crasher to minimize. 548 entry CorpusEntry 549 550 // crasherMsg is an error message from a crash. It's "" if no crash was found. 551 // If set, the worker will attempt to find a smaller input that also produces 552 // an error, though not necessarily the same error. 553 crasherMsg string 554 555 // limit is the maximum number of calls to the fuzz function the worker may 556 // make. The worker may make fewer calls, for example, if it can't reproduce 557 // an error. If limit is zero, there is no limit on calls to the fuzz function. 558 limit int64 559 560 // timeout is the time to spend minimizing this input. 561 // A zero timeout means no limit. 562 timeout time.Duration 563 564 // keepCoverage is a set of coverage bits that entry found that were not in 565 // the coordinator's combined set. When minimizing, the worker should find an 566 // input that preserves at least one of these bits. keepCoverage is nil for 567 // crashing inputs. 568 keepCoverage []byte 569 } 570 571 // coordinator holds channels that workers can use to communicate with 572 // the coordinator. 573 type coordinator struct { 574 opts CoordinateFuzzingOpts 575 576 // startTime is the time we started the workers after loading the corpus. 577 // Used for logging. 578 startTime time.Time 579 580 // inputC is sent values to fuzz by the coordinator. Any worker may receive 581 // values from this channel. Workers send results to resultC. 582 inputC chan fuzzInput 583 584 // minimizeC is sent values to minimize by the coordinator. Any worker may 585 // receive values from this channel. Workers send results to resultC. 586 minimizeC chan fuzzMinimizeInput 587 588 // resultC is sent results of fuzzing by workers. The coordinator 589 // receives these. Multiple types of messages are allowed. 590 resultC chan fuzzResult 591 592 // count is the number of values fuzzed so far. 593 count int64 594 595 // countLastLog is the number of values fuzzed when the output was last 596 // logged. 597 countLastLog int64 598 599 // timeLastLog is the time at which the output was last logged. 600 timeLastLog time.Time 601 602 // interestingCount is the number of unique interesting values which have 603 // been found this execution. 604 interestingCount int 605 606 // warmupInputCount is the count of all entries in the corpus which will 607 // need to be received from workers to run once during warmup, but not fuzz. 608 // This could be for coverage data, or only for the purposes of verifying 609 // that the seed corpus doesn't have any crashers. See warmupRun. 610 warmupInputCount int 611 612 // warmupInputLeft is the number of entries in the corpus which still need 613 // to be received from workers to run once during warmup, but not fuzz. 614 // See warmupInputLeft. 615 warmupInputLeft int 616 617 // duration is the time spent fuzzing inside workers, not counting time 618 // starting up or tearing down. 619 duration time.Duration 620 621 // countWaiting is the number of fuzzing executions the coordinator is 622 // waiting on workers to complete. 623 countWaiting int64 624 625 // corpus is a set of interesting values, including the seed corpus and 626 // generated values that workers reported as interesting. 627 corpus corpus 628 629 // minimizationAllowed is true if one or more of the types of fuzz 630 // function's parameters can be minimized. 631 minimizationAllowed bool 632 633 // inputQueue is a queue of inputs that workers should try fuzzing. This is 634 // initially populated from the seed corpus and cached inputs. More inputs 635 // may be added as new coverage is discovered. 636 inputQueue queue 637 638 // minimizeQueue is a queue of inputs that caused errors or exposed new 639 // coverage. Workers should attempt to find smaller inputs that do the 640 // same thing. 641 minimizeQueue queue 642 643 // crashMinimizing is the crash that is currently being minimized. 644 crashMinimizing *fuzzResult 645 646 // coverageMask aggregates coverage that was found for all inputs in the 647 // corpus. Each byte represents a single basic execution block. Each set bit 648 // within the byte indicates that an input has triggered that block at least 649 // 1 << n times, where n is the position of the bit in the byte. For example, a 650 // value of 12 indicates that separate inputs have triggered this block 651 // between 4-7 times and 8-15 times. 652 coverageMask []byte 653 } 654 655 func newCoordinator(opts CoordinateFuzzingOpts) (*coordinator, error) { 656 // Make sure all of the seed corpus has marshalled data. 657 for i := range opts.Seed { 658 if opts.Seed[i].Data == nil && opts.Seed[i].Values != nil { 659 opts.Seed[i].Data = marshalCorpusFile(opts.Seed[i].Values...) 660 } 661 } 662 c := &coordinator{ 663 opts: opts, 664 startTime: time.Now(), 665 inputC: make(chan fuzzInput), 666 minimizeC: make(chan fuzzMinimizeInput), 667 resultC: make(chan fuzzResult), 668 timeLastLog: time.Now(), 669 corpus: corpus{hashes: make(map[[sha256.Size]byte]bool)}, 670 } 671 if err := c.readCache(); err != nil { 672 return nil, err 673 } 674 if opts.MinimizeLimit > 0 || opts.MinimizeTimeout > 0 { 675 for _, t := range opts.Types { 676 if isMinimizable(t) { 677 c.minimizationAllowed = true 678 break 679 } 680 } 681 } 682 683 covSize := len(coverage()) 684 if covSize == 0 { 685 fmt.Fprintf(c.opts.Log, "warning: the test binary was not built with coverage instrumentation, so fuzzing will run without coverage guidance and may be inefficient\n") 686 // Even though a coverage-only run won't occur, we should still run all 687 // of the seed corpus to make sure there are no existing failures before 688 // we start fuzzing. 689 c.warmupInputCount = len(c.opts.Seed) 690 for _, e := range c.opts.Seed { 691 c.inputQueue.enqueue(e) 692 } 693 } else { 694 c.warmupInputCount = len(c.corpus.entries) 695 for _, e := range c.corpus.entries { 696 c.inputQueue.enqueue(e) 697 } 698 // Set c.coverageMask to a clean []byte full of zeros. 699 c.coverageMask = make([]byte, covSize) 700 } 701 c.warmupInputLeft = c.warmupInputCount 702 703 if len(c.corpus.entries) == 0 { 704 fmt.Fprintf(c.opts.Log, "warning: starting with empty corpus\n") 705 var vals []any 706 for _, t := range opts.Types { 707 vals = append(vals, zeroValue(t)) 708 } 709 data := marshalCorpusFile(vals...) 710 h := sha256.Sum256(data) 711 name := fmt.Sprintf("%x", h[:4]) 712 c.addCorpusEntries(false, CorpusEntry{Path: name, Data: data}) 713 } 714 715 return c, nil 716 } 717 718 func (c *coordinator) updateStats(result fuzzResult) { 719 c.count += result.count 720 c.countWaiting -= result.limit 721 c.duration += result.totalDuration 722 } 723 724 func (c *coordinator) logStats() { 725 now := time.Now() 726 if c.warmupRun() { 727 runSoFar := c.warmupInputCount - c.warmupInputLeft 728 if coverageEnabled { 729 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, gathering baseline coverage: %d/%d completed\n", c.elapsed(), runSoFar, c.warmupInputCount) 730 } else { 731 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, testing seed corpus: %d/%d completed\n", c.elapsed(), runSoFar, c.warmupInputCount) 732 } 733 } else if c.crashMinimizing != nil { 734 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, minimizing\n", c.elapsed()) 735 } else { 736 rate := float64(c.count-c.countLastLog) / now.Sub(c.timeLastLog).Seconds() 737 if coverageEnabled { 738 total := c.warmupInputCount + c.interestingCount 739 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, execs: %d (%.0f/sec), new interesting: %d (total: %d)\n", c.elapsed(), c.count, rate, c.interestingCount, total) 740 } else { 741 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, execs: %d (%.0f/sec)\n", c.elapsed(), c.count, rate) 742 } 743 } 744 c.countLastLog = c.count 745 c.timeLastLog = now 746 } 747 748 // peekInput returns the next value that should be sent to workers. 749 // If the number of executions is limited, the returned value includes 750 // a limit for one worker. If there are no executions left, peekInput returns 751 // a zero value and false. 752 // 753 // peekInput doesn't actually remove the input from the queue. The caller 754 // must call sentInput after sending the input. 755 // 756 // If the input queue is empty and the coverage/testing-only run has completed, 757 // queue refills it from the corpus. 758 func (c *coordinator) peekInput() (fuzzInput, bool) { 759 if c.opts.Limit > 0 && c.count+c.countWaiting >= c.opts.Limit { 760 // Already making the maximum number of calls to the fuzz function. 761 // Don't send more inputs right now. 762 return fuzzInput{}, false 763 } 764 if c.inputQueue.len == 0 { 765 if c.warmupRun() { 766 // Wait for coverage/testing-only run to finish before sending more 767 // inputs. 768 return fuzzInput{}, false 769 } 770 c.refillInputQueue() 771 } 772 773 entry, ok := c.inputQueue.peek() 774 if !ok { 775 panic("input queue empty after refill") 776 } 777 input := fuzzInput{ 778 entry: entry.(CorpusEntry), 779 timeout: workerFuzzDuration, 780 warmup: c.warmupRun(), 781 } 782 if c.coverageMask != nil { 783 input.coverageData = bytes.Clone(c.coverageMask) 784 } 785 if input.warmup { 786 // No fuzzing will occur, but it should count toward the limit set by 787 // -fuzztime. 788 input.limit = 1 789 return input, true 790 } 791 792 if c.opts.Limit > 0 { 793 input.limit = c.opts.Limit / int64(c.opts.Parallel) 794 if c.opts.Limit%int64(c.opts.Parallel) > 0 { 795 input.limit++ 796 } 797 remaining := c.opts.Limit - c.count - c.countWaiting 798 if input.limit > remaining { 799 input.limit = remaining 800 } 801 } 802 return input, true 803 } 804 805 // sentInput updates internal counters after an input is sent to c.inputC. 806 func (c *coordinator) sentInput(input fuzzInput) { 807 c.inputQueue.dequeue() 808 c.countWaiting += input.limit 809 } 810 811 // refillInputQueue refills the input queue from the corpus after it becomes 812 // empty. 813 func (c *coordinator) refillInputQueue() { 814 for _, e := range c.corpus.entries { 815 c.inputQueue.enqueue(e) 816 } 817 } 818 819 // queueForMinimization creates a fuzzMinimizeInput from result and adds it 820 // to the minimization queue to be sent to workers. 821 func (c *coordinator) queueForMinimization(result fuzzResult, keepCoverage []byte) { 822 if shouldPrintDebugInfo() { 823 c.debugLogf( 824 "queueing input for minimization, id: %s, parent: %s, keepCoverage: %t, crasher: %t", 825 result.entry.Path, 826 result.entry.Parent, 827 keepCoverage != nil, 828 result.crasherMsg != "", 829 ) 830 } 831 if result.crasherMsg != "" { 832 c.minimizeQueue.clear() 833 } 834 835 input := fuzzMinimizeInput{ 836 entry: result.entry, 837 crasherMsg: result.crasherMsg, 838 keepCoverage: keepCoverage, 839 } 840 c.minimizeQueue.enqueue(input) 841 } 842 843 // peekMinimizeInput returns the next input that should be sent to workers for 844 // minimization. 845 func (c *coordinator) peekMinimizeInput() (fuzzMinimizeInput, bool) { 846 if !c.canMinimize() { 847 // Already making the maximum number of calls to the fuzz function. 848 // Don't send more inputs right now. 849 return fuzzMinimizeInput{}, false 850 } 851 v, ok := c.minimizeQueue.peek() 852 if !ok { 853 return fuzzMinimizeInput{}, false 854 } 855 input := v.(fuzzMinimizeInput) 856 857 if c.opts.MinimizeTimeout > 0 { 858 input.timeout = c.opts.MinimizeTimeout 859 } 860 if c.opts.MinimizeLimit > 0 { 861 input.limit = c.opts.MinimizeLimit 862 } else if c.opts.Limit > 0 { 863 if input.crasherMsg != "" { 864 input.limit = c.opts.Limit 865 } else { 866 input.limit = c.opts.Limit / int64(c.opts.Parallel) 867 if c.opts.Limit%int64(c.opts.Parallel) > 0 { 868 input.limit++ 869 } 870 } 871 } 872 if c.opts.Limit > 0 { 873 remaining := c.opts.Limit - c.count - c.countWaiting 874 if input.limit > remaining { 875 input.limit = remaining 876 } 877 } 878 return input, true 879 } 880 881 // sentMinimizeInput removes an input from the minimization queue after it's 882 // sent to minimizeC. 883 func (c *coordinator) sentMinimizeInput(input fuzzMinimizeInput) { 884 c.minimizeQueue.dequeue() 885 c.countWaiting += input.limit 886 } 887 888 // warmupRun returns true while the coordinator is running inputs without 889 // mutating them as a warmup before fuzzing. This could be to gather baseline 890 // coverage data for entries in the corpus, or to test all of the seed corpus 891 // for errors before fuzzing begins. 892 // 893 // The coordinator doesn't store coverage data in the cache with each input 894 // because that data would be invalid when counter offsets in the test binary 895 // change. 896 // 897 // When gathering coverage, the coordinator sends each entry to a worker to 898 // gather coverage for that entry only, without fuzzing or minimizing. This 899 // phase ends when all workers have finished, and the coordinator has a combined 900 // coverage map. 901 func (c *coordinator) warmupRun() bool { 902 return c.warmupInputLeft > 0 903 } 904 905 // updateCoverage sets bits in c.coverageMask that are set in newCoverage. 906 // updateCoverage returns the number of newly set bits. See the comment on 907 // coverageMask for the format. 908 func (c *coordinator) updateCoverage(newCoverage []byte) int { 909 if len(newCoverage) != len(c.coverageMask) { 910 panic(fmt.Sprintf("number of coverage counters changed at runtime: %d, expected %d", len(newCoverage), len(c.coverageMask))) 911 } 912 newBitCount := 0 913 for i := range newCoverage { 914 diff := newCoverage[i] &^ c.coverageMask[i] 915 newBitCount += bits.OnesCount8(diff) 916 c.coverageMask[i] |= newCoverage[i] 917 } 918 return newBitCount 919 } 920 921 // canMinimize returns whether the coordinator should attempt to find smaller 922 // inputs that reproduce a crash or new coverage. 923 func (c *coordinator) canMinimize() bool { 924 return c.minimizationAllowed && 925 (c.opts.Limit == 0 || c.count+c.countWaiting < c.opts.Limit) 926 } 927 928 func (c *coordinator) elapsed() time.Duration { 929 return time.Since(c.startTime).Round(1 * time.Second) 930 } 931 932 // readCache creates a combined corpus from seed values and values in the cache 933 // (in GOCACHE/fuzz). 934 // 935 // TODO(fuzzing): need a mechanism that can remove values that 936 // aren't useful anymore, for example, because they have the wrong type. 937 func (c *coordinator) readCache() error { 938 if _, err := c.addCorpusEntries(false, c.opts.Seed...); err != nil { 939 return err 940 } 941 entries, err := ReadCorpus(c.opts.CacheDir, c.opts.Types) 942 if err != nil { 943 if _, ok := err.(*MalformedCorpusError); !ok { 944 // It's okay if some files in the cache directory are malformed and 945 // are not included in the corpus, but fail if it's an I/O error. 946 return err 947 } 948 // TODO(jayconrod,katiehockman): consider printing some kind of warning 949 // indicating the number of files which were skipped because they are 950 // malformed. 951 } 952 if _, err := c.addCorpusEntries(false, entries...); err != nil { 953 return err 954 } 955 return nil 956 } 957 958 // MalformedCorpusError is an error found while reading the corpus from the 959 // filesystem. All of the errors are stored in the errs list. The testing 960 // framework uses this to report malformed files in testdata. 961 type MalformedCorpusError struct { 962 errs []error 963 } 964 965 func (e *MalformedCorpusError) Error() string { 966 var msgs []string 967 for _, s := range e.errs { 968 msgs = append(msgs, s.Error()) 969 } 970 return strings.Join(msgs, "\n") 971 } 972 973 // ReadCorpus reads the corpus from the provided dir. The returned corpus 974 // entries are guaranteed to match the given types. Any malformed files will 975 // be saved in a MalformedCorpusError and returned, along with the most recent 976 // error. 977 func ReadCorpus(dir string, types []reflect.Type) ([]CorpusEntry, error) { 978 files, err := os.ReadDir(dir) 979 if os.IsNotExist(err) { 980 return nil, nil // No corpus to read 981 } else if err != nil { 982 return nil, fmt.Errorf("reading seed corpus from testdata: %v", err) 983 } 984 var corpus []CorpusEntry 985 var errs []error 986 for _, file := range files { 987 // TODO(jayconrod,katiehockman): determine when a file is a fuzzing input 988 // based on its name. We should only read files created by writeToCorpus. 989 // If we read ALL files, we won't be able to change the file format by 990 // changing the extension. We also won't be able to add files like 991 // README.txt explaining why the directory exists. 992 if file.IsDir() { 993 continue 994 } 995 filename := filepath.Join(dir, file.Name()) 996 data, err := os.ReadFile(filename) 997 if err != nil { 998 return nil, fmt.Errorf("failed to read corpus file: %v", err) 999 } 1000 var vals []any 1001 vals, err = readCorpusData(data, types) 1002 if err != nil { 1003 errs = append(errs, fmt.Errorf("%q: %v", filename, err)) 1004 continue 1005 } 1006 corpus = append(corpus, CorpusEntry{Path: filename, Values: vals}) 1007 } 1008 if len(errs) > 0 { 1009 return corpus, &MalformedCorpusError{errs: errs} 1010 } 1011 return corpus, nil 1012 } 1013 1014 func readCorpusData(data []byte, types []reflect.Type) ([]any, error) { 1015 vals, err := unmarshalCorpusFile(data) 1016 if err != nil { 1017 return nil, fmt.Errorf("unmarshal: %v", err) 1018 } 1019 if err = CheckCorpus(vals, types); err != nil { 1020 return nil, err 1021 } 1022 return vals, nil 1023 } 1024 1025 // CheckCorpus verifies that the types in vals match the expected types 1026 // provided. 1027 func CheckCorpus(vals []any, types []reflect.Type) error { 1028 if len(vals) != len(types) { 1029 return fmt.Errorf("wrong number of values in corpus entry: %d, want %d", len(vals), len(types)) 1030 } 1031 valsT := make([]reflect.Type, len(vals)) 1032 for valsI, v := range vals { 1033 valsT[valsI] = reflect.TypeOf(v) 1034 } 1035 for i := range types { 1036 if valsT[i] != types[i] { 1037 return fmt.Errorf("mismatched types in corpus entry: %v, want %v", valsT, types) 1038 } 1039 } 1040 return nil 1041 } 1042 1043 // writeToCorpus atomically writes the given bytes to a new file in testdata. If 1044 // the directory does not exist, it will create one. If the file already exists, 1045 // writeToCorpus will not rewrite it. writeToCorpus sets entry.Path to the new 1046 // file that was just written or an error if it failed. 1047 func writeToCorpus(entry *CorpusEntry, dir string) (err error) { 1048 sum := fmt.Sprintf("%x", sha256.Sum256(entry.Data))[:16] 1049 entry.Path = filepath.Join(dir, sum) 1050 if err := os.MkdirAll(dir, 0777); err != nil { 1051 return err 1052 } 1053 if err := os.WriteFile(entry.Path, entry.Data, 0666); err != nil { 1054 os.Remove(entry.Path) // remove partially written file 1055 return err 1056 } 1057 return nil 1058 } 1059 1060 func testName(path string) string { 1061 return filepath.Base(path) 1062 } 1063 1064 func zeroValue(t reflect.Type) any { 1065 for _, v := range zeroVals { 1066 if reflect.TypeOf(v) == t { 1067 return v 1068 } 1069 } 1070 panic(fmt.Sprintf("unsupported type: %v", t)) 1071 } 1072 1073 var zeroVals []any = []any{ 1074 []byte(""), 1075 string(""), 1076 false, 1077 byte(0), 1078 rune(0), 1079 float32(0), 1080 float64(0), 1081 int(0), 1082 int8(0), 1083 int16(0), 1084 int32(0), 1085 int64(0), 1086 uint(0), 1087 uint8(0), 1088 uint16(0), 1089 uint32(0), 1090 uint64(0), 1091 } 1092 1093 var debugInfo = godebug.New("#fuzzdebug").Value() == "1" 1094 1095 func shouldPrintDebugInfo() bool { 1096 return debugInfo 1097 } 1098 1099 func (c *coordinator) debugLogf(format string, args ...any) { 1100 t := time.Now().Format("2006-01-02 15:04:05.999999999") 1101 fmt.Fprintf(c.opts.Log, t+" DEBUG "+format+"\n", args...) 1102 }