github.com/JimmyHuang454/JLS-go@v0.0.0-20230831150107-90d536585ba0/internal/fuzz/fuzz.go (about) 1 // Copyright 2020 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package fuzz provides common fuzzing functionality for tests built with 6 // "go test" and for programs that use fuzzing functionality in the testing 7 // package. 8 package fuzz 9 10 import ( 11 "bytes" 12 "context" 13 "crypto/sha256" 14 "errors" 15 "fmt" 16 "internal/godebug" 17 "io" 18 "math/bits" 19 "os" 20 "path/filepath" 21 "reflect" 22 "runtime" 23 "strings" 24 "time" 25 ) 26 27 // CoordinateFuzzingOpts is a set of arguments for CoordinateFuzzing. 28 // The zero value is valid for each field unless specified otherwise. 29 type CoordinateFuzzingOpts struct { 30 // Log is a writer for logging progress messages and warnings. 31 // If nil, io.Discard will be used instead. 32 Log io.Writer 33 34 // Timeout is the amount of wall clock time to spend fuzzing after the corpus 35 // has loaded. If zero, there will be no time limit. 36 Timeout time.Duration 37 38 // Limit is the number of random values to generate and test. If zero, 39 // there will be no limit on the number of generated values. 40 Limit int64 41 42 // MinimizeTimeout is the amount of wall clock time to spend minimizing 43 // after discovering a crasher. If zero, there will be no time limit. If 44 // MinimizeTimeout and MinimizeLimit are both zero, then minimization will 45 // be disabled. 46 MinimizeTimeout time.Duration 47 48 // MinimizeLimit is the maximum number of calls to the fuzz function to be 49 // made while minimizing after finding a crash. If zero, there will be no 50 // limit. Calls to the fuzz function made when minimizing also count toward 51 // Limit. If MinimizeTimeout and MinimizeLimit are both zero, then 52 // minimization will be disabled. 53 MinimizeLimit int64 54 55 // parallel is the number of worker processes to run in parallel. If zero, 56 // CoordinateFuzzing will run GOMAXPROCS workers. 57 Parallel int 58 59 // Seed is a list of seed values added by the fuzz target with testing.F.Add 60 // and in testdata. 61 Seed []CorpusEntry 62 63 // Types is the list of types which make up a corpus entry. 64 // Types must be set and must match values in Seed. 65 Types []reflect.Type 66 67 // CorpusDir is a directory where files containing values that crash the 68 // code being tested may be written. CorpusDir must be set. 69 CorpusDir string 70 71 // CacheDir is a directory containing additional "interesting" values. 72 // The fuzzer may derive new values from these, and may write new values here. 73 CacheDir string 74 } 75 76 // CoordinateFuzzing creates several worker processes and communicates with 77 // them to test random inputs that could trigger crashes and expose bugs. 78 // The worker processes run the same binary in the same directory with the 79 // same environment variables as the coordinator process. Workers also run 80 // with the same arguments as the coordinator, except with the -test.fuzzworker 81 // flag prepended to the argument list. 82 // 83 // If a crash occurs, the function will return an error containing information 84 // about the crash, which can be reported to the user. 85 func CoordinateFuzzing(ctx context.Context, opts CoordinateFuzzingOpts) (err error) { 86 if err := ctx.Err(); err != nil { 87 return err 88 } 89 if opts.Log == nil { 90 opts.Log = io.Discard 91 } 92 if opts.Parallel == 0 { 93 opts.Parallel = runtime.GOMAXPROCS(0) 94 } 95 if opts.Limit > 0 && int64(opts.Parallel) > opts.Limit { 96 // Don't start more workers than we need. 97 opts.Parallel = int(opts.Limit) 98 } 99 100 c, err := newCoordinator(opts) 101 if err != nil { 102 return err 103 } 104 105 if opts.Timeout > 0 { 106 var cancel func() 107 ctx, cancel = context.WithTimeout(ctx, opts.Timeout) 108 defer cancel() 109 } 110 111 // fuzzCtx is used to stop workers, for example, after finding a crasher. 112 fuzzCtx, cancelWorkers := context.WithCancel(ctx) 113 defer cancelWorkers() 114 doneC := ctx.Done() 115 116 // stop is called when a worker encounters a fatal error. 117 var fuzzErr error 118 stopping := false 119 stop := func(err error) { 120 if err == fuzzCtx.Err() || isInterruptError(err) { 121 // Suppress cancellation errors and terminations due to SIGINT. 122 // The messages are not helpful since either the user triggered the error 123 // (with ^C) or another more helpful message will be printed (a crasher). 124 err = nil 125 } 126 if err != nil && (fuzzErr == nil || fuzzErr == ctx.Err()) { 127 fuzzErr = err 128 } 129 if stopping { 130 return 131 } 132 stopping = true 133 cancelWorkers() 134 doneC = nil 135 } 136 137 // Ensure that any crash we find is written to the corpus, even if an error 138 // or interruption occurs while minimizing it. 139 crashWritten := false 140 defer func() { 141 if c.crashMinimizing == nil || crashWritten { 142 return 143 } 144 werr := writeToCorpus(&c.crashMinimizing.entry, opts.CorpusDir) 145 if werr != nil { 146 err = fmt.Errorf("%w\n%v", err, werr) 147 return 148 } 149 if err == nil { 150 err = &crashError{ 151 path: c.crashMinimizing.entry.Path, 152 err: errors.New(c.crashMinimizing.crasherMsg), 153 } 154 } 155 }() 156 157 // Start workers. 158 // TODO(jayconrod): do we want to support fuzzing different binaries? 159 dir := "" // same as self 160 binPath := os.Args[0] 161 args := append([]string{"-test.fuzzworker"}, os.Args[1:]...) 162 env := os.Environ() // same as self 163 164 errC := make(chan error) 165 workers := make([]*worker, opts.Parallel) 166 for i := range workers { 167 var err error 168 workers[i], err = newWorker(c, dir, binPath, args, env) 169 if err != nil { 170 return err 171 } 172 } 173 for i := range workers { 174 w := workers[i] 175 go func() { 176 err := w.coordinate(fuzzCtx) 177 if fuzzCtx.Err() != nil || isInterruptError(err) { 178 err = nil 179 } 180 cleanErr := w.cleanup() 181 if err == nil { 182 err = cleanErr 183 } 184 errC <- err 185 }() 186 } 187 188 // Main event loop. 189 // Do not return until all workers have terminated. We avoid a deadlock by 190 // receiving messages from workers even after ctx is cancelled. 191 activeWorkers := len(workers) 192 statTicker := time.NewTicker(3 * time.Second) 193 defer statTicker.Stop() 194 defer c.logStats() 195 196 c.logStats() 197 for { 198 var inputC chan fuzzInput 199 input, ok := c.peekInput() 200 if ok && c.crashMinimizing == nil && !stopping { 201 inputC = c.inputC 202 } 203 204 var minimizeC chan fuzzMinimizeInput 205 minimizeInput, ok := c.peekMinimizeInput() 206 if ok && !stopping { 207 minimizeC = c.minimizeC 208 } 209 210 select { 211 case <-doneC: 212 // Interrupted, cancelled, or timed out. 213 // stop sets doneC to nil so we don't busy wait here. 214 stop(ctx.Err()) 215 216 case err := <-errC: 217 // A worker terminated, possibly after encountering a fatal error. 218 stop(err) 219 activeWorkers-- 220 if activeWorkers == 0 { 221 return fuzzErr 222 } 223 224 case result := <-c.resultC: 225 // Received response from worker. 226 if stopping { 227 break 228 } 229 c.updateStats(result) 230 231 if result.crasherMsg != "" { 232 if c.warmupRun() && result.entry.IsSeed { 233 target := filepath.Base(c.opts.CorpusDir) 234 fmt.Fprintf(c.opts.Log, "failure while testing seed corpus entry: %s/%s\n", target, testName(result.entry.Parent)) 235 stop(errors.New(result.crasherMsg)) 236 break 237 } 238 if c.canMinimize() && result.canMinimize { 239 if c.crashMinimizing != nil { 240 // This crash is not minimized, and another crash is being minimized. 241 // Ignore this one and wait for the other one to finish. 242 break 243 } 244 // Found a crasher but haven't yet attempted to minimize it. 245 // Send it back to a worker for minimization. Disable inputC so 246 // other workers don't continue fuzzing. 247 c.crashMinimizing = &result 248 fmt.Fprintf(c.opts.Log, "fuzz: minimizing %d-byte failing input file\n", len(result.entry.Data)) 249 c.queueForMinimization(result, nil) 250 } else if !crashWritten { 251 // Found a crasher that's either minimized or not minimizable. 252 // Write to corpus and stop. 253 err := writeToCorpus(&result.entry, opts.CorpusDir) 254 if err == nil { 255 crashWritten = true 256 err = &crashError{ 257 path: result.entry.Path, 258 err: errors.New(result.crasherMsg), 259 } 260 } 261 if shouldPrintDebugInfo() { 262 fmt.Fprintf( 263 c.opts.Log, 264 "DEBUG new crasher, elapsed: %s, id: %s, parent: %s, gen: %d, size: %d, exec time: %s\n", 265 c.elapsed(), 266 result.entry.Path, 267 result.entry.Parent, 268 result.entry.Generation, 269 len(result.entry.Data), 270 result.entryDuration, 271 ) 272 } 273 stop(err) 274 } 275 } else if result.coverageData != nil { 276 if c.warmupRun() { 277 if shouldPrintDebugInfo() { 278 fmt.Fprintf( 279 c.opts.Log, 280 "DEBUG processed an initial input, elapsed: %s, id: %s, new bits: %d, size: %d, exec time: %s\n", 281 c.elapsed(), 282 result.entry.Parent, 283 countBits(diffCoverage(c.coverageMask, result.coverageData)), 284 len(result.entry.Data), 285 result.entryDuration, 286 ) 287 } 288 c.updateCoverage(result.coverageData) 289 c.warmupInputLeft-- 290 if c.warmupInputLeft == 0 { 291 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, gathering baseline coverage: %d/%d completed, now fuzzing with %d workers\n", c.elapsed(), c.warmupInputCount, c.warmupInputCount, c.opts.Parallel) 292 if shouldPrintDebugInfo() { 293 fmt.Fprintf( 294 c.opts.Log, 295 "DEBUG finished processing input corpus, elapsed: %s, entries: %d, initial coverage bits: %d\n", 296 c.elapsed(), 297 len(c.corpus.entries), 298 countBits(c.coverageMask), 299 ) 300 } 301 } 302 } else if keepCoverage := diffCoverage(c.coverageMask, result.coverageData); keepCoverage != nil { 303 // Found a value that expanded coverage. 304 // It's not a crasher, but we may want to add it to the on-disk 305 // corpus and prioritize it for future fuzzing. 306 // TODO(jayconrod, katiehockman): Prioritize fuzzing these 307 // values which expanded coverage, perhaps based on the 308 // number of new edges that this result expanded. 309 // TODO(jayconrod, katiehockman): Don't write a value that's already 310 // in the corpus. 311 if c.canMinimize() && result.canMinimize && c.crashMinimizing == nil { 312 // Send back to workers to find a smaller value that preserves 313 // at least one new coverage bit. 314 c.queueForMinimization(result, keepCoverage) 315 } else { 316 // Update the coordinator's coverage mask and save the value. 317 inputSize := len(result.entry.Data) 318 entryNew, err := c.addCorpusEntries(true, result.entry) 319 if err != nil { 320 stop(err) 321 break 322 } 323 if !entryNew { 324 continue 325 } 326 c.updateCoverage(keepCoverage) 327 c.inputQueue.enqueue(result.entry) 328 c.interestingCount++ 329 if shouldPrintDebugInfo() { 330 fmt.Fprintf( 331 c.opts.Log, 332 "DEBUG new interesting input, elapsed: %s, id: %s, parent: %s, gen: %d, new bits: %d, total bits: %d, size: %d, exec time: %s\n", 333 c.elapsed(), 334 result.entry.Path, 335 result.entry.Parent, 336 result.entry.Generation, 337 countBits(keepCoverage), 338 countBits(c.coverageMask), 339 inputSize, 340 result.entryDuration, 341 ) 342 } 343 } 344 } else { 345 if shouldPrintDebugInfo() { 346 fmt.Fprintf( 347 c.opts.Log, 348 "DEBUG worker reported interesting input that doesn't expand coverage, elapsed: %s, id: %s, parent: %s, canMinimize: %t\n", 349 c.elapsed(), 350 result.entry.Path, 351 result.entry.Parent, 352 result.canMinimize, 353 ) 354 } 355 } 356 } else if c.warmupRun() { 357 // No error or coverage data was reported for this input during 358 // warmup, so continue processing results. 359 c.warmupInputLeft-- 360 if c.warmupInputLeft == 0 { 361 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, testing seed corpus: %d/%d completed, now fuzzing with %d workers\n", c.elapsed(), c.warmupInputCount, c.warmupInputCount, c.opts.Parallel) 362 if shouldPrintDebugInfo() { 363 fmt.Fprintf( 364 c.opts.Log, 365 "DEBUG finished testing-only phase, elapsed: %s, entries: %d\n", 366 time.Since(c.startTime), 367 len(c.corpus.entries), 368 ) 369 } 370 } 371 } 372 373 // Once the result has been processed, stop the worker if we 374 // have reached the fuzzing limit. 375 if c.opts.Limit > 0 && c.count >= c.opts.Limit { 376 stop(nil) 377 } 378 379 case inputC <- input: 380 // Sent the next input to a worker. 381 c.sentInput(input) 382 383 case minimizeC <- minimizeInput: 384 // Sent the next input for minimization to a worker. 385 c.sentMinimizeInput(minimizeInput) 386 387 case <-statTicker.C: 388 c.logStats() 389 } 390 } 391 392 // TODO(jayconrod,katiehockman): if a crasher can't be written to the corpus, 393 // write to the cache instead. 394 } 395 396 // crashError wraps a crasher written to the seed corpus. It saves the name 397 // of the file where the input causing the crasher was saved. The testing 398 // framework uses this to report a command to re-run that specific input. 399 type crashError struct { 400 path string 401 err error 402 } 403 404 func (e *crashError) Error() string { 405 return e.err.Error() 406 } 407 408 func (e *crashError) Unwrap() error { 409 return e.err 410 } 411 412 func (e *crashError) CrashPath() string { 413 return e.path 414 } 415 416 type corpus struct { 417 entries []CorpusEntry 418 hashes map[[sha256.Size]byte]bool 419 } 420 421 // addCorpusEntries adds entries to the corpus, and optionally writes the entries 422 // to the cache directory. If an entry is already in the corpus it is skipped. If 423 // all of the entries are unique, addCorpusEntries returns true and a nil error, 424 // if at least one of the entries was a duplicate, it returns false and a nil error. 425 func (c *coordinator) addCorpusEntries(addToCache bool, entries ...CorpusEntry) (bool, error) { 426 noDupes := true 427 for _, e := range entries { 428 data, err := corpusEntryData(e) 429 if err != nil { 430 return false, err 431 } 432 h := sha256.Sum256(data) 433 if c.corpus.hashes[h] { 434 noDupes = false 435 continue 436 } 437 if addToCache { 438 if err := writeToCorpus(&e, c.opts.CacheDir); err != nil { 439 return false, err 440 } 441 // For entries written to disk, we don't hold onto the bytes, 442 // since the corpus would consume a significant amount of 443 // memory. 444 e.Data = nil 445 } 446 c.corpus.hashes[h] = true 447 c.corpus.entries = append(c.corpus.entries, e) 448 } 449 return noDupes, nil 450 } 451 452 // CorpusEntry represents an individual input for fuzzing. 453 // 454 // We must use an equivalent type in the testing and testing/internal/testdeps 455 // packages, but testing can't import this package directly, and we don't want 456 // to export this type from testing. Instead, we use the same struct type and 457 // use a type alias (not a defined type) for convenience. 458 type CorpusEntry = struct { 459 Parent string 460 461 // Path is the path of the corpus file, if the entry was loaded from disk. 462 // For other entries, including seed values provided by f.Add, Path is the 463 // name of the test, e.g. seed#0 or its hash. 464 Path string 465 466 // Data is the raw input data. Data should only be populated for seed 467 // values. For on-disk corpus files, Data will be nil, as it will be loaded 468 // from disk using Path. 469 Data []byte 470 471 // Values is the unmarshaled values from a corpus file. 472 Values []any 473 474 Generation int 475 476 // IsSeed indicates whether this entry is part of the seed corpus. 477 IsSeed bool 478 } 479 480 // corpusEntryData returns the raw input bytes, either from the data struct 481 // field, or from disk. 482 func corpusEntryData(ce CorpusEntry) ([]byte, error) { 483 if ce.Data != nil { 484 return ce.Data, nil 485 } 486 487 return os.ReadFile(ce.Path) 488 } 489 490 type fuzzInput struct { 491 // entry is the value to test initially. The worker will randomly mutate 492 // values from this starting point. 493 entry CorpusEntry 494 495 // timeout is the time to spend fuzzing variations of this input, 496 // not including starting or cleaning up. 497 timeout time.Duration 498 499 // limit is the maximum number of calls to the fuzz function the worker may 500 // make. The worker may make fewer calls, for example, if it finds an 501 // error early. If limit is zero, there is no limit on calls to the 502 // fuzz function. 503 limit int64 504 505 // warmup indicates whether this is a warmup input before fuzzing begins. If 506 // true, the input should not be fuzzed. 507 warmup bool 508 509 // coverageData reflects the coordinator's current coverageMask. 510 coverageData []byte 511 } 512 513 type fuzzResult struct { 514 // entry is an interesting value or a crasher. 515 entry CorpusEntry 516 517 // crasherMsg is an error message from a crash. It's "" if no crash was found. 518 crasherMsg string 519 520 // canMinimize is true if the worker should attempt to minimize this result. 521 // It may be false because an attempt has already been made. 522 canMinimize bool 523 524 // coverageData is set if the worker found new coverage. 525 coverageData []byte 526 527 // limit is the number of values the coordinator asked the worker 528 // to test. 0 if there was no limit. 529 limit int64 530 531 // count is the number of values the worker actually tested. 532 count int64 533 534 // totalDuration is the time the worker spent testing inputs. 535 totalDuration time.Duration 536 537 // entryDuration is the time the worker spent execution an interesting result 538 entryDuration time.Duration 539 } 540 541 type fuzzMinimizeInput struct { 542 // entry is an interesting value or crasher to minimize. 543 entry CorpusEntry 544 545 // crasherMsg is an error message from a crash. It's "" if no crash was found. 546 // If set, the worker will attempt to find a smaller input that also produces 547 // an error, though not necessarily the same error. 548 crasherMsg string 549 550 // limit is the maximum number of calls to the fuzz function the worker may 551 // make. The worker may make fewer calls, for example, if it can't reproduce 552 // an error. If limit is zero, there is no limit on calls to the fuzz function. 553 limit int64 554 555 // timeout is the time to spend minimizing this input. 556 // A zero timeout means no limit. 557 timeout time.Duration 558 559 // keepCoverage is a set of coverage bits that entry found that were not in 560 // the coordinator's combined set. When minimizing, the worker should find an 561 // input that preserves at least one of these bits. keepCoverage is nil for 562 // crashing inputs. 563 keepCoverage []byte 564 } 565 566 // coordinator holds channels that workers can use to communicate with 567 // the coordinator. 568 type coordinator struct { 569 opts CoordinateFuzzingOpts 570 571 // startTime is the time we started the workers after loading the corpus. 572 // Used for logging. 573 startTime time.Time 574 575 // inputC is sent values to fuzz by the coordinator. Any worker may receive 576 // values from this channel. Workers send results to resultC. 577 inputC chan fuzzInput 578 579 // minimizeC is sent values to minimize by the coordinator. Any worker may 580 // receive values from this channel. Workers send results to resultC. 581 minimizeC chan fuzzMinimizeInput 582 583 // resultC is sent results of fuzzing by workers. The coordinator 584 // receives these. Multiple types of messages are allowed. 585 resultC chan fuzzResult 586 587 // count is the number of values fuzzed so far. 588 count int64 589 590 // countLastLog is the number of values fuzzed when the output was last 591 // logged. 592 countLastLog int64 593 594 // timeLastLog is the time at which the output was last logged. 595 timeLastLog time.Time 596 597 // interestingCount is the number of unique interesting values which have 598 // been found this execution. 599 interestingCount int 600 601 // warmupInputCount is the count of all entries in the corpus which will 602 // need to be received from workers to run once during warmup, but not fuzz. 603 // This could be for coverage data, or only for the purposes of verifying 604 // that the seed corpus doesn't have any crashers. See warmupRun. 605 warmupInputCount int 606 607 // warmupInputLeft is the number of entries in the corpus which still need 608 // to be received from workers to run once during warmup, but not fuzz. 609 // See warmupInputLeft. 610 warmupInputLeft int 611 612 // duration is the time spent fuzzing inside workers, not counting time 613 // starting up or tearing down. 614 duration time.Duration 615 616 // countWaiting is the number of fuzzing executions the coordinator is 617 // waiting on workers to complete. 618 countWaiting int64 619 620 // corpus is a set of interesting values, including the seed corpus and 621 // generated values that workers reported as interesting. 622 corpus corpus 623 624 // minimizationAllowed is true if one or more of the types of fuzz 625 // function's parameters can be minimized. 626 minimizationAllowed bool 627 628 // inputQueue is a queue of inputs that workers should try fuzzing. This is 629 // initially populated from the seed corpus and cached inputs. More inputs 630 // may be added as new coverage is discovered. 631 inputQueue queue 632 633 // minimizeQueue is a queue of inputs that caused errors or exposed new 634 // coverage. Workers should attempt to find smaller inputs that do the 635 // same thing. 636 minimizeQueue queue 637 638 // crashMinimizing is the crash that is currently being minimized. 639 crashMinimizing *fuzzResult 640 641 // coverageMask aggregates coverage that was found for all inputs in the 642 // corpus. Each byte represents a single basic execution block. Each set bit 643 // within the byte indicates that an input has triggered that block at least 644 // 1 << n times, where n is the position of the bit in the byte. For example, a 645 // value of 12 indicates that separate inputs have triggered this block 646 // between 4-7 times and 8-15 times. 647 coverageMask []byte 648 } 649 650 func newCoordinator(opts CoordinateFuzzingOpts) (*coordinator, error) { 651 // Make sure all of the seed corpus has marshalled data. 652 for i := range opts.Seed { 653 if opts.Seed[i].Data == nil && opts.Seed[i].Values != nil { 654 opts.Seed[i].Data = marshalCorpusFile(opts.Seed[i].Values...) 655 } 656 } 657 c := &coordinator{ 658 opts: opts, 659 startTime: time.Now(), 660 inputC: make(chan fuzzInput), 661 minimizeC: make(chan fuzzMinimizeInput), 662 resultC: make(chan fuzzResult), 663 timeLastLog: time.Now(), 664 corpus: corpus{hashes: make(map[[sha256.Size]byte]bool)}, 665 } 666 if err := c.readCache(); err != nil { 667 return nil, err 668 } 669 if opts.MinimizeLimit > 0 || opts.MinimizeTimeout > 0 { 670 for _, t := range opts.Types { 671 if isMinimizable(t) { 672 c.minimizationAllowed = true 673 break 674 } 675 } 676 } 677 678 covSize := len(coverage()) 679 if covSize == 0 { 680 fmt.Fprintf(c.opts.Log, "warning: the test binary was not built with coverage instrumentation, so fuzzing will run without coverage guidance and may be inefficient\n") 681 // Even though a coverage-only run won't occur, we should still run all 682 // of the seed corpus to make sure there are no existing failures before 683 // we start fuzzing. 684 c.warmupInputCount = len(c.opts.Seed) 685 for _, e := range c.opts.Seed { 686 c.inputQueue.enqueue(e) 687 } 688 } else { 689 c.warmupInputCount = len(c.corpus.entries) 690 for _, e := range c.corpus.entries { 691 c.inputQueue.enqueue(e) 692 } 693 // Set c.coverageMask to a clean []byte full of zeros. 694 c.coverageMask = make([]byte, covSize) 695 } 696 c.warmupInputLeft = c.warmupInputCount 697 698 if len(c.corpus.entries) == 0 { 699 fmt.Fprintf(c.opts.Log, "warning: starting with empty corpus\n") 700 var vals []any 701 for _, t := range opts.Types { 702 vals = append(vals, zeroValue(t)) 703 } 704 data := marshalCorpusFile(vals...) 705 h := sha256.Sum256(data) 706 name := fmt.Sprintf("%x", h[:4]) 707 c.addCorpusEntries(false, CorpusEntry{Path: name, Data: data}) 708 } 709 710 return c, nil 711 } 712 713 func (c *coordinator) updateStats(result fuzzResult) { 714 c.count += result.count 715 c.countWaiting -= result.limit 716 c.duration += result.totalDuration 717 } 718 719 func (c *coordinator) logStats() { 720 now := time.Now() 721 if c.warmupRun() { 722 runSoFar := c.warmupInputCount - c.warmupInputLeft 723 if coverageEnabled { 724 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, gathering baseline coverage: %d/%d completed\n", c.elapsed(), runSoFar, c.warmupInputCount) 725 } else { 726 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, testing seed corpus: %d/%d completed\n", c.elapsed(), runSoFar, c.warmupInputCount) 727 } 728 } else if c.crashMinimizing != nil { 729 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, minimizing\n", c.elapsed()) 730 } else { 731 rate := float64(c.count-c.countLastLog) / now.Sub(c.timeLastLog).Seconds() 732 if coverageEnabled { 733 total := c.warmupInputCount + c.interestingCount 734 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, execs: %d (%.0f/sec), new interesting: %d (total: %d)\n", c.elapsed(), c.count, rate, c.interestingCount, total) 735 } else { 736 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, execs: %d (%.0f/sec)\n", c.elapsed(), c.count, rate) 737 } 738 } 739 c.countLastLog = c.count 740 c.timeLastLog = now 741 } 742 743 // peekInput returns the next value that should be sent to workers. 744 // If the number of executions is limited, the returned value includes 745 // a limit for one worker. If there are no executions left, peekInput returns 746 // a zero value and false. 747 // 748 // peekInput doesn't actually remove the input from the queue. The caller 749 // must call sentInput after sending the input. 750 // 751 // If the input queue is empty and the coverage/testing-only run has completed, 752 // queue refills it from the corpus. 753 func (c *coordinator) peekInput() (fuzzInput, bool) { 754 if c.opts.Limit > 0 && c.count+c.countWaiting >= c.opts.Limit { 755 // Already making the maximum number of calls to the fuzz function. 756 // Don't send more inputs right now. 757 return fuzzInput{}, false 758 } 759 if c.inputQueue.len == 0 { 760 if c.warmupRun() { 761 // Wait for coverage/testing-only run to finish before sending more 762 // inputs. 763 return fuzzInput{}, false 764 } 765 c.refillInputQueue() 766 } 767 768 entry, ok := c.inputQueue.peek() 769 if !ok { 770 panic("input queue empty after refill") 771 } 772 input := fuzzInput{ 773 entry: entry.(CorpusEntry), 774 timeout: workerFuzzDuration, 775 warmup: c.warmupRun(), 776 } 777 if c.coverageMask != nil { 778 input.coverageData = bytes.Clone(c.coverageMask) 779 } 780 if input.warmup { 781 // No fuzzing will occur, but it should count toward the limit set by 782 // -fuzztime. 783 input.limit = 1 784 return input, true 785 } 786 787 if c.opts.Limit > 0 { 788 input.limit = c.opts.Limit / int64(c.opts.Parallel) 789 if c.opts.Limit%int64(c.opts.Parallel) > 0 { 790 input.limit++ 791 } 792 remaining := c.opts.Limit - c.count - c.countWaiting 793 if input.limit > remaining { 794 input.limit = remaining 795 } 796 } 797 return input, true 798 } 799 800 // sentInput updates internal counters after an input is sent to c.inputC. 801 func (c *coordinator) sentInput(input fuzzInput) { 802 c.inputQueue.dequeue() 803 c.countWaiting += input.limit 804 } 805 806 // refillInputQueue refills the input queue from the corpus after it becomes 807 // empty. 808 func (c *coordinator) refillInputQueue() { 809 for _, e := range c.corpus.entries { 810 c.inputQueue.enqueue(e) 811 } 812 } 813 814 // queueForMinimization creates a fuzzMinimizeInput from result and adds it 815 // to the minimization queue to be sent to workers. 816 func (c *coordinator) queueForMinimization(result fuzzResult, keepCoverage []byte) { 817 if result.crasherMsg != "" { 818 c.minimizeQueue.clear() 819 } 820 821 input := fuzzMinimizeInput{ 822 entry: result.entry, 823 crasherMsg: result.crasherMsg, 824 keepCoverage: keepCoverage, 825 } 826 c.minimizeQueue.enqueue(input) 827 } 828 829 // peekMinimizeInput returns the next input that should be sent to workers for 830 // minimization. 831 func (c *coordinator) peekMinimizeInput() (fuzzMinimizeInput, bool) { 832 if !c.canMinimize() { 833 // Already making the maximum number of calls to the fuzz function. 834 // Don't send more inputs right now. 835 return fuzzMinimizeInput{}, false 836 } 837 v, ok := c.minimizeQueue.peek() 838 if !ok { 839 return fuzzMinimizeInput{}, false 840 } 841 input := v.(fuzzMinimizeInput) 842 843 if c.opts.MinimizeTimeout > 0 { 844 input.timeout = c.opts.MinimizeTimeout 845 } 846 if c.opts.MinimizeLimit > 0 { 847 input.limit = c.opts.MinimizeLimit 848 } else if c.opts.Limit > 0 { 849 if input.crasherMsg != "" { 850 input.limit = c.opts.Limit 851 } else { 852 input.limit = c.opts.Limit / int64(c.opts.Parallel) 853 if c.opts.Limit%int64(c.opts.Parallel) > 0 { 854 input.limit++ 855 } 856 } 857 } 858 if c.opts.Limit > 0 { 859 remaining := c.opts.Limit - c.count - c.countWaiting 860 if input.limit > remaining { 861 input.limit = remaining 862 } 863 } 864 return input, true 865 } 866 867 // sentMinimizeInput removes an input from the minimization queue after it's 868 // sent to minimizeC. 869 func (c *coordinator) sentMinimizeInput(input fuzzMinimizeInput) { 870 c.minimizeQueue.dequeue() 871 c.countWaiting += input.limit 872 } 873 874 // warmupRun returns true while the coordinator is running inputs without 875 // mutating them as a warmup before fuzzing. This could be to gather baseline 876 // coverage data for entries in the corpus, or to test all of the seed corpus 877 // for errors before fuzzing begins. 878 // 879 // The coordinator doesn't store coverage data in the cache with each input 880 // because that data would be invalid when counter offsets in the test binary 881 // change. 882 // 883 // When gathering coverage, the coordinator sends each entry to a worker to 884 // gather coverage for that entry only, without fuzzing or minimizing. This 885 // phase ends when all workers have finished, and the coordinator has a combined 886 // coverage map. 887 func (c *coordinator) warmupRun() bool { 888 return c.warmupInputLeft > 0 889 } 890 891 // updateCoverage sets bits in c.coverageMask that are set in newCoverage. 892 // updateCoverage returns the number of newly set bits. See the comment on 893 // coverageMask for the format. 894 func (c *coordinator) updateCoverage(newCoverage []byte) int { 895 if len(newCoverage) != len(c.coverageMask) { 896 panic(fmt.Sprintf("number of coverage counters changed at runtime: %d, expected %d", len(newCoverage), len(c.coverageMask))) 897 } 898 newBitCount := 0 899 for i := range newCoverage { 900 diff := newCoverage[i] &^ c.coverageMask[i] 901 newBitCount += bits.OnesCount8(diff) 902 c.coverageMask[i] |= newCoverage[i] 903 } 904 return newBitCount 905 } 906 907 // canMinimize returns whether the coordinator should attempt to find smaller 908 // inputs that reproduce a crash or new coverage. 909 func (c *coordinator) canMinimize() bool { 910 return c.minimizationAllowed && 911 (c.opts.Limit == 0 || c.count+c.countWaiting < c.opts.Limit) 912 } 913 914 func (c *coordinator) elapsed() time.Duration { 915 return time.Since(c.startTime).Round(1 * time.Second) 916 } 917 918 // readCache creates a combined corpus from seed values and values in the cache 919 // (in GOCACHE/fuzz). 920 // 921 // TODO(fuzzing): need a mechanism that can remove values that 922 // aren't useful anymore, for example, because they have the wrong type. 923 func (c *coordinator) readCache() error { 924 if _, err := c.addCorpusEntries(false, c.opts.Seed...); err != nil { 925 return err 926 } 927 entries, err := ReadCorpus(c.opts.CacheDir, c.opts.Types) 928 if err != nil { 929 if _, ok := err.(*MalformedCorpusError); !ok { 930 // It's okay if some files in the cache directory are malformed and 931 // are not included in the corpus, but fail if it's an I/O error. 932 return err 933 } 934 // TODO(jayconrod,katiehockman): consider printing some kind of warning 935 // indicating the number of files which were skipped because they are 936 // malformed. 937 } 938 if _, err := c.addCorpusEntries(false, entries...); err != nil { 939 return err 940 } 941 return nil 942 } 943 944 // MalformedCorpusError is an error found while reading the corpus from the 945 // filesystem. All of the errors are stored in the errs list. The testing 946 // framework uses this to report malformed files in testdata. 947 type MalformedCorpusError struct { 948 errs []error 949 } 950 951 func (e *MalformedCorpusError) Error() string { 952 var msgs []string 953 for _, s := range e.errs { 954 msgs = append(msgs, s.Error()) 955 } 956 return strings.Join(msgs, "\n") 957 } 958 959 // ReadCorpus reads the corpus from the provided dir. The returned corpus 960 // entries are guaranteed to match the given types. Any malformed files will 961 // be saved in a MalformedCorpusError and returned, along with the most recent 962 // error. 963 func ReadCorpus(dir string, types []reflect.Type) ([]CorpusEntry, error) { 964 files, err := os.ReadDir(dir) 965 if os.IsNotExist(err) { 966 return nil, nil // No corpus to read 967 } else if err != nil { 968 return nil, fmt.Errorf("reading seed corpus from testdata: %v", err) 969 } 970 var corpus []CorpusEntry 971 var errs []error 972 for _, file := range files { 973 // TODO(jayconrod,katiehockman): determine when a file is a fuzzing input 974 // based on its name. We should only read files created by writeToCorpus. 975 // If we read ALL files, we won't be able to change the file format by 976 // changing the extension. We also won't be able to add files like 977 // README.txt explaining why the directory exists. 978 if file.IsDir() { 979 continue 980 } 981 filename := filepath.Join(dir, file.Name()) 982 data, err := os.ReadFile(filename) 983 if err != nil { 984 return nil, fmt.Errorf("failed to read corpus file: %v", err) 985 } 986 var vals []any 987 vals, err = readCorpusData(data, types) 988 if err != nil { 989 errs = append(errs, fmt.Errorf("%q: %v", filename, err)) 990 continue 991 } 992 corpus = append(corpus, CorpusEntry{Path: filename, Values: vals}) 993 } 994 if len(errs) > 0 { 995 return corpus, &MalformedCorpusError{errs: errs} 996 } 997 return corpus, nil 998 } 999 1000 func readCorpusData(data []byte, types []reflect.Type) ([]any, error) { 1001 vals, err := unmarshalCorpusFile(data) 1002 if err != nil { 1003 return nil, fmt.Errorf("unmarshal: %v", err) 1004 } 1005 if err = CheckCorpus(vals, types); err != nil { 1006 return nil, err 1007 } 1008 return vals, nil 1009 } 1010 1011 // CheckCorpus verifies that the types in vals match the expected types 1012 // provided. 1013 func CheckCorpus(vals []any, types []reflect.Type) error { 1014 if len(vals) != len(types) { 1015 return fmt.Errorf("wrong number of values in corpus entry: %d, want %d", len(vals), len(types)) 1016 } 1017 valsT := make([]reflect.Type, len(vals)) 1018 for valsI, v := range vals { 1019 valsT[valsI] = reflect.TypeOf(v) 1020 } 1021 for i := range types { 1022 if valsT[i] != types[i] { 1023 return fmt.Errorf("mismatched types in corpus entry: %v, want %v", valsT, types) 1024 } 1025 } 1026 return nil 1027 } 1028 1029 // writeToCorpus atomically writes the given bytes to a new file in testdata. If 1030 // the directory does not exist, it will create one. If the file already exists, 1031 // writeToCorpus will not rewrite it. writeToCorpus sets entry.Path to the new 1032 // file that was just written or an error if it failed. 1033 func writeToCorpus(entry *CorpusEntry, dir string) (err error) { 1034 sum := fmt.Sprintf("%x", sha256.Sum256(entry.Data))[:16] 1035 entry.Path = filepath.Join(dir, sum) 1036 if err := os.MkdirAll(dir, 0777); err != nil { 1037 return err 1038 } 1039 if err := os.WriteFile(entry.Path, entry.Data, 0666); err != nil { 1040 os.Remove(entry.Path) // remove partially written file 1041 return err 1042 } 1043 return nil 1044 } 1045 1046 func testName(path string) string { 1047 return filepath.Base(path) 1048 } 1049 1050 func zeroValue(t reflect.Type) any { 1051 for _, v := range zeroVals { 1052 if reflect.TypeOf(v) == t { 1053 return v 1054 } 1055 } 1056 panic(fmt.Sprintf("unsupported type: %v", t)) 1057 } 1058 1059 var zeroVals []any = []any{ 1060 []byte(""), 1061 string(""), 1062 false, 1063 byte(0), 1064 rune(0), 1065 float32(0), 1066 float64(0), 1067 int(0), 1068 int8(0), 1069 int16(0), 1070 int32(0), 1071 int64(0), 1072 uint(0), 1073 uint8(0), 1074 uint16(0), 1075 uint32(0), 1076 uint64(0), 1077 } 1078 1079 var debugInfo = godebug.New("fuzzdebug").Value() == "1" 1080 1081 func shouldPrintDebugInfo() bool { 1082 return debugInfo 1083 }