github.com/AESNooper/go/src@v0.0.0-20220218095104-b56a4ab1bbbb/internal/fuzz/fuzz.go (about) 1 // Copyright 2020 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package fuzz provides common fuzzing functionality for tests built with 6 // "go test" and for programs that use fuzzing functionality in the testing 7 // package. 8 package fuzz 9 10 import ( 11 "context" 12 "crypto/sha256" 13 "errors" 14 "fmt" 15 "internal/godebug" 16 "io" 17 "io/ioutil" 18 "math/bits" 19 "os" 20 "path/filepath" 21 "reflect" 22 "runtime" 23 "strings" 24 "sync" 25 "time" 26 ) 27 28 // CoordinateFuzzingOpts is a set of arguments for CoordinateFuzzing. 29 // The zero value is valid for each field unless specified otherwise. 30 type CoordinateFuzzingOpts struct { 31 // Log is a writer for logging progress messages and warnings. 32 // If nil, io.Discard will be used instead. 33 Log io.Writer 34 35 // Timeout is the amount of wall clock time to spend fuzzing after the corpus 36 // has loaded. If zero, there will be no time limit. 37 Timeout time.Duration 38 39 // Limit is the number of random values to generate and test. If zero, 40 // there will be no limit on the number of generated values. 41 Limit int64 42 43 // MinimizeTimeout is the amount of wall clock time to spend minimizing 44 // after discovering a crasher. If zero, there will be no time limit. If 45 // MinimizeTimeout and MinimizeLimit are both zero, then minimization will 46 // be disabled. 47 MinimizeTimeout time.Duration 48 49 // MinimizeLimit is the maximum number of calls to the fuzz function to be 50 // made while minimizing after finding a crash. If zero, there will be no 51 // limit. Calls to the fuzz function made when minimizing also count toward 52 // Limit. If MinimizeTimeout and MinimizeLimit are both zero, then 53 // minimization will be disabled. 54 MinimizeLimit int64 55 56 // parallel is the number of worker processes to run in parallel. If zero, 57 // CoordinateFuzzing will run GOMAXPROCS workers. 58 Parallel int 59 60 // Seed is a list of seed values added by the fuzz target with testing.F.Add 61 // and in testdata. 62 Seed []CorpusEntry 63 64 // Types is the list of types which make up a corpus entry. 65 // Types must be set and must match values in Seed. 66 Types []reflect.Type 67 68 // CorpusDir is a directory where files containing values that crash the 69 // code being tested may be written. CorpusDir must be set. 70 CorpusDir string 71 72 // CacheDir is a directory containing additional "interesting" values. 73 // The fuzzer may derive new values from these, and may write new values here. 74 CacheDir string 75 } 76 77 // CoordinateFuzzing creates several worker processes and communicates with 78 // them to test random inputs that could trigger crashes and expose bugs. 79 // The worker processes run the same binary in the same directory with the 80 // same environment variables as the coordinator process. Workers also run 81 // with the same arguments as the coordinator, except with the -test.fuzzworker 82 // flag prepended to the argument list. 83 // 84 // If a crash occurs, the function will return an error containing information 85 // about the crash, which can be reported to the user. 86 func CoordinateFuzzing(ctx context.Context, opts CoordinateFuzzingOpts) (err error) { 87 if err := ctx.Err(); err != nil { 88 return err 89 } 90 if opts.Log == nil { 91 opts.Log = io.Discard 92 } 93 if opts.Parallel == 0 { 94 opts.Parallel = runtime.GOMAXPROCS(0) 95 } 96 if opts.Limit > 0 && int64(opts.Parallel) > opts.Limit { 97 // Don't start more workers than we need. 98 opts.Parallel = int(opts.Limit) 99 } 100 101 c, err := newCoordinator(opts) 102 if err != nil { 103 return err 104 } 105 106 if opts.Timeout > 0 { 107 var cancel func() 108 ctx, cancel = context.WithTimeout(ctx, opts.Timeout) 109 defer cancel() 110 } 111 112 // fuzzCtx is used to stop workers, for example, after finding a crasher. 113 fuzzCtx, cancelWorkers := context.WithCancel(ctx) 114 defer cancelWorkers() 115 doneC := ctx.Done() 116 117 // stop is called when a worker encounters a fatal error. 118 var fuzzErr error 119 stopping := false 120 stop := func(err error) { 121 if err == fuzzCtx.Err() || isInterruptError(err) { 122 // Suppress cancellation errors and terminations due to SIGINT. 123 // The messages are not helpful since either the user triggered the error 124 // (with ^C) or another more helpful message will be printed (a crasher). 125 err = nil 126 } 127 if err != nil && (fuzzErr == nil || fuzzErr == ctx.Err()) { 128 fuzzErr = err 129 } 130 if stopping { 131 return 132 } 133 stopping = true 134 cancelWorkers() 135 doneC = nil 136 } 137 138 // Ensure that any crash we find is written to the corpus, even if an error 139 // or interruption occurs while minimizing it. 140 crashWritten := false 141 defer func() { 142 if c.crashMinimizing == nil || crashWritten { 143 return 144 } 145 werr := writeToCorpus(&c.crashMinimizing.entry, opts.CorpusDir) 146 if werr != nil { 147 err = fmt.Errorf("%w\n%v", err, werr) 148 return 149 } 150 if err == nil { 151 err = &crashError{ 152 path: c.crashMinimizing.entry.Path, 153 err: errors.New(c.crashMinimizing.crasherMsg), 154 } 155 } 156 }() 157 158 // Start workers. 159 // TODO(jayconrod): do we want to support fuzzing different binaries? 160 dir := "" // same as self 161 binPath := os.Args[0] 162 args := append([]string{"-test.fuzzworker"}, os.Args[1:]...) 163 env := os.Environ() // same as self 164 165 errC := make(chan error) 166 workers := make([]*worker, opts.Parallel) 167 for i := range workers { 168 var err error 169 workers[i], err = newWorker(c, dir, binPath, args, env) 170 if err != nil { 171 return err 172 } 173 } 174 for i := range workers { 175 w := workers[i] 176 go func() { 177 err := w.coordinate(fuzzCtx) 178 if fuzzCtx.Err() != nil || isInterruptError(err) { 179 err = nil 180 } 181 cleanErr := w.cleanup() 182 if err == nil { 183 err = cleanErr 184 } 185 errC <- err 186 }() 187 } 188 189 // Main event loop. 190 // Do not return until all workers have terminated. We avoid a deadlock by 191 // receiving messages from workers even after ctx is cancelled. 192 activeWorkers := len(workers) 193 statTicker := time.NewTicker(3 * time.Second) 194 defer statTicker.Stop() 195 defer c.logStats() 196 197 c.logStats() 198 for { 199 var inputC chan fuzzInput 200 input, ok := c.peekInput() 201 if ok && c.crashMinimizing == nil && !stopping { 202 inputC = c.inputC 203 } 204 205 var minimizeC chan fuzzMinimizeInput 206 minimizeInput, ok := c.peekMinimizeInput() 207 if ok && !stopping { 208 minimizeC = c.minimizeC 209 } 210 211 select { 212 case <-doneC: 213 // Interrupted, cancelled, or timed out. 214 // stop sets doneC to nil so we don't busy wait here. 215 stop(ctx.Err()) 216 217 case err := <-errC: 218 // A worker terminated, possibly after encountering a fatal error. 219 stop(err) 220 activeWorkers-- 221 if activeWorkers == 0 { 222 return fuzzErr 223 } 224 225 case result := <-c.resultC: 226 // Received response from worker. 227 if stopping { 228 break 229 } 230 c.updateStats(result) 231 232 if result.crasherMsg != "" { 233 if c.warmupRun() && result.entry.IsSeed { 234 target := filepath.Base(c.opts.CorpusDir) 235 fmt.Fprintf(c.opts.Log, "failure while testing seed corpus entry: %s/%s\n", target, testName(result.entry.Parent)) 236 stop(errors.New(result.crasherMsg)) 237 break 238 } 239 if c.canMinimize() && result.canMinimize { 240 if c.crashMinimizing != nil { 241 // This crash is not minimized, and another crash is being minimized. 242 // Ignore this one and wait for the other one to finish. 243 break 244 } 245 // Found a crasher but haven't yet attempted to minimize it. 246 // Send it back to a worker for minimization. Disable inputC so 247 // other workers don't continue fuzzing. 248 c.crashMinimizing = &result 249 fmt.Fprintf(c.opts.Log, "fuzz: minimizing %d-byte failing input file\n", len(result.entry.Data)) 250 c.queueForMinimization(result, nil) 251 } else if !crashWritten { 252 // Found a crasher that's either minimized or not minimizable. 253 // Write to corpus and stop. 254 err := writeToCorpus(&result.entry, opts.CorpusDir) 255 if err == nil { 256 crashWritten = true 257 err = &crashError{ 258 path: result.entry.Path, 259 err: errors.New(result.crasherMsg), 260 } 261 } 262 if shouldPrintDebugInfo() { 263 fmt.Fprintf( 264 c.opts.Log, 265 "DEBUG new crasher, elapsed: %s, id: %s, parent: %s, gen: %d, size: %d, exec time: %s\n", 266 c.elapsed(), 267 result.entry.Path, 268 result.entry.Parent, 269 result.entry.Generation, 270 len(result.entry.Data), 271 result.entryDuration, 272 ) 273 } 274 stop(err) 275 } 276 } else if result.coverageData != nil { 277 if c.warmupRun() { 278 if shouldPrintDebugInfo() { 279 fmt.Fprintf( 280 c.opts.Log, 281 "DEBUG processed an initial input, elapsed: %s, id: %s, new bits: %d, size: %d, exec time: %s\n", 282 c.elapsed(), 283 result.entry.Parent, 284 countBits(diffCoverage(c.coverageMask, result.coverageData)), 285 len(result.entry.Data), 286 result.entryDuration, 287 ) 288 } 289 c.updateCoverage(result.coverageData) 290 c.warmupInputLeft-- 291 if c.warmupInputLeft == 0 { 292 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, gathering baseline coverage: %d/%d completed, now fuzzing with %d workers\n", c.elapsed(), c.warmupInputCount, c.warmupInputCount, c.opts.Parallel) 293 if shouldPrintDebugInfo() { 294 fmt.Fprintf( 295 c.opts.Log, 296 "DEBUG finished processing input corpus, elapsed: %s, entries: %d, initial coverage bits: %d\n", 297 c.elapsed(), 298 len(c.corpus.entries), 299 countBits(c.coverageMask), 300 ) 301 } 302 } 303 } else if keepCoverage := diffCoverage(c.coverageMask, result.coverageData); keepCoverage != nil { 304 // Found a value that expanded coverage. 305 // It's not a crasher, but we may want to add it to the on-disk 306 // corpus and prioritize it for future fuzzing. 307 // TODO(jayconrod, katiehockman): Prioritize fuzzing these 308 // values which expanded coverage, perhaps based on the 309 // number of new edges that this result expanded. 310 // TODO(jayconrod, katiehockman): Don't write a value that's already 311 // in the corpus. 312 if c.canMinimize() && result.canMinimize && c.crashMinimizing == nil { 313 // Send back to workers to find a smaller value that preserves 314 // at least one new coverage bit. 315 c.queueForMinimization(result, keepCoverage) 316 } else { 317 // Update the coordinator's coverage mask and save the value. 318 inputSize := len(result.entry.Data) 319 if opts.CacheDir != "" { 320 // It is possible that the input that was discovered is already 321 // present in the corpus, but the worker produced a coverage map 322 // that still expanded our total coverage (this may happen due to 323 // flakiness in the coverage counters). In order to prevent adding 324 // duplicate entries to the corpus (and re-writing the file on 325 // disk), skip it if the on disk file already exists. 326 // TOOD(roland): this check is limited in that it will only be 327 // applied if we are using the CacheDir. Another option would be 328 // to iterate through the corpus and check if it is already present, 329 // which would catch cases where we are not caching entries. 330 // A slightly faster approach would be to keep some kind of map of 331 // entry hashes, which would allow us to avoid iterating through 332 // all entries. 333 _, err = os.Stat(result.entry.Path) 334 if err == nil { 335 continue 336 } 337 err := writeToCorpus(&result.entry, opts.CacheDir) 338 if err != nil { 339 stop(err) 340 } 341 result.entry.Data = nil 342 } 343 c.updateCoverage(keepCoverage) 344 c.corpus.entries = append(c.corpus.entries, result.entry) 345 c.inputQueue.enqueue(result.entry) 346 c.interestingCount++ 347 if shouldPrintDebugInfo() { 348 fmt.Fprintf( 349 c.opts.Log, 350 "DEBUG new interesting input, elapsed: %s, id: %s, parent: %s, gen: %d, new bits: %d, total bits: %d, size: %d, exec time: %s\n", 351 c.elapsed(), 352 result.entry.Path, 353 result.entry.Parent, 354 result.entry.Generation, 355 countBits(keepCoverage), 356 countBits(c.coverageMask), 357 inputSize, 358 result.entryDuration, 359 ) 360 } 361 } 362 } else { 363 if shouldPrintDebugInfo() { 364 fmt.Fprintf( 365 c.opts.Log, 366 "DEBUG worker reported interesting input that doesn't expand coverage, elapsed: %s, id: %s, parent: %s, canMinimize: %t\n", 367 c.elapsed(), 368 result.entry.Path, 369 result.entry.Parent, 370 result.canMinimize, 371 ) 372 } 373 } 374 } else if c.warmupRun() { 375 // No error or coverage data was reported for this input during 376 // warmup, so continue processing results. 377 c.warmupInputLeft-- 378 if c.warmupInputLeft == 0 { 379 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, testing seed corpus: %d/%d completed, now fuzzing with %d workers\n", c.elapsed(), c.warmupInputCount, c.warmupInputCount, c.opts.Parallel) 380 if shouldPrintDebugInfo() { 381 fmt.Fprintf( 382 c.opts.Log, 383 "DEBUG finished testing-only phase, elapsed: %s, entries: %d\n", 384 time.Since(c.startTime), 385 len(c.corpus.entries), 386 ) 387 } 388 } 389 } 390 391 // Once the result has been processed, stop the worker if we 392 // have reached the fuzzing limit. 393 if c.opts.Limit > 0 && c.count >= c.opts.Limit { 394 stop(nil) 395 } 396 397 case inputC <- input: 398 // Sent the next input to a worker. 399 c.sentInput(input) 400 401 case minimizeC <- minimizeInput: 402 // Sent the next input for minimization to a worker. 403 c.sentMinimizeInput(minimizeInput) 404 405 case <-statTicker.C: 406 c.logStats() 407 } 408 } 409 410 // TODO(jayconrod,katiehockman): if a crasher can't be written to the corpus, 411 // write to the cache instead. 412 } 413 414 // crashError wraps a crasher written to the seed corpus. It saves the name 415 // of the file where the input causing the crasher was saved. The testing 416 // framework uses this to report a command to re-run that specific input. 417 type crashError struct { 418 path string 419 err error 420 } 421 422 func (e *crashError) Error() string { 423 return e.err.Error() 424 } 425 426 func (e *crashError) Unwrap() error { 427 return e.err 428 } 429 430 func (e *crashError) CrashPath() string { 431 return e.path 432 } 433 434 type corpus struct { 435 entries []CorpusEntry 436 } 437 438 // CorpusEntry represents an individual input for fuzzing. 439 // 440 // We must use an equivalent type in the testing and testing/internal/testdeps 441 // packages, but testing can't import this package directly, and we don't want 442 // to export this type from testing. Instead, we use the same struct type and 443 // use a type alias (not a defined type) for convenience. 444 type CorpusEntry = struct { 445 Parent string 446 447 // Path is the path of the corpus file, if the entry was loaded from disk. 448 // For other entries, including seed values provided by f.Add, Path is the 449 // name of the test, e.g. seed#0 or its hash. 450 Path string 451 452 // Data is the raw input data. Data should only be populated for seed 453 // values. For on-disk corpus files, Data will be nil, as it will be loaded 454 // from disk using Path. 455 Data []byte 456 457 // Values is the unmarshaled values from a corpus file. 458 Values []interface{} 459 460 Generation int 461 462 // IsSeed indicates whether this entry is part of the seed corpus. 463 IsSeed bool 464 } 465 466 // Data returns the raw input bytes, either from the data struct field, 467 // or from disk. 468 func CorpusEntryData(ce CorpusEntry) ([]byte, error) { 469 if ce.Data != nil { 470 return ce.Data, nil 471 } 472 473 return os.ReadFile(ce.Path) 474 } 475 476 type fuzzInput struct { 477 // entry is the value to test initially. The worker will randomly mutate 478 // values from this starting point. 479 entry CorpusEntry 480 481 // timeout is the time to spend fuzzing variations of this input, 482 // not including starting or cleaning up. 483 timeout time.Duration 484 485 // limit is the maximum number of calls to the fuzz function the worker may 486 // make. The worker may make fewer calls, for example, if it finds an 487 // error early. If limit is zero, there is no limit on calls to the 488 // fuzz function. 489 limit int64 490 491 // warmup indicates whether this is a warmup input before fuzzing begins. If 492 // true, the input should not be fuzzed. 493 warmup bool 494 495 // coverageData reflects the coordinator's current coverageMask. 496 coverageData []byte 497 } 498 499 type fuzzResult struct { 500 // entry is an interesting value or a crasher. 501 entry CorpusEntry 502 503 // crasherMsg is an error message from a crash. It's "" if no crash was found. 504 crasherMsg string 505 506 // canMinimize is true if the worker should attempt to minimize this result. 507 // It may be false because an attempt has already been made. 508 canMinimize bool 509 510 // coverageData is set if the worker found new coverage. 511 coverageData []byte 512 513 // limit is the number of values the coordinator asked the worker 514 // to test. 0 if there was no limit. 515 limit int64 516 517 // count is the number of values the worker actually tested. 518 count int64 519 520 // totalDuration is the time the worker spent testing inputs. 521 totalDuration time.Duration 522 523 // entryDuration is the time the worker spent execution an interesting result 524 entryDuration time.Duration 525 } 526 527 type fuzzMinimizeInput struct { 528 // entry is an interesting value or crasher to minimize. 529 entry CorpusEntry 530 531 // crasherMsg is an error message from a crash. It's "" if no crash was found. 532 // If set, the worker will attempt to find a smaller input that also produces 533 // an error, though not necessarily the same error. 534 crasherMsg string 535 536 // limit is the maximum number of calls to the fuzz function the worker may 537 // make. The worker may make fewer calls, for example, if it can't reproduce 538 // an error. If limit is zero, there is no limit on calls to the fuzz function. 539 limit int64 540 541 // timeout is the time to spend minimizing this input. 542 // A zero timeout means no limit. 543 timeout time.Duration 544 545 // keepCoverage is a set of coverage bits that entry found that were not in 546 // the coordinator's combined set. When minimizing, the worker should find an 547 // input that preserves at least one of these bits. keepCoverage is nil for 548 // crashing inputs. 549 keepCoverage []byte 550 } 551 552 // coordinator holds channels that workers can use to communicate with 553 // the coordinator. 554 type coordinator struct { 555 opts CoordinateFuzzingOpts 556 557 // startTime is the time we started the workers after loading the corpus. 558 // Used for logging. 559 startTime time.Time 560 561 // inputC is sent values to fuzz by the coordinator. Any worker may receive 562 // values from this channel. Workers send results to resultC. 563 inputC chan fuzzInput 564 565 // minimizeC is sent values to minimize by the coordinator. Any worker may 566 // receive values from this channel. Workers send results to resultC. 567 minimizeC chan fuzzMinimizeInput 568 569 // resultC is sent results of fuzzing by workers. The coordinator 570 // receives these. Multiple types of messages are allowed. 571 resultC chan fuzzResult 572 573 // count is the number of values fuzzed so far. 574 count int64 575 576 // countLastLog is the number of values fuzzed when the output was last 577 // logged. 578 countLastLog int64 579 580 // timeLastLog is the time at which the output was last logged. 581 timeLastLog time.Time 582 583 // interestingCount is the number of unique interesting values which have 584 // been found this execution. 585 interestingCount int64 586 587 // warmupInputCount is the count of all entries in the corpus which will 588 // need to be received from workers to run once during warmup, but not fuzz. 589 // This could be for coverage data, or only for the purposes of verifying 590 // that the seed corpus doesn't have any crashers. See warmupRun. 591 warmupInputCount int 592 593 // warmupInputLeft is the number of entries in the corpus which still need 594 // to be received from workers to run once during warmup, but not fuzz. 595 // See warmupInputLeft. 596 warmupInputLeft int 597 598 // duration is the time spent fuzzing inside workers, not counting time 599 // starting up or tearing down. 600 duration time.Duration 601 602 // countWaiting is the number of fuzzing executions the coordinator is 603 // waiting on workers to complete. 604 countWaiting int64 605 606 // corpus is a set of interesting values, including the seed corpus and 607 // generated values that workers reported as interesting. 608 corpus corpus 609 610 // minimizationAllowed is true if one or more of the types of fuzz 611 // function's parameters can be minimized. 612 minimizationAllowed bool 613 614 // inputQueue is a queue of inputs that workers should try fuzzing. This is 615 // initially populated from the seed corpus and cached inputs. More inputs 616 // may be added as new coverage is discovered. 617 inputQueue queue 618 619 // minimizeQueue is a queue of inputs that caused errors or exposed new 620 // coverage. Workers should attempt to find smaller inputs that do the 621 // same thing. 622 minimizeQueue queue 623 624 // crashMinimizing is the crash that is currently being minimized. 625 crashMinimizing *fuzzResult 626 627 // coverageMask aggregates coverage that was found for all inputs in the 628 // corpus. Each byte represents a single basic execution block. Each set bit 629 // within the byte indicates that an input has triggered that block at least 630 // 1 << n times, where n is the position of the bit in the byte. For example, a 631 // value of 12 indicates that separate inputs have triggered this block 632 // between 4-7 times and 8-15 times. 633 coverageMask []byte 634 } 635 636 func newCoordinator(opts CoordinateFuzzingOpts) (*coordinator, error) { 637 // Make sure all of the seed corpus has marshalled data. 638 for i := range opts.Seed { 639 if opts.Seed[i].Data == nil && opts.Seed[i].Values != nil { 640 opts.Seed[i].Data = marshalCorpusFile(opts.Seed[i].Values...) 641 } 642 } 643 corpus, err := readCache(opts.Seed, opts.Types, opts.CacheDir) 644 if err != nil { 645 return nil, err 646 } 647 c := &coordinator{ 648 opts: opts, 649 startTime: time.Now(), 650 inputC: make(chan fuzzInput), 651 minimizeC: make(chan fuzzMinimizeInput), 652 resultC: make(chan fuzzResult), 653 corpus: corpus, 654 timeLastLog: time.Now(), 655 } 656 if opts.MinimizeLimit > 0 || opts.MinimizeTimeout > 0 { 657 for _, t := range opts.Types { 658 if isMinimizable(t) { 659 c.minimizationAllowed = true 660 break 661 } 662 } 663 } 664 665 covSize := len(coverage()) 666 if covSize == 0 { 667 fmt.Fprintf(c.opts.Log, "warning: the test binary was not built with coverage instrumentation, so fuzzing will run without coverage guidance and may be inefficient\n") 668 // Even though a coverage-only run won't occur, we should still run all 669 // of the seed corpus to make sure there are no existing failures before 670 // we start fuzzing. 671 c.warmupInputCount = len(c.opts.Seed) 672 for _, e := range c.opts.Seed { 673 c.inputQueue.enqueue(e) 674 } 675 } else { 676 c.warmupInputCount = len(c.corpus.entries) 677 for _, e := range c.corpus.entries { 678 c.inputQueue.enqueue(e) 679 } 680 // Set c.coverageMask to a clean []byte full of zeros. 681 c.coverageMask = make([]byte, covSize) 682 } 683 c.warmupInputLeft = c.warmupInputCount 684 685 if len(c.corpus.entries) == 0 { 686 fmt.Fprintf(c.opts.Log, "warning: starting with empty corpus\n") 687 var vals []interface{} 688 for _, t := range opts.Types { 689 vals = append(vals, zeroValue(t)) 690 } 691 data := marshalCorpusFile(vals...) 692 h := sha256.Sum256(data) 693 name := fmt.Sprintf("%x", h[:4]) 694 c.corpus.entries = append(c.corpus.entries, CorpusEntry{Path: name, Data: data}) 695 } 696 697 return c, nil 698 } 699 700 func (c *coordinator) updateStats(result fuzzResult) { 701 c.count += result.count 702 c.countWaiting -= result.limit 703 c.duration += result.totalDuration 704 } 705 706 func (c *coordinator) logStats() { 707 now := time.Now() 708 if c.warmupRun() { 709 runSoFar := c.warmupInputCount - c.warmupInputLeft 710 if coverageEnabled { 711 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, gathering baseline coverage: %d/%d completed\n", c.elapsed(), runSoFar, c.warmupInputCount) 712 } else { 713 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, testing seed corpus: %d/%d completed\n", c.elapsed(), runSoFar, c.warmupInputCount) 714 } 715 } else if c.crashMinimizing != nil { 716 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, minimizing\n", c.elapsed()) 717 } else { 718 rate := float64(c.count-c.countLastLog) / now.Sub(c.timeLastLog).Seconds() 719 if coverageEnabled { 720 interestingTotalCount := int64(c.warmupInputCount-len(c.opts.Seed)) + c.interestingCount 721 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, execs: %d (%.0f/sec), new interesting: %d (total: %d)\n", c.elapsed(), c.count, rate, c.interestingCount, interestingTotalCount) 722 } else { 723 fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, execs: %d (%.0f/sec)\n", c.elapsed(), c.count, rate) 724 } 725 } 726 c.countLastLog = c.count 727 c.timeLastLog = now 728 } 729 730 // peekInput returns the next value that should be sent to workers. 731 // If the number of executions is limited, the returned value includes 732 // a limit for one worker. If there are no executions left, peekInput returns 733 // a zero value and false. 734 // 735 // peekInput doesn't actually remove the input from the queue. The caller 736 // must call sentInput after sending the input. 737 // 738 // If the input queue is empty and the coverage/testing-only run has completed, 739 // queue refills it from the corpus. 740 func (c *coordinator) peekInput() (fuzzInput, bool) { 741 if c.opts.Limit > 0 && c.count+c.countWaiting >= c.opts.Limit { 742 // Already making the maximum number of calls to the fuzz function. 743 // Don't send more inputs right now. 744 return fuzzInput{}, false 745 } 746 if c.inputQueue.len == 0 { 747 if c.warmupRun() { 748 // Wait for coverage/testing-only run to finish before sending more 749 // inputs. 750 return fuzzInput{}, false 751 } 752 c.refillInputQueue() 753 } 754 755 entry, ok := c.inputQueue.peek() 756 if !ok { 757 panic("input queue empty after refill") 758 } 759 input := fuzzInput{ 760 entry: entry.(CorpusEntry), 761 timeout: workerFuzzDuration, 762 warmup: c.warmupRun(), 763 } 764 if c.coverageMask != nil { 765 input.coverageData = make([]byte, len(c.coverageMask)) 766 copy(input.coverageData, c.coverageMask) 767 } 768 if input.warmup { 769 // No fuzzing will occur, but it should count toward the limit set by 770 // -fuzztime. 771 input.limit = 1 772 return input, true 773 } 774 775 if c.opts.Limit > 0 { 776 input.limit = c.opts.Limit / int64(c.opts.Parallel) 777 if c.opts.Limit%int64(c.opts.Parallel) > 0 { 778 input.limit++ 779 } 780 remaining := c.opts.Limit - c.count - c.countWaiting 781 if input.limit > remaining { 782 input.limit = remaining 783 } 784 } 785 return input, true 786 } 787 788 // sentInput updates internal counters after an input is sent to c.inputC. 789 func (c *coordinator) sentInput(input fuzzInput) { 790 c.inputQueue.dequeue() 791 c.countWaiting += input.limit 792 } 793 794 // refillInputQueue refills the input queue from the corpus after it becomes 795 // empty. 796 func (c *coordinator) refillInputQueue() { 797 for _, e := range c.corpus.entries { 798 c.inputQueue.enqueue(e) 799 } 800 } 801 802 // queueForMinimization creates a fuzzMinimizeInput from result and adds it 803 // to the minimization queue to be sent to workers. 804 func (c *coordinator) queueForMinimization(result fuzzResult, keepCoverage []byte) { 805 if result.crasherMsg != "" { 806 c.minimizeQueue.clear() 807 } 808 809 input := fuzzMinimizeInput{ 810 entry: result.entry, 811 crasherMsg: result.crasherMsg, 812 keepCoverage: keepCoverage, 813 } 814 c.minimizeQueue.enqueue(input) 815 } 816 817 // peekMinimizeInput returns the next input that should be sent to workers for 818 // minimization. 819 func (c *coordinator) peekMinimizeInput() (fuzzMinimizeInput, bool) { 820 if !c.canMinimize() { 821 // Already making the maximum number of calls to the fuzz function. 822 // Don't send more inputs right now. 823 return fuzzMinimizeInput{}, false 824 } 825 v, ok := c.minimizeQueue.peek() 826 if !ok { 827 return fuzzMinimizeInput{}, false 828 } 829 input := v.(fuzzMinimizeInput) 830 831 if c.opts.MinimizeTimeout > 0 { 832 input.timeout = c.opts.MinimizeTimeout 833 } 834 if c.opts.MinimizeLimit > 0 { 835 input.limit = c.opts.MinimizeLimit 836 } else if c.opts.Limit > 0 { 837 if input.crasherMsg != "" { 838 input.limit = c.opts.Limit 839 } else { 840 input.limit = c.opts.Limit / int64(c.opts.Parallel) 841 if c.opts.Limit%int64(c.opts.Parallel) > 0 { 842 input.limit++ 843 } 844 } 845 } 846 if c.opts.Limit > 0 { 847 remaining := c.opts.Limit - c.count - c.countWaiting 848 if input.limit > remaining { 849 input.limit = remaining 850 } 851 } 852 return input, true 853 } 854 855 // sentMinimizeInput removes an input from the minimization queue after it's 856 // sent to minimizeC. 857 func (c *coordinator) sentMinimizeInput(input fuzzMinimizeInput) { 858 c.minimizeQueue.dequeue() 859 c.countWaiting += input.limit 860 } 861 862 // warmupRun returns true while the coordinator is running inputs without 863 // mutating them as a warmup before fuzzing. This could be to gather baseline 864 // coverage data for entries in the corpus, or to test all of the seed corpus 865 // for errors before fuzzing begins. 866 // 867 // The coordinator doesn't store coverage data in the cache with each input 868 // because that data would be invalid when counter offsets in the test binary 869 // change. 870 // 871 // When gathering coverage, the coordinator sends each entry to a worker to 872 // gather coverage for that entry only, without fuzzing or minimizing. This 873 // phase ends when all workers have finished, and the coordinator has a combined 874 // coverage map. 875 func (c *coordinator) warmupRun() bool { 876 return c.warmupInputLeft > 0 877 } 878 879 // updateCoverage sets bits in c.coverageMask that are set in newCoverage. 880 // updateCoverage returns the number of newly set bits. See the comment on 881 // coverageMask for the format. 882 func (c *coordinator) updateCoverage(newCoverage []byte) int { 883 if len(newCoverage) != len(c.coverageMask) { 884 panic(fmt.Sprintf("number of coverage counters changed at runtime: %d, expected %d", len(newCoverage), len(c.coverageMask))) 885 } 886 newBitCount := 0 887 for i := range newCoverage { 888 diff := newCoverage[i] &^ c.coverageMask[i] 889 newBitCount += bits.OnesCount8(diff) 890 c.coverageMask[i] |= newCoverage[i] 891 } 892 return newBitCount 893 } 894 895 // canMinimize returns whether the coordinator should attempt to find smaller 896 // inputs that reproduce a crash or new coverage. 897 func (c *coordinator) canMinimize() bool { 898 return c.minimizationAllowed && 899 (c.opts.Limit == 0 || c.count+c.countWaiting < c.opts.Limit) 900 } 901 902 func (c *coordinator) elapsed() time.Duration { 903 return time.Since(c.startTime).Round(1 * time.Second) 904 } 905 906 // readCache creates a combined corpus from seed values and values in the cache 907 // (in GOCACHE/fuzz). 908 // 909 // TODO(fuzzing): need a mechanism that can remove values that 910 // aren't useful anymore, for example, because they have the wrong type. 911 func readCache(seed []CorpusEntry, types []reflect.Type, cacheDir string) (corpus, error) { 912 var c corpus 913 c.entries = append(c.entries, seed...) 914 entries, err := ReadCorpus(cacheDir, types) 915 if err != nil { 916 if _, ok := err.(*MalformedCorpusError); !ok { 917 // It's okay if some files in the cache directory are malformed and 918 // are not included in the corpus, but fail if it's an I/O error. 919 return corpus{}, err 920 } 921 // TODO(jayconrod,katiehockman): consider printing some kind of warning 922 // indicating the number of files which were skipped because they are 923 // malformed. 924 } 925 c.entries = append(c.entries, entries...) 926 return c, nil 927 } 928 929 // MalformedCorpusError is an error found while reading the corpus from the 930 // filesystem. All of the errors are stored in the errs list. The testing 931 // framework uses this to report malformed files in testdata. 932 type MalformedCorpusError struct { 933 errs []error 934 } 935 936 func (e *MalformedCorpusError) Error() string { 937 var msgs []string 938 for _, s := range e.errs { 939 msgs = append(msgs, s.Error()) 940 } 941 return strings.Join(msgs, "\n") 942 } 943 944 // ReadCorpus reads the corpus from the provided dir. The returned corpus 945 // entries are guaranteed to match the given types. Any malformed files will 946 // be saved in a MalformedCorpusError and returned, along with the most recent 947 // error. 948 func ReadCorpus(dir string, types []reflect.Type) ([]CorpusEntry, error) { 949 files, err := ioutil.ReadDir(dir) 950 if os.IsNotExist(err) { 951 return nil, nil // No corpus to read 952 } else if err != nil { 953 return nil, fmt.Errorf("reading seed corpus from testdata: %v", err) 954 } 955 var corpus []CorpusEntry 956 var errs []error 957 for _, file := range files { 958 // TODO(jayconrod,katiehockman): determine when a file is a fuzzing input 959 // based on its name. We should only read files created by writeToCorpus. 960 // If we read ALL files, we won't be able to change the file format by 961 // changing the extension. We also won't be able to add files like 962 // README.txt explaining why the directory exists. 963 if file.IsDir() { 964 continue 965 } 966 filename := filepath.Join(dir, file.Name()) 967 data, err := ioutil.ReadFile(filename) 968 if err != nil { 969 return nil, fmt.Errorf("failed to read corpus file: %v", err) 970 } 971 var vals []interface{} 972 vals, err = readCorpusData(data, types) 973 if err != nil { 974 errs = append(errs, fmt.Errorf("%q: %v", filename, err)) 975 continue 976 } 977 corpus = append(corpus, CorpusEntry{Path: filename, Values: vals}) 978 } 979 if len(errs) > 0 { 980 return corpus, &MalformedCorpusError{errs: errs} 981 } 982 return corpus, nil 983 } 984 985 func readCorpusData(data []byte, types []reflect.Type) ([]interface{}, error) { 986 vals, err := unmarshalCorpusFile(data) 987 if err != nil { 988 return nil, fmt.Errorf("unmarshal: %v", err) 989 } 990 if err = CheckCorpus(vals, types); err != nil { 991 return nil, err 992 } 993 return vals, nil 994 } 995 996 // CheckCorpus verifies that the types in vals match the expected types 997 // provided. 998 func CheckCorpus(vals []interface{}, types []reflect.Type) error { 999 if len(vals) != len(types) { 1000 return fmt.Errorf("wrong number of values in corpus entry: %d, want %d", len(vals), len(types)) 1001 } 1002 valsT := make([]reflect.Type, len(vals)) 1003 for valsI, v := range vals { 1004 valsT[valsI] = reflect.TypeOf(v) 1005 } 1006 for i := range types { 1007 if valsT[i] != types[i] { 1008 return fmt.Errorf("mismatched types in corpus entry: %v, want %v", valsT, types) 1009 } 1010 } 1011 return nil 1012 } 1013 1014 // writeToCorpus atomically writes the given bytes to a new file in testdata. If 1015 // the directory does not exist, it will create one. If the file already exists, 1016 // writeToCorpus will not rewrite it. writeToCorpus sets entry.Path to the new 1017 // file that was just written or an error if it failed. 1018 func writeToCorpus(entry *CorpusEntry, dir string) (err error) { 1019 sum := fmt.Sprintf("%x", sha256.Sum256(entry.Data)) 1020 entry.Path = filepath.Join(dir, sum) 1021 if err := os.MkdirAll(dir, 0777); err != nil { 1022 return err 1023 } 1024 if err := ioutil.WriteFile(entry.Path, entry.Data, 0666); err != nil { 1025 os.Remove(entry.Path) // remove partially written file 1026 return err 1027 } 1028 return nil 1029 } 1030 1031 func testName(path string) string { 1032 return filepath.Base(path) 1033 } 1034 1035 func zeroValue(t reflect.Type) interface{} { 1036 for _, v := range zeroVals { 1037 if reflect.TypeOf(v) == t { 1038 return v 1039 } 1040 } 1041 panic(fmt.Sprintf("unsupported type: %v", t)) 1042 } 1043 1044 var zeroVals []interface{} = []interface{}{ 1045 []byte(""), 1046 string(""), 1047 false, 1048 byte(0), 1049 rune(0), 1050 float32(0), 1051 float64(0), 1052 int(0), 1053 int8(0), 1054 int16(0), 1055 int32(0), 1056 int64(0), 1057 uint(0), 1058 uint8(0), 1059 uint16(0), 1060 uint32(0), 1061 uint64(0), 1062 } 1063 1064 var ( 1065 debugInfo bool 1066 debugInfoOnce sync.Once 1067 ) 1068 1069 func shouldPrintDebugInfo() bool { 1070 debugInfoOnce.Do(func() { 1071 debugInfo = godebug.Get("fuzzdebug") == "1" 1072 }) 1073 return debugInfo 1074 }