code-intelligence.com/cifuzz@v0.40.0/internal/cmd/coverage/llvm/llvm.go (about)

     1  package llvm
     2  
import (
	"bytes"
	"debug/macho"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"runtime"
	"strings"

	"github.com/pkg/errors"
	"github.com/pterm/pterm"
	"github.com/spf13/viper"

	"code-intelligence.com/cifuzz/internal/build"
	"code-intelligence.com/cifuzz/internal/build/cmake"
	"code-intelligence.com/cifuzz/internal/build/other"
	"code-intelligence.com/cifuzz/internal/cmd/coverage/summary"
	"code-intelligence.com/cifuzz/internal/cmdutils"
	"code-intelligence.com/cifuzz/internal/config"
	"code-intelligence.com/cifuzz/pkg/binary"
	"code-intelligence.com/cifuzz/pkg/log"
	"code-intelligence.com/cifuzz/pkg/minijail"
	"code-intelligence.com/cifuzz/pkg/runfiles"
	"code-intelligence.com/cifuzz/util/envutil"
	"code-intelligence.com/cifuzz/util/executil"
	"code-intelligence.com/cifuzz/util/fileutil"
	"code-intelligence.com/cifuzz/util/stringutil"
)
    33  
// CoverageGenerator builds a fuzz test with coverage instrumentation, runs
// it on its corpus directories and turns the resulting LLVM profile data
// into a coverage report.
//
// Call BuildFuzzTestForCoverage first, then GenerateCoverageReport.
type CoverageGenerator struct {
	// OutputFormat selects the report type; "html" and "lcov" are the
	// values handled when the report is generated.
	OutputFormat    string
	// OutputPath is where the report is written. If empty, a default
	// location is chosen per output format.
	OutputPath      string
	// BuildSystem selects the builder; config.BuildSystemCMake and
	// config.BuildSystemOther are handled, anything else is an error.
	BuildSystem     string
	// BuildCommand is passed to the "other" builder.
	BuildCommand    string
	// BuildSystemArgs is passed as Args to the CMake builder.
	BuildSystemArgs []string
	// CleanCommand is passed to the "other" builder.
	CleanCommand    string
	// NumBuildJobs is passed to the CMake builder's parallel options.
	NumBuildJobs    uint
	// SeedCorpusDirs are user-specified corpus directories that are run
	// in addition to the corpora from the build result (if those exist).
	SeedCorpusDirs  []string
	// UseSandbox makes the fuzz test run via minijail.
	UseSandbox      bool
	// FuzzTest is the name of the fuzz test to build and run.
	FuzzTest        string
	// ProjectDir is passed to the builders and is used as the working
	// directory of the genhtml command.
	ProjectDir      string
	// Stderr receives the coverage summary table.
	Stderr          io.Writer
	// BuildStdout and BuildStderr are passed to the builders as their
	// Stdout and Stderr.
	BuildStdout     io.Writer
	BuildStderr     io.Writer

	// buildResult is set by build() and describes the built fuzz test.
	buildResult    *build.Result
	// tmpDir is created by BuildFuzzTestForCoverage and removed when
	// GenerateCoverageReport returns.
	tmpDir         string
	// outputDir is the "output" directory inside tmpDir; the raw profiles
	// and the helper corpus directories are created in it.
	outputDir      string
	// runfilesFinder locates the LLVM tools and the cifuzz include path.
	// Defaults to runfiles.Finder when unset.
	runfilesFinder runfiles.RunfilesFinder
}
    55  
    56  func (cov *CoverageGenerator) BuildFuzzTestForCoverage() error {
    57  	// ensure a finder is set
    58  	if cov.runfilesFinder == nil {
    59  		cov.runfilesFinder = runfiles.Finder
    60  	}
    61  
    62  	var err error
    63  	cov.tmpDir, err = os.MkdirTemp("", "llvm-coverage-")
    64  	if err != nil {
    65  		return errors.WithStack(err)
    66  	}
    67  	cov.outputDir = filepath.Join(cov.tmpDir, "output")
    68  	err = os.Mkdir(cov.outputDir, 0o755)
    69  	if err != nil {
    70  		return errors.WithStack(err)
    71  	}
    72  
    73  	err = cov.build()
    74  	if err != nil {
    75  		return err
    76  	}
    77  
    78  	return nil
    79  }
    80  
    81  func (cov *CoverageGenerator) GenerateCoverageReport() (string, error) {
    82  	defer fileutil.Cleanup(cov.tmpDir)
    83  
    84  	err := cov.run()
    85  	if err != nil {
    86  		var exitErr *exec.ExitError
    87  		if errors.As(err, &exitErr) && cov.UseSandbox {
    88  			return "", cmdutils.WrapCouldBeSandboxError(err)
    89  		}
    90  		return "", err
    91  	}
    92  
    93  	reportPath, err := cov.report()
    94  	if err != nil {
    95  		return "", err
    96  	}
    97  	return reportPath, nil
    98  }
    99  
   100  func (cov *CoverageGenerator) build() error {
   101  	switch cov.BuildSystem {
   102  	case config.BuildSystemCMake:
   103  		builder, err := cmake.NewBuilder(&cmake.BuilderOptions{
   104  			ProjectDir: cov.ProjectDir,
   105  			Args:       cov.BuildSystemArgs,
   106  			Sanitizers: []string{"coverage"},
   107  			Parallel: cmake.ParallelOptions{
   108  				Enabled: viper.IsSet("build-jobs"),
   109  				NumJobs: uint(cov.NumBuildJobs),
   110  			},
   111  			Stdout: cov.BuildStdout,
   112  			Stderr: cov.BuildStderr,
   113  			// We want the runtime deps in the build result because we
   114  			// pass them to the llvm-cov command.
   115  			FindRuntimeDeps: true,
   116  		})
   117  		if err != nil {
   118  			return err
   119  		}
   120  		err = builder.Configure()
   121  		if err != nil {
   122  			return err
   123  		}
   124  		buildResults, err := builder.Build([]string{cov.FuzzTest})
   125  		if err != nil {
   126  			return err
   127  		}
   128  		cov.buildResult = buildResults[0]
   129  		return nil
   130  
   131  	case config.BuildSystemOther:
   132  		if runtime.GOOS == "windows" {
   133  			return errors.New("CMake is the only supported build system on Windows")
   134  		}
   135  		builder, err := other.NewBuilder(&other.BuilderOptions{
   136  			ProjectDir:     cov.ProjectDir,
   137  			BuildCommand:   cov.BuildCommand,
   138  			CleanCommand:   cov.CleanCommand,
   139  			Sanitizers:     []string{"coverage"},
   140  			RunfilesFinder: cov.runfilesFinder,
   141  			Stdout:         cov.BuildStdout,
   142  			Stderr:         cov.BuildStderr,
   143  		})
   144  		if err != nil {
   145  			return err
   146  		}
   147  
   148  		if err := builder.Clean(); err != nil {
   149  			return err
   150  		}
   151  
   152  		buildResult, err := builder.Build(cov.FuzzTest)
   153  		if err != nil {
   154  			return err
   155  		}
   156  		cov.buildResult = buildResult
   157  		return nil
   158  
   159  	}
   160  	return errors.New("unknown build system")
   161  }
   162  
   163  func (cov *CoverageGenerator) run() error {
   164  	log.Infof("Running %s on corpus", pterm.Style{pterm.Reset, pterm.FgLightBlue}.Sprint(cov.FuzzTest))
   165  	log.Debugf("Executable: %s", cov.buildResult.Executable)
   166  
   167  	// Use user-specified seed corpus dirs (if any), the default seed
   168  	// corpus (if it exists), and the generated corpus (if it exists).
   169  	corpusDirs := cov.SeedCorpusDirs
   170  	exists, err := fileutil.Exists(cov.buildResult.SeedCorpus)
   171  	if err != nil {
   172  		return err
   173  	}
   174  	if exists {
   175  		corpusDirs = append(corpusDirs, cov.buildResult.SeedCorpus)
   176  	}
   177  	exists, err = fileutil.Exists(cov.buildResult.GeneratedCorpus)
   178  	if err != nil {
   179  		return err
   180  	}
   181  	if exists {
   182  		corpusDirs = append(corpusDirs, cov.buildResult.GeneratedCorpus)
   183  	}
   184  
   185  	// Ensure that symlinks are resolved to be able to add minijail
   186  	// bindings for the corpus dirs.
   187  	for i, dir := range corpusDirs {
   188  		corpusDirs[i], err = filepath.EvalSymlinks(dir)
   189  		if err != nil {
   190  			return errors.WithStack(err)
   191  		}
   192  	}
   193  
   194  	executable := cov.buildResult.Executable
   195  	conModeSupport := binary.SupportsLlvmProfileContinuousMode(executable)
   196  	var env []string
   197  	env, err = envutil.Setenv(env, "LLVM_PROFILE_FILE", cov.rawProfilePattern(conModeSupport))
   198  	if err != nil {
   199  		return err
   200  	}
   201  	env, err = envutil.Setenv(env, "NO_CIFUZZ", "1")
   202  	if err != nil {
   203  		return err
   204  	}
   205  
   206  	dirWithEmptyFile := filepath.Join(cov.outputDir, "empty-file-corpus")
   207  	err = os.Mkdir(dirWithEmptyFile, 0o755)
   208  	if err != nil {
   209  		return err
   210  	}
   211  	err = fileutil.Touch(filepath.Join(dirWithEmptyFile, "empty_file"))
   212  	if err != nil {
   213  		return err
   214  	}
   215  
   216  	emptyDir := filepath.Join(cov.outputDir, "merge-target")
   217  	err = os.Mkdir(emptyDir, 0o755)
   218  	if err != nil {
   219  		return err
   220  	}
   221  	artifactsDir := filepath.Join(cov.outputDir, "merge-artifacts")
   222  	err = os.Mkdir(artifactsDir, 0o755)
   223  	if err != nil {
   224  		return err
   225  	}
   226  
   227  	// libFuzzer emits crashing inputs in merge mode, but these aren't useful as we only run on already known inputs.
   228  	// Since there is no way to disable this behavior in libFuzzer, we instead emit artifacts into a dedicated temporary
   229  	// directory that is thrown away after the coverage run.
   230  	args := []string{"-artifact_prefix=" + artifactsDir + "/"}
   231  
   232  	// libFuzzer's merge mode never runs the empty input, whereas regular fuzzing runs and the replayer always try the
   233  	// empty input first. To achieve consistent behavior, manually run the empty input, ignoring any crashes. runFuzzer
   234  	// always logs any error we encounter.
   235  	// This line is responsible for empty inputs being skipped:
   236  	// https://github.com/llvm/llvm-project/blob/c7c0ce7d9ebdc0a49313bc77e14d1e856794f2e0/compiler-rt/lib/fuzzer/FuzzerIO.cpp#L127
   237  	_ = cov.runFuzzer(append(args, "-runs=0"), []string{dirWithEmptyFile}, env)
   238  
   239  	// We use libFuzzer's crash-resistant merge mode to merge all corpus directories into an empty directory, which
   240  	// makes libFuzzer go over all inputs in a subprocess that is restarted in case it crashes. With LLVM's continuous
   241  	// mode (see rawProfilePattern) and since the LLVM coverage information is automatically appended to the existing
   242  	// .profraw file, we collect complete coverage information even if the target crashes on an input in the corpus.
   243  	return cov.runFuzzer(append(args, "-merge=1"), append([]string{emptyDir}, corpusDirs...), env)
   244  }
   245  
   246  func (cov *CoverageGenerator) runFuzzer(preCorpusArgs []string, corpusDirs []string, env []string) error {
   247  	var err error
   248  	args := []string{cov.buildResult.Executable}
   249  	args = append(args, preCorpusArgs...)
   250  	args = append(args, corpusDirs...)
   251  
   252  	if cov.UseSandbox {
   253  		bindings := []*minijail.Binding{
   254  			// The fuzz target must be accessible
   255  			{Source: cov.buildResult.Executable},
   256  		}
   257  
   258  		for _, dir := range corpusDirs {
   259  			bindings = append(bindings, &minijail.Binding{Source: dir})
   260  		}
   261  
   262  		// Set up Minijail
   263  		mj, err := minijail.NewMinijail(&minijail.Options{
   264  			Args:      args,
   265  			Bindings:  bindings,
   266  			OutputDir: cov.outputDir,
   267  		})
   268  		if err != nil {
   269  			return err
   270  		}
   271  		defer mj.Cleanup()
   272  
   273  		// Use the command which runs the fuzz test via minijail
   274  		args = mj.Args
   275  	}
   276  
   277  	cmd := executil.Command(args[0], args[1:]...)
   278  	cmd.Env, err = envutil.Copy(os.Environ(), env)
   279  	if err != nil {
   280  		return err
   281  	}
   282  
   283  	errStream := &bytes.Buffer{}
   284  	if viper.GetBool("verbose") {
   285  		cmd.Stdout = os.Stdout
   286  		cmd.Stderr = os.Stderr
   287  	} else if cov.UseSandbox {
   288  		cmd.Stderr = minijail.NewOutputFilter(errStream)
   289  	} else {
   290  		cmd.Stderr = errStream
   291  	}
   292  
   293  	log.Debugf("Command: %s", envutil.QuotedCommandWithEnv(cmd.Args, env))
   294  	err = cmd.Run()
   295  	if err != nil {
   296  		// Add stderr output of the fuzzer to provide users with
   297  		// the context of this error even without verbose mode.
   298  		if !viper.GetBool("verbose") {
   299  			err = errors.Errorf("%v\n %s", err, errStream.String())
   300  		}
   301  		return cmdutils.WrapExecError(errors.WithStack(err), cmd.Cmd)
   302  	}
   303  	return err
   304  }
   305  
   306  func (cov *CoverageGenerator) report() (string, error) {
   307  	err := cov.indexRawProfile()
   308  	if err != nil {
   309  		return "", err
   310  	}
   311  
   312  	lcovReportSummary, err := cov.lcovReportSummary()
   313  	if err != nil {
   314  		return "", err
   315  	}
   316  	reportReader := strings.NewReader(lcovReportSummary)
   317  	summary.ParseLcov(reportReader).PrintTable(cov.Stderr)
   318  
   319  	reportPath := ""
   320  	switch cov.OutputFormat {
   321  	case "html":
   322  		reportPath, err = cov.generateHTMLReport()
   323  		if err != nil {
   324  			return "", err
   325  		}
   326  
   327  	case "lcov":
   328  		reportPath, err = cov.generateLcovReport()
   329  		if err != nil {
   330  			return "", err
   331  		}
   332  	}
   333  
   334  	return reportPath, nil
   335  }
   336  
   337  func (cov *CoverageGenerator) indexRawProfile() error {
   338  	rawProfileFiles, err := cov.rawProfileFiles()
   339  	if err != nil {
   340  		return err
   341  	}
   342  	if len(rawProfileFiles) == 0 {
   343  		// The rawProfilePattern parameter only governs whether we add "%c",
   344  		// which doesn't affect the actual raw profile location.
   345  		return errors.Errorf("%s did not generate .profraw files at %s", cov.buildResult.Executable, cov.rawProfilePattern(false))
   346  	}
   347  
   348  	llvmProfData, err := cov.runfilesFinder.LLVMProfDataPath()
   349  	if err != nil {
   350  		return err
   351  	}
   352  
   353  	args := append([]string{"merge", "-sparse", "-o", cov.indexedProfilePath()}, rawProfileFiles...)
   354  	cmd := exec.Command(llvmProfData, args...)
   355  	cmd.Stdout = os.Stdout
   356  	cmd.Stderr = os.Stderr
   357  	log.Debugf("Command: %s", strings.Join(stringutil.QuotedStrings(cmd.Args), " "))
   358  	err = cmd.Run()
   359  	if err != nil {
   360  		return cmdutils.WrapExecError(errors.WithStack(err), cmd)
   361  	}
   362  	return nil
   363  }
   364  
   365  func (cov *CoverageGenerator) rawProfilePattern(supportsContinuousMode bool) string {
   366  	// Use "%m" instead of a fixed path to support coverage of shared
   367  	// libraries: Each executable or library generates its own profile
   368  	// file, all of which we have to merge in the end. By using "%m",
   369  	// the profile is written to a unique file for each executable and
   370  	// shared library.
   371  	// Use "%c", if supported, which expands out to nothing, to enable the
   372  	// continuous mode in which the .profraw is mmaped and thus kept in sync with
   373  	// the counters in the instrumented code even when it crashes.
   374  	// https://clang.llvm.org/docs/SourceBasedCodeCoverage.html#running-the-instrumented-program
   375  	basePattern := "%m.profraw"
   376  	if supportsContinuousMode {
   377  		basePattern = "%c" + basePattern
   378  	}
   379  	return filepath.Join(cov.outputDir, basePattern)
   380  }
   381  
   382  func (cov *CoverageGenerator) generateHTMLReport() (string, error) {
   383  	args := []string{"export", "-format=lcov"}
   384  	ignoreCIFuzzIncludesArgs, err := cov.getIgnoreCIFuzzIncludesArgs()
   385  	if err != nil {
   386  		return "", err
   387  	}
   388  	args = append(args, ignoreCIFuzzIncludesArgs...)
   389  	report, err := cov.runLlvmCov(args)
   390  	if err != nil {
   391  		return "", err
   392  	}
   393  	// Write lcov report to temp dir
   394  	reportDir, err := os.MkdirTemp("", "coverage-")
   395  	if err != nil {
   396  		return "", errors.WithStack(err)
   397  	}
   398  	lcovReport := filepath.Join(reportDir, "coverage.lcov")
   399  	err = os.WriteFile(lcovReport, []byte(report), 0o644)
   400  	if err != nil {
   401  		return "", errors.WithStack(err)
   402  	}
   403  
   404  	if cov.OutputPath == "" {
   405  		// If no output path is specified, we create the output in a
   406  		// temporary directory.
   407  		outputDir, err := os.MkdirTemp("", "coverage-")
   408  		if err != nil {
   409  			return "", errors.WithStack(err)
   410  		}
   411  		cov.OutputPath = filepath.Join(outputDir, cov.executableName())
   412  	}
   413  
   414  	// Create an HTML report via genhtml
   415  	genHTML, err := runfiles.Finder.GenHTMLPath()
   416  	if err != nil {
   417  		return "", err
   418  	}
   419  	args = []string{"--output", cov.OutputPath, lcovReport}
   420  
   421  	var cmd *exec.Cmd
   422  	if runtime.GOOS == "windows" {
   423  		// genHTML is a perl script, which has to be started like
   424  		// "perl /path/to/genhtml args..." on Windows
   425  		args = append([]string{genHTML}, args...)
   426  		perl, err := runfiles.Finder.PerlPath()
   427  		if err != nil {
   428  			return "", err
   429  		}
   430  		cmd = exec.Command(perl, args...)
   431  	} else {
   432  		cmd = exec.Command(genHTML, args...)
   433  	}
   434  
   435  	cmd.Dir = cov.ProjectDir
   436  	cmd.Stderr = os.Stderr
   437  	log.Debugf("Command: %s", cmd.String())
   438  	err = cmd.Run()
   439  	if err != nil {
   440  		return "", errors.WithStack(err)
   441  	}
   442  
   443  	return cov.OutputPath, nil
   444  }
   445  
   446  func (cov *CoverageGenerator) runLlvmCov(args []string) (string, error) {
   447  	llvmCov, err := cov.runfilesFinder.LLVMCovPath()
   448  	if err != nil {
   449  		return "", err
   450  	}
   451  
   452  	// Add all runtime dependencies of the fuzz test to the binaries
   453  	// processed by llvm-cov to include them in the coverage report
   454  	args = append(args, "-instr-profile="+cov.indexedProfilePath())
   455  	args = append(args, cov.buildResult.Executable)
   456  	if archArg, err := cov.archFlagIfNeeded(cov.buildResult.Executable); err != nil {
   457  		return "", err
   458  	} else if archArg != "" {
   459  		args = append(args, archArg)
   460  	}
   461  	for _, path := range cov.buildResult.RuntimeDeps {
   462  		args = append(args, "-object="+path)
   463  		if archArg, err := cov.archFlagIfNeeded(path); err != nil {
   464  			return "", err
   465  		} else if archArg != "" {
   466  			args = append(args, archArg)
   467  		}
   468  	}
   469  
   470  	cmd := exec.Command(llvmCov, args...)
   471  	cmd.Stderr = os.Stderr
   472  	log.Debugf("Command: %s", strings.Join(stringutil.QuotedStrings(cmd.Args), " "))
   473  	output, err := cmd.Output()
   474  	if err != nil {
   475  		return "", cmdutils.WrapExecError(errors.WithStack(err), cmd)
   476  	}
   477  	return string(output), nil
   478  }
   479  
   480  func (cov *CoverageGenerator) generateLcovReport() (string, error) {
   481  	args := []string{"export", "-format=lcov"}
   482  	ignoreCIFuzzIncludesArgs, err := cov.getIgnoreCIFuzzIncludesArgs()
   483  	if err != nil {
   484  		return "", err
   485  	}
   486  	args = append(args, ignoreCIFuzzIncludesArgs...)
   487  	report, err := cov.runLlvmCov(args)
   488  	if err != nil {
   489  		return "", err
   490  	}
   491  
   492  	outputPath := cov.OutputPath
   493  	if cov.OutputPath == "" {
   494  		// If no output path is specified, we create the output in the
   495  		// current working directory. We don't create it in a temporary
   496  		// directory like we do for HTML reports, because we can't open
   497  		// the lcov report in a browser, so the command is only useful
   498  		// if the lcov report is accessible after it was created.
   499  		outputPath = cov.executableName() + ".coverage.lcov"
   500  	}
   501  
   502  	err = os.WriteFile(outputPath, []byte(report), 0o644)
   503  	if err != nil {
   504  		return "", errors.WithStack(err)
   505  	}
   506  
   507  	log.Debugf("Created lcov trace file: %s", outputPath)
   508  	return outputPath, nil
   509  }
   510  
   511  func (cov *CoverageGenerator) lcovReportSummary() (string, error) {
   512  	args := []string{"export", "-format=lcov", "-summary-only"}
   513  	ignoreCIFuzzIncludesArgs, err := cov.getIgnoreCIFuzzIncludesArgs()
   514  	if err != nil {
   515  		return "", err
   516  	}
   517  	args = append(args, ignoreCIFuzzIncludesArgs...)
   518  	output, err := cov.runLlvmCov(args)
   519  	if err != nil {
   520  		return "", err
   521  	}
   522  
   523  	return output, nil
   524  }
   525  
   526  func (cov *CoverageGenerator) getIgnoreCIFuzzIncludesArgs() ([]string, error) {
   527  	cifuzzIncludePath, err := cov.runfilesFinder.CIFuzzIncludePath()
   528  	if err != nil {
   529  		return nil, err
   530  	}
   531  	return []string{"-ignore-filename-regex=" + regexp.QuoteMeta(cifuzzIncludePath) + "/.*"}, nil
   532  }
   533  
   534  func (cov *CoverageGenerator) rawProfileFiles() ([]string, error) {
   535  	files, err := filepath.Glob(filepath.Join(cov.outputDir, "*.profraw"))
   536  	return files, errors.WithStack(err)
   537  }
   538  
   539  func (cov *CoverageGenerator) indexedProfilePath() string {
   540  	return filepath.Join(cov.tmpDir, filepath.Base(cov.buildResult.Executable)+".profdata")
   541  }
   542  
   543  func (cov *CoverageGenerator) executableName() string {
   544  	executable := cov.buildResult.Executable
   545  	// Remove .exe file extension on Windows
   546  	if runtime.GOOS == "windows" {
   547  		executable = strings.TrimSuffix(executable, filepath.Ext(executable))
   548  	}
   549  	return filepath.Base(executable)
   550  }
   551  
   552  // Returns an llvm-cov -arch flag indicating the preferred architecture of the given object on macOS, where objects can
   553  // be "universal", that is, contain versions for multiple architectures.
   554  func (cov *CoverageGenerator) archFlagIfNeeded(object string) (string, error) {
   555  	if runtime.GOOS != "darwin" {
   556  		// Only macOS uses universal binaries that bundle multiple architectures.
   557  		return "", nil
   558  	}
   559  	var cifuzzCPU macho.Cpu
   560  	if runtime.GOARCH == "amd64" {
   561  		cifuzzCPU = macho.CpuAmd64
   562  	} else {
   563  		cifuzzCPU = macho.CpuArm64
   564  	}
   565  	fatFile, fatErr := macho.OpenFat(object)
   566  	if fatErr == nil {
   567  		defer fatFile.Close()
   568  		var fallbackCPU macho.Cpu
   569  		for _, arch := range fatFile.Arches {
   570  			// Give preference to the architecture matching that of the cifuzz binary.
   571  			if arch.Cpu == cifuzzCPU {
   572  				return cov.cpuToArchFlag(arch.Cpu)
   573  			}
   574  			if arch.Cpu == macho.CpuAmd64 || arch.Cpu == macho.CpuArm64 {
   575  				fallbackCPU = arch.Cpu
   576  			}
   577  		}
   578  		return cov.cpuToArchFlag(fallbackCPU)
   579  	}
   580  	file, err := macho.Open(object)
   581  	if err == nil {
   582  		defer file.Close()
   583  		return cov.cpuToArchFlag(file.Cpu)
   584  	}
   585  	return "", errors.Errorf("failed to parse Mach-O file %q: %q (as universal binary), %q", object, fatErr, err)
   586  }
   587  
   588  func (cov *CoverageGenerator) cpuToArchFlag(cpu macho.Cpu) (string, error) {
   589  	switch cpu {
   590  	case macho.CpuArm64:
   591  		return "-arch=arm64", nil
   592  	case macho.CpuAmd64:
   593  		return "-arch=x86_64", nil
   594  	default:
   595  		return "", errors.Errorf("unsupported architecture: %s", cpu.String())
   596  	}
   597  }