github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/clangtool/clangtool.go (about)

     1  // Copyright 2024 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package clangtool
     5  
     6  import (
     7  	"bytes"
     8  	"crypto/sha256"
     9  	"encoding/json"
    10  	"errors"
    11  	"fmt"
    12  	"io"
    13  	"math/rand"
    14  	"os"
    15  	"os/exec"
    16  	"path/filepath"
    17  	"runtime"
    18  	"slices"
    19  	"strings"
    20  	"time"
    21  
    22  	"github.com/google/syzkaller/pkg/osutil"
    23  )
    24  
    25  type Config struct {
    26  	ToolBin    string
    27  	KernelSrc  string
    28  	KernelObj  string
    29  	CacheFile  string
    30  	DebugTrace io.Writer
    31  }
    32  
    33  type OutputDataPtr[T any] interface {
    34  	*T
    35  	Merge(*T)
    36  	SetSourceFile(string, func(filename string) string)
    37  	Finalize(*Verifier)
    38  }
    39  
    40  // Run runs the clang tool on all files in the compilation database
    41  // in the kernel build dir and returns combined output for all files.
    42  // It always caches results, and optionally reuses previously cached results.
    43  func Run[Output any, OutputPtr OutputDataPtr[Output]](cfg *Config) (OutputPtr, error) {
    44  	if cfg.CacheFile != "" {
    45  		out, err := osutil.ReadJSON[OutputPtr](cfg.CacheFile)
    46  		if err == nil {
    47  			return out, nil
    48  		}
    49  	}
    50  
    51  	dbFile := filepath.Join(cfg.KernelObj, "compile_commands.json")
    52  	cmds, err := loadCompileCommands(dbFile)
    53  	if err != nil {
    54  		return nil, fmt.Errorf("failed to load compile commands: %w", err)
    55  	}
    56  
    57  	type result struct {
    58  		out OutputPtr
    59  		err error
    60  	}
    61  	results := make(chan *result, 10)
    62  	files := make(chan string, len(cmds))
    63  	for w := 0; w < runtime.NumCPU(); w++ {
    64  		go func() {
    65  			for file := range files {
    66  				out, err := runTool[Output, OutputPtr](cfg, dbFile, file)
    67  				results <- &result{out, err}
    68  			}
    69  		}()
    70  	}
    71  	for _, cmd := range cmds {
    72  		files <- cmd.File
    73  	}
    74  	close(files)
    75  
    76  	out := OutputPtr(new(Output))
    77  	for range cmds {
    78  		res := <-results
    79  		if res.err != nil {
    80  			return nil, res.err
    81  		}
    82  		out.Merge(res.out)
    83  	}
    84  	// Finalize the output (sort, dedup, etc), and let the output verify
    85  	// that all source file names, line numbers, etc are valid/present.
    86  	// If there are any bogus entries, it's better to detect them early,
    87  	// than to crash/error much later when the info is used.
    88  	// Some of the source files (generated) may be in the obj dir.
    89  	srcDirs := []string{cfg.KernelSrc, cfg.KernelObj}
    90  	if err := Finalize(out, srcDirs); err != nil {
    91  		return nil, err
    92  	}
    93  	if cfg.CacheFile != "" {
    94  		osutil.MkdirAll(filepath.Dir(cfg.CacheFile))
    95  		data, err := json.MarshalIndent(out, "", "\t")
    96  		if err != nil {
    97  			return nil, fmt.Errorf("failed to marshal output data: %w", err)
    98  		}
    99  		if err := osutil.WriteFile(cfg.CacheFile, data); err != nil {
   100  			return nil, err
   101  		}
   102  	}
   103  	return out, nil
   104  }
   105  
   106  func Finalize[Output any, OutputPtr OutputDataPtr[Output]](out OutputPtr, srcDirs []string) error {
   107  	v := &Verifier{
   108  		srcDirs:   srcDirs,
   109  		fileCache: make(map[string]int),
   110  	}
   111  	out.Finalize(v)
   112  	if v.err.Len() == 0 {
   113  		return nil
   114  	}
   115  	return errors.New(v.err.String())
   116  }
   117  
   118  type Verifier struct {
   119  	srcDirs   []string
   120  	fileCache map[string]int // file->line count (-1 is cached for missing files)
   121  	err       strings.Builder
   122  }
   123  
   124  func (v *Verifier) Filename(file string) {
   125  	if _, ok := v.fileCache[file]; ok {
   126  		return
   127  	}
   128  	for _, srcDir := range v.srcDirs {
   129  		data, err := os.ReadFile(filepath.Join(srcDir, file))
   130  		if err != nil {
   131  			continue
   132  		}
   133  		v.fileCache[file] = len(bytes.Split(data, []byte{'\n'}))
   134  		return
   135  	}
   136  	v.fileCache[file] = -1
   137  	fmt.Fprintf(&v.err, "missing file: %v\n", file)
   138  }
   139  
   140  func (v *Verifier) LineRange(file string, start, end int) {
   141  	v.Filename(file)
   142  	lines, ok := v.fileCache[file]
   143  	if !ok || lines < 0 {
   144  		return
   145  	}
   146  	// Line numbers produced by clang are 1-based.
   147  	if start <= 0 || end < start || end > lines {
   148  		fmt.Fprintf(&v.err, "bad line range [%v-%v] for file %v with %v lines\n",
   149  			start, end, file, lines)
   150  	}
   151  }
   152  
   153  func runTool[Output any, OutputPtr OutputDataPtr[Output]](cfg *Config, dbFile, file string) (OutputPtr, error) {
   154  	relFile := strings.TrimPrefix(strings.TrimPrefix(strings.TrimPrefix(filepath.Clean(file),
   155  		cfg.KernelSrc), cfg.KernelObj), "/")
   156  	// Suppress warning since we may build the tool on a different clang
   157  	// version that produces more warnings.
   158  	// Comments are needed for codesearch tool, but may be useful for declextract
   159  	// in the future if we try to parse them with LLMs.
   160  	data, err := exec.Command(cfg.ToolBin, "-p", dbFile,
   161  		"--extra-arg=-w", "--extra-arg=-fparse-all-comments", file).Output()
   162  	if err != nil {
   163  		var exitErr *exec.ExitError
   164  		if errors.As(err, &exitErr) {
   165  			err = fmt.Errorf("%v: %w\n%s", relFile, err, exitErr.Stderr)
   166  		}
   167  		return nil, err
   168  	}
   169  	out, err := osutil.ParseJSON[OutputPtr](data)
   170  	if err != nil {
   171  		return nil, err
   172  	}
   173  	// All includes in the tool output are relative to the build dir.
   174  	// Make them relative to the source dir.
   175  	out.SetSourceFile(relFile, func(filename string) string {
   176  		rel, err := filepath.Rel(cfg.KernelSrc, filepath.Join(cfg.KernelObj, filename))
   177  		if err == nil && filename != "" {
   178  			return rel
   179  		}
   180  		return filename
   181  	})
   182  	return out, nil
   183  }
   184  
   185  type compileCommand struct {
   186  	Command   string
   187  	Directory string
   188  	File      string
   189  }
   190  
   191  func loadCompileCommands(dbFile string) ([]compileCommand, error) {
   192  	data, err := os.ReadFile(dbFile)
   193  	if err != nil {
   194  		return nil, err
   195  	}
   196  	var cmds []compileCommand
   197  	if err := json.Unmarshal(data, &cmds); err != nil {
   198  		return nil, err
   199  	}
   200  	// Remove commands that don't relate to the kernel build
   201  	// (probably some host tools, etc).
   202  	cmds = slices.DeleteFunc(cmds, func(cmd compileCommand) bool {
   203  		return !strings.HasSuffix(cmd.File, ".c") ||
   204  			// Files compiled with gcc are not a part of the kernel
   205  			// (assuming compile commands were generated with make CC=clang).
   206  			// They are probably a part of some host tool.
   207  			strings.HasPrefix(cmd.Command, "gcc") ||
   208  			// KBUILD should add this define all kernel files.
   209  			!strings.Contains(cmd.Command, "-DKBUILD_BASENAME")
   210  	})
   211  	// Shuffle the order to detect any non-determinism caused by the order early.
   212  	// The result should be the same regardless.
   213  	rand.New(rand.NewSource(time.Now().UnixNano())).Shuffle(len(cmds), func(i, j int) {
   214  		cmds[i], cmds[j] = cmds[j], cmds[i]
   215  	})
   216  	if len(cmds) == 0 {
   217  		return nil, fmt.Errorf("no kernel compile commands in compile_commands.json" +
   218  			" (was the kernel compiled with gcc?)")
   219  	}
   220  	return cmds, nil
   221  }
   222  
   223  func SortAndDedupSlice[Slice ~[]E, E comparable](s Slice) Slice {
   224  	dedup := make(map[[sha256.Size]byte]E)
   225  	text := make(map[E][]byte)
   226  	for _, e := range s {
   227  		t, _ := json.Marshal(e)
   228  		dedup[sha256.Sum256(t)] = e
   229  		text[e] = t
   230  	}
   231  	s = make([]E, 0, len(dedup))
   232  	for _, e := range dedup {
   233  		s = append(s, e)
   234  	}
   235  	slices.SortFunc(s, func(a, b E) int {
   236  		return bytes.Compare(text[a], text[b])
   237  	})
   238  	return s
   239  }