github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/clangtool/clangtool.go (about) 1 // Copyright 2024 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package clangtool 5 6 import ( 7 "bytes" 8 "crypto/sha256" 9 "encoding/json" 10 "errors" 11 "fmt" 12 "io" 13 "math/rand" 14 "os" 15 "os/exec" 16 "path/filepath" 17 "runtime" 18 "slices" 19 "strings" 20 "time" 21 22 "github.com/google/syzkaller/pkg/osutil" 23 ) 24 25 type Config struct { 26 ToolBin string 27 KernelSrc string 28 KernelObj string 29 CacheFile string 30 DebugTrace io.Writer 31 } 32 33 type OutputDataPtr[T any] interface { 34 *T 35 Merge(*T) 36 SetSourceFile(string, func(filename string) string) 37 Finalize(*Verifier) 38 } 39 40 // Run runs the clang tool on all files in the compilation database 41 // in the kernel build dir and returns combined output for all files. 42 // It always caches results, and optionally reuses previously cached results. 43 func Run[Output any, OutputPtr OutputDataPtr[Output]](cfg *Config) (OutputPtr, error) { 44 if cfg.CacheFile != "" { 45 out, err := osutil.ReadJSON[OutputPtr](cfg.CacheFile) 46 if err == nil { 47 return out, nil 48 } 49 } 50 51 dbFile := filepath.Join(cfg.KernelObj, "compile_commands.json") 52 cmds, err := loadCompileCommands(dbFile) 53 if err != nil { 54 return nil, fmt.Errorf("failed to load compile commands: %w", err) 55 } 56 57 type result struct { 58 out OutputPtr 59 err error 60 } 61 results := make(chan *result, 10) 62 files := make(chan string, len(cmds)) 63 for w := 0; w < runtime.NumCPU(); w++ { 64 go func() { 65 for file := range files { 66 out, err := runTool[Output, OutputPtr](cfg, dbFile, file) 67 results <- &result{out, err} 68 } 69 }() 70 } 71 for _, cmd := range cmds { 72 files <- cmd.File 73 } 74 close(files) 75 76 out := OutputPtr(new(Output)) 77 for range cmds { 78 res := <-results 79 if res.err != nil { 80 return nil, res.err 81 } 82 out.Merge(res.out) 83 } 84 // Finalize the output (sort, dedup, etc), and let the output verify 85 // that all source file names, line numbers, etc are valid/present. 86 // If there are any bogus entries, it's better to detect them early, 87 // than to crash/error much later when the info is used. 88 // Some of the source files (generated) may be in the obj dir. 89 srcDirs := []string{cfg.KernelSrc, cfg.KernelObj} 90 if err := Finalize(out, srcDirs); err != nil { 91 return nil, err 92 } 93 if cfg.CacheFile != "" { 94 osutil.MkdirAll(filepath.Dir(cfg.CacheFile)) 95 data, err := json.MarshalIndent(out, "", "\t") 96 if err != nil { 97 return nil, fmt.Errorf("failed to marshal output data: %w", err) 98 } 99 if err := osutil.WriteFile(cfg.CacheFile, data); err != nil { 100 return nil, err 101 } 102 } 103 return out, nil 104 } 105 106 func Finalize[Output any, OutputPtr OutputDataPtr[Output]](out OutputPtr, srcDirs []string) error { 107 v := &Verifier{ 108 srcDirs: srcDirs, 109 fileCache: make(map[string]int), 110 } 111 out.Finalize(v) 112 if v.err.Len() == 0 { 113 return nil 114 } 115 return errors.New(v.err.String()) 116 } 117 118 type Verifier struct { 119 srcDirs []string 120 fileCache map[string]int // file->line count (-1 is cached for missing files) 121 err strings.Builder 122 } 123 124 func (v *Verifier) Filename(file string) { 125 if _, ok := v.fileCache[file]; ok { 126 return 127 } 128 for _, srcDir := range v.srcDirs { 129 data, err := os.ReadFile(filepath.Join(srcDir, file)) 130 if err != nil { 131 continue 132 } 133 v.fileCache[file] = len(bytes.Split(data, []byte{'\n'})) 134 return 135 } 136 v.fileCache[file] = -1 137 fmt.Fprintf(&v.err, "missing file: %v\n", file) 138 } 139 140 func (v *Verifier) LineRange(file string, start, end int) { 141 v.Filename(file) 142 lines, ok := v.fileCache[file] 143 if !ok || lines < 0 { 144 return 145 } 146 // Line numbers produced by clang are 1-based. 147 if start <= 0 || end < start || end > lines { 148 fmt.Fprintf(&v.err, "bad line range [%v-%v] for file %v with %v lines\n", 149 start, end, file, lines) 150 } 151 } 152 153 func runTool[Output any, OutputPtr OutputDataPtr[Output]](cfg *Config, dbFile, file string) (OutputPtr, error) { 154 relFile := strings.TrimPrefix(strings.TrimPrefix(strings.TrimPrefix(filepath.Clean(file), 155 cfg.KernelSrc), cfg.KernelObj), "/") 156 // Suppress warning since we may build the tool on a different clang 157 // version that produces more warnings. 158 // Comments are needed for codesearch tool, but may be useful for declextract 159 // in the future if we try to parse them with LLMs. 160 data, err := exec.Command(cfg.ToolBin, "-p", dbFile, 161 "--extra-arg=-w", "--extra-arg=-fparse-all-comments", file).Output() 162 if err != nil { 163 var exitErr *exec.ExitError 164 if errors.As(err, &exitErr) { 165 err = fmt.Errorf("%v: %w\n%s", relFile, err, exitErr.Stderr) 166 } 167 return nil, err 168 } 169 out, err := osutil.ParseJSON[OutputPtr](data) 170 if err != nil { 171 return nil, err 172 } 173 // All includes in the tool output are relative to the build dir. 174 // Make them relative to the source dir. 175 out.SetSourceFile(relFile, func(filename string) string { 176 rel, err := filepath.Rel(cfg.KernelSrc, filepath.Join(cfg.KernelObj, filename)) 177 if err == nil && filename != "" { 178 return rel 179 } 180 return filename 181 }) 182 return out, nil 183 } 184 185 type compileCommand struct { 186 Command string 187 Directory string 188 File string 189 } 190 191 func loadCompileCommands(dbFile string) ([]compileCommand, error) { 192 data, err := os.ReadFile(dbFile) 193 if err != nil { 194 return nil, err 195 } 196 var cmds []compileCommand 197 if err := json.Unmarshal(data, &cmds); err != nil { 198 return nil, err 199 } 200 // Remove commands that don't relate to the kernel build 201 // (probably some host tools, etc). 202 cmds = slices.DeleteFunc(cmds, func(cmd compileCommand) bool { 203 return !strings.HasSuffix(cmd.File, ".c") || 204 // Files compiled with gcc are not a part of the kernel 205 // (assuming compile commands were generated with make CC=clang). 206 // They are probably a part of some host tool. 207 strings.HasPrefix(cmd.Command, "gcc") || 208 // KBUILD should add this define all kernel files. 209 !strings.Contains(cmd.Command, "-DKBUILD_BASENAME") 210 }) 211 // Shuffle the order to detect any non-determinism caused by the order early. 212 // The result should be the same regardless. 213 rand.New(rand.NewSource(time.Now().UnixNano())).Shuffle(len(cmds), func(i, j int) { 214 cmds[i], cmds[j] = cmds[j], cmds[i] 215 }) 216 if len(cmds) == 0 { 217 return nil, fmt.Errorf("no kernel compile commands in compile_commands.json" + 218 " (was the kernel compiled with gcc?)") 219 } 220 return cmds, nil 221 } 222 223 func SortAndDedupSlice[Slice ~[]E, E comparable](s Slice) Slice { 224 dedup := make(map[[sha256.Size]byte]E) 225 text := make(map[E][]byte) 226 for _, e := range s { 227 t, _ := json.Marshal(e) 228 dedup[sha256.Sum256(t)] = e 229 text[e] = t 230 } 231 s = make([]E, 0, len(dedup)) 232 for _, e := range dedup { 233 s = append(s, e) 234 } 235 slices.SortFunc(s, func(a, b E) int { 236 return bytes.Compare(text[a], text[b]) 237 }) 238 return s 239 }