github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/tools/syz-declextract/declextract.go (about) 1 // Copyright 2024 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package main 5 6 import ( 7 "bufio" 8 "bytes" 9 "encoding/json" 10 "flag" 11 "fmt" 12 "io" 13 "io/fs" 14 "os" 15 "path/filepath" 16 "slices" 17 "strings" 18 "time" 19 20 "github.com/google/syzkaller/pkg/ast" 21 "github.com/google/syzkaller/pkg/clangtool" 22 "github.com/google/syzkaller/pkg/compiler" 23 "github.com/google/syzkaller/pkg/cover" 24 "github.com/google/syzkaller/pkg/declextract" 25 "github.com/google/syzkaller/pkg/ifaceprobe" 26 "github.com/google/syzkaller/pkg/mgrconfig" 27 "github.com/google/syzkaller/pkg/osutil" 28 "github.com/google/syzkaller/pkg/subsystem" 29 _ "github.com/google/syzkaller/pkg/subsystem/lists" 30 "github.com/google/syzkaller/pkg/tool" 31 "github.com/google/syzkaller/sys/targets" 32 "golang.org/x/sync/errgroup" 33 ) 34 35 // The target we currently assume for extracted descriptions. 36 var target = targets.Get(targets.Linux, targets.AMD64) 37 38 func main() { 39 var ( 40 flagConfig = flag.String("config", "", "manager config file") 41 flagBinary = flag.String("binary", "syz-declextract", "path to syz-declextract binary") 42 flagCoverage = flag.String("coverage", "", "syzbot coverage jsonl file") 43 flagArches = flag.String("arches", "", "comma-separated list of arches to extract (all if empty)") 44 ) 45 defer tool.Init()() 46 mgrcfg, err := mgrconfig.LoadFile(*flagConfig) 47 if err != nil { 48 tool.Fail(err) 49 } 50 loadProbeInfo := func() (*ifaceprobe.Info, error) { 51 return probe(mgrcfg, *flagConfig) 52 } 53 cfg := &config{ 54 archList: *flagArches, 55 autoFile: filepath.FromSlash("sys/linux/auto.txt"), 56 coverFile: *flagCoverage, 57 loadProbeInfo: loadProbeInfo, 58 Config: &clangtool.Config{ 59 ToolBin: *flagBinary, 60 KernelSrc: mgrcfg.KernelSrc, 61 KernelObj: mgrcfg.KernelObj, 62 CacheFile: filepath.Join(mgrcfg.Workdir, "declextract.cache"), 63 DebugTrace: os.Stderr, 64 }, 65 } 66 if _, err := run(cfg); err != nil { 67 tool.Fail(err) 68 } 69 } 70 71 type config struct { 72 archList string 73 autoFile string 74 coverFile string 75 loadProbeInfo func() (*ifaceprobe.Info, error) 76 *clangtool.Config 77 } 78 79 func run(cfg *config) (*declextract.Result, error) { 80 out, probeInfo, coverage, syscallRename, err := prepare(cfg) 81 if err != nil { 82 return nil, err 83 } 84 res, err := declextract.Run(out, probeInfo, coverage, syscallRename, cfg.DebugTrace) 85 if err != nil { 86 return nil, err 87 } 88 if err := osutil.WriteFile(cfg.autoFile, res.Descriptions); err != nil { 89 return nil, err 90 } 91 if err := osutil.WriteFile(cfg.autoFile+".info", serialize(res.Interfaces)); err != nil { 92 return nil, err 93 } 94 // In order to remove unused bits of the descriptions, we need to write them out first, 95 // and then parse all descriptions back b/c auto descriptions use some types defined 96 // by manual descriptions (compiler.CollectUnused requires complete descriptions). 97 // This also canonicalizes them b/c new lines are added during parsing. 98 eh, errors := errorHandler() 99 desc := ast.ParseGlob(filepath.Join(filepath.Dir(cfg.autoFile), "*.txt"), eh) 100 if desc == nil { 101 return nil, fmt.Errorf("failed to parse descriptions\n%s", errors.Bytes()) 102 } 103 // Need to clone descriptions b/c CollectUnused changes them slightly during type checking. 104 unusedNodes, err := compiler.CollectUnused(desc.Clone(), target, eh) 105 if err != nil { 106 return nil, fmt.Errorf("failed to typecheck descriptions: %w\n%s", err, errors.Bytes()) 107 } 108 consts := compiler.ExtractConsts(desc.Clone(), target, eh) 109 if consts == nil { 110 return nil, fmt.Errorf("failed to typecheck descriptions: %w\n%s", err, errors.Bytes()) 111 } 112 finishInterfaces(res.Interfaces, consts, cfg.autoFile) 113 if err := osutil.WriteFile(cfg.autoFile+".info", serialize(res.Interfaces)); err != nil { 114 return nil, err 115 } 116 removeUnused(desc, "", unusedNodes) 117 // Second pass to remove unused defines/includes. This needs to be done after removing 118 // other garbage b/c they may be used by other garbage. 119 unusedConsts, err := compiler.CollectUnusedConsts(desc.Clone(), target, res.IncludeUse, eh) 120 if err != nil { 121 return nil, fmt.Errorf("failed to typecheck descriptions: %w\n%s", err, errors.Bytes()) 122 } 123 removeUnused(desc, cfg.autoFile, unusedConsts) 124 // We need re-parse them again b/c new lines are fixed up during parsing. 125 formatted := ast.Format(ast.Parse(ast.Format(desc), cfg.autoFile, nil)) 126 if err := osutil.WriteFile(cfg.autoFile, formatted); err != nil { 127 return nil, err 128 } 129 return res, nil 130 } 131 132 func removeUnused(desc *ast.Description, autoFile string, unusedNodes []ast.Node) { 133 unused := make(map[string]bool) 134 for _, n := range unusedNodes { 135 _, typ, name := n.Info() 136 unused[typ+name] = true 137 } 138 desc.Nodes = slices.DeleteFunc(desc.Nodes, func(n ast.Node) bool { 139 pos, typ, name := n.Info() 140 return autoFile != "" && pos.File != autoFile || unused[typ+name] 141 }) 142 } 143 144 func prepare(cfg *config) (*declextract.Output, *ifaceprobe.Info, []*cover.FileCoverage, 145 map[string][]string, error) { 146 arches, err := tool.ParseArchList(target.OS, cfg.archList) 147 if err != nil { 148 return nil, nil, nil, nil, fmt.Errorf("failed to parse arches flag: %w", err) 149 } 150 var eg errgroup.Group 151 var out *declextract.Output 152 eg.Go(func() error { 153 var err error 154 out, err = clangtool.Run[declextract.Output](cfg.Config) 155 if err != nil { 156 return err 157 } 158 return nil 159 }) 160 var probeInfo *ifaceprobe.Info 161 eg.Go(func() error { 162 var err error 163 probeInfo, err = cfg.loadProbeInfo() 164 if err != nil { 165 return fmt.Errorf("kernel probing failed: %w", err) 166 } 167 return nil 168 }) 169 var syscallRename map[string][]string 170 eg.Go(func() error { 171 var err error 172 syscallRename, err = buildSyscallRenameMap(cfg.KernelSrc, arches) 173 if err != nil { 174 return fmt.Errorf("failed to build syscall rename map: %w", err) 175 } 176 return nil 177 }) 178 var coverage []*cover.FileCoverage 179 eg.Go(func() error { 180 if cfg.coverFile == "" { 181 return nil 182 } 183 var err error 184 coverage, err = loadCoverage(cfg.coverFile) 185 return err 186 }) 187 err = eg.Wait() 188 return out, probeInfo, coverage, syscallRename, err 189 } 190 191 func loadCoverage(fileName string) ([]*cover.FileCoverage, error) { 192 f, err := os.Open(fileName) 193 if err != nil { 194 return nil, err 195 } 196 dec := json.NewDecoder(f) 197 dec.DisallowUnknownFields() 198 var coverage []*cover.FileCoverage 199 for { 200 elem := new(cover.FileCoverage) 201 if err := dec.Decode(elem); err != nil { 202 if err == io.EOF { 203 break 204 } 205 return nil, err 206 } 207 coverage = append(coverage, elem) 208 } 209 return coverage, nil 210 } 211 212 func probe(cfg *mgrconfig.Config, cfgFile string) (*ifaceprobe.Info, error) { 213 cacheFile := filepath.Join(cfg.Workdir, "interfaces.json") 214 info, err := readProbeResult(cacheFile) 215 if err == nil { 216 return info, nil 217 } 218 _, err = osutil.RunCmd(30*time.Minute, "", filepath.Join(cfg.Syzkaller, "bin", "syz-manager"), 219 "-config", cfgFile, "-mode", "iface-probe") 220 if err != nil { 221 return nil, err 222 } 223 return readProbeResult(cacheFile) 224 } 225 226 func readProbeResult(file string) (*ifaceprobe.Info, error) { 227 data, err := os.ReadFile(file) 228 if err != nil { 229 return nil, err 230 } 231 dec := json.NewDecoder(bytes.NewReader(data)) 232 dec.DisallowUnknownFields() 233 info := new(ifaceprobe.Info) 234 if err := dec.Decode(info); err != nil { 235 return nil, fmt.Errorf("failed to unmarshal interfaces.json: %w", err) 236 } 237 return info, nil 238 } 239 240 func errorHandler() (func(pos ast.Pos, msg string), *bytes.Buffer) { 241 errors := new(bytes.Buffer) 242 eh := func(pos ast.Pos, msg string) { 243 pos.File = filepath.Base(pos.File) 244 fmt.Fprintf(errors, "%v: %v\n", pos, msg) 245 } 246 return eh, errors 247 } 248 249 func serialize(interfaces []*declextract.Interface) []byte { 250 w := new(bytes.Buffer) 251 for _, iface := range interfaces { 252 fmt.Fprintf(w, "%v\t%v\tfunc:%v\tloc:%v\tcoverage:%v\taccess:%v\tmanual_desc:%v\tauto_desc:%v", 253 iface.Type, iface.Name, iface.Func, iface.ReachableLOC, 254 cover.Percent(iface.CoveredBlocks, iface.TotalBlocks), 255 iface.Access, iface.ManualDescriptions, iface.AutoDescriptions) 256 for _, file := range iface.Files { 257 fmt.Fprintf(w, "\tfile:%v", file) 258 } 259 for _, subsys := range iface.Subsystems { 260 fmt.Fprintf(w, "\tsubsystem:%v", subsys) 261 } 262 fmt.Fprintf(w, "\n") 263 } 264 return w.Bytes() 265 } 266 267 func finishInterfaces(interfaces []*declextract.Interface, consts map[string]*compiler.ConstInfo, autoFile string) { 268 manual := make(map[string]bool) 269 for file, desc := range consts { 270 for _, c := range desc.Consts { 271 if file != autoFile { 272 manual[c.Name] = true 273 } 274 } 275 } 276 extractor := subsystem.MakeExtractor(subsystem.GetList(target.OS)) 277 for _, iface := range interfaces { 278 if iface.IdentifyingConst != "" { 279 iface.ManualDescriptions = declextract.Tristate(manual[iface.IdentifyingConst]) 280 } 281 var crashes []*subsystem.Crash 282 for _, file := range iface.Files { 283 crashes = append(crashes, &subsystem.Crash{GuiltyPath: file}) 284 } 285 for _, s := range extractor.Extract(crashes) { 286 iface.Subsystems = append(iface.Subsystems, s.Name) 287 } 288 slices.Sort(iface.Subsystems) 289 } 290 } 291 292 func buildSyscallRenameMap(sourceDir string, arches []string) (map[string][]string, error) { 293 // Some syscalls have different names and entry points and thus need to be renamed. 294 // e.g. SYSCALL_DEFINE1(setuid16, old_uid_t, uid) is referred to in the .tbl file with setuid. 295 // Parse *.tbl files that map functions defined with SYSCALL_DEFINE macros to actual syscall names. 296 // Lines in the files look as follows: 297 // 288 common accept4 sys_accept4 298 // Total mapping is many-to-many, so we give preference to x86 arch, then to 64-bit syscalls, 299 // and then just order arches by name to have deterministic result. 300 // Note: some syscalls may have no record in the tables for the architectures we support. 301 syscalls := make(map[string][]tblSyscall) 302 tblFiles, err := findTblFiles(sourceDir, arches) 303 if err != nil { 304 return nil, err 305 } 306 if len(tblFiles) == 0 { 307 return nil, fmt.Errorf("found no *.tbl files in the kernel dir %v", sourceDir) 308 } 309 for file, arches := range tblFiles { 310 for _, arch := range arches { 311 data, err := os.ReadFile(file) 312 if err != nil { 313 return nil, err 314 } 315 parseTblFile(data, arch, syscalls) 316 } 317 } 318 rename := make(map[string][]string) 319 for syscall, descs := range syscalls { 320 slices.SortFunc(descs, func(a, b tblSyscall) int { 321 if (a.arch == target.Arch) != (b.arch == target.Arch) { 322 if a.arch == target.Arch { 323 return -1 324 } 325 return 1 326 } 327 if a.is64bit != b.is64bit { 328 if a.is64bit { 329 return -1 330 } 331 return 1 332 } 333 return strings.Compare(a.arch, b.arch) 334 }) 335 fn := descs[0].fn 336 rename[fn] = append(rename[fn], syscall) 337 } 338 return rename, nil 339 } 340 341 type tblSyscall struct { 342 fn string 343 arch string 344 is64bit bool 345 } 346 347 func parseTblFile(data []byte, arch string, syscalls map[string][]tblSyscall) { 348 for s := bufio.NewScanner(bytes.NewReader(data)); s.Scan(); { 349 fields := strings.Fields(s.Text()) 350 if len(fields) < 4 || fields[0] == "#" { 351 continue 352 } 353 group := fields[1] 354 syscall := fields[2] 355 fn := strings.TrimPrefix(fields[3], "sys_") 356 if strings.HasPrefix(syscall, "unused") || fn == "-" || 357 // Powerpc spu group defines some syscalls (utimesat) 358 // that are not present on any of our arches. 359 group == "spu" || 360 // llseek does not exist, it comes from: 361 // arch/arm64/tools/syscall_64.tbl -> scripts/syscall.tbl 362 // 62 32 llseek sys_llseek 363 // So scripts/syscall.tbl is pulled for 64-bit arch, but the syscall 364 // is defined only for 32-bit arch in that file. 365 syscall == "llseek" || 366 // Don't want to test it (but see issue 5308). 367 syscall == "reboot" { 368 continue 369 } 370 syscalls[syscall] = append(syscalls[syscall], tblSyscall{ 371 fn: fn, 372 arch: arch, 373 is64bit: group == "common" || strings.Contains(group, "64"), 374 }) 375 } 376 } 377 378 func findTblFiles(sourceDir string, arches []string) (map[string][]string, error) { 379 files := make(map[string][]string) 380 for _, name := range arches { 381 arch := targets.List[target.OS][name] 382 err := filepath.WalkDir(filepath.Join(sourceDir, "arch", arch.KernelHeaderArch), 383 func(file string, d fs.DirEntry, err error) error { 384 if err == nil && strings.HasSuffix(file, ".tbl") { 385 files[file] = append(files[file], arch.VMArch) 386 } 387 return err 388 }) 389 if err != nil { 390 return nil, err 391 } 392 } 393 return files, nil 394 }