github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/tools/syz-declextract/declextract.go (about)

     1  // Copyright 2024 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package main
     5  
     6  import (
     7  	"bufio"
     8  	"bytes"
     9  	"encoding/json"
    10  	"flag"
    11  	"fmt"
    12  	"io"
    13  	"io/fs"
    14  	"os"
    15  	"path/filepath"
    16  	"slices"
    17  	"strings"
    18  	"time"
    19  
    20  	"github.com/google/syzkaller/pkg/ast"
    21  	"github.com/google/syzkaller/pkg/clangtool"
    22  	"github.com/google/syzkaller/pkg/compiler"
    23  	"github.com/google/syzkaller/pkg/cover"
    24  	"github.com/google/syzkaller/pkg/declextract"
    25  	"github.com/google/syzkaller/pkg/ifaceprobe"
    26  	"github.com/google/syzkaller/pkg/mgrconfig"
    27  	"github.com/google/syzkaller/pkg/osutil"
    28  	"github.com/google/syzkaller/pkg/subsystem"
    29  	_ "github.com/google/syzkaller/pkg/subsystem/lists"
    30  	"github.com/google/syzkaller/pkg/tool"
    31  	"github.com/google/syzkaller/sys/targets"
    32  	"golang.org/x/sync/errgroup"
    33  )
    34  
// The target we currently assume for extracted descriptions (Linux/amd64).
// It is passed to the description compiler for type checking and const extraction,
// selects the arch/subsystem lists via target.OS, and its Arch is the preferred
// architecture when several syscall table entries compete for one syscall name.
var target = targets.Get(targets.Linux, targets.AMD64)
    37  
    38  func main() {
    39  	var (
    40  		flagConfig   = flag.String("config", "", "manager config file")
    41  		flagBinary   = flag.String("binary", "syz-declextract", "path to syz-declextract binary")
    42  		flagCoverage = flag.String("coverage", "", "syzbot coverage jsonl file")
    43  		flagArches   = flag.String("arches", "", "comma-separated list of arches to extract (all if empty)")
    44  	)
    45  	defer tool.Init()()
    46  	mgrcfg, err := mgrconfig.LoadFile(*flagConfig)
    47  	if err != nil {
    48  		tool.Fail(err)
    49  	}
    50  	loadProbeInfo := func() (*ifaceprobe.Info, error) {
    51  		return probe(mgrcfg, *flagConfig)
    52  	}
    53  	cfg := &config{
    54  		archList:      *flagArches,
    55  		autoFile:      filepath.FromSlash("sys/linux/auto.txt"),
    56  		coverFile:     *flagCoverage,
    57  		loadProbeInfo: loadProbeInfo,
    58  		Config: &clangtool.Config{
    59  			ToolBin:    *flagBinary,
    60  			KernelSrc:  mgrcfg.KernelSrc,
    61  			KernelObj:  mgrcfg.KernelObj,
    62  			CacheFile:  filepath.Join(mgrcfg.Workdir, "declextract.cache"),
    63  			DebugTrace: os.Stderr,
    64  		},
    65  	}
    66  	if _, err := run(cfg); err != nil {
    67  		tool.Fail(err)
    68  	}
    69  }
    70  
    71  type config struct {
    72  	archList      string
    73  	autoFile      string
    74  	coverFile     string
    75  	loadProbeInfo func() (*ifaceprobe.Info, error)
    76  	*clangtool.Config
    77  }
    78  
    79  func run(cfg *config) (*declextract.Result, error) {
    80  	out, probeInfo, coverage, syscallRename, err := prepare(cfg)
    81  	if err != nil {
    82  		return nil, err
    83  	}
    84  	res, err := declextract.Run(out, probeInfo, coverage, syscallRename, cfg.DebugTrace)
    85  	if err != nil {
    86  		return nil, err
    87  	}
    88  	if err := osutil.WriteFile(cfg.autoFile, res.Descriptions); err != nil {
    89  		return nil, err
    90  	}
    91  	if err := osutil.WriteFile(cfg.autoFile+".info", serialize(res.Interfaces)); err != nil {
    92  		return nil, err
    93  	}
    94  	// In order to remove unused bits of the descriptions, we need to write them out first,
    95  	// and then parse all descriptions back b/c auto descriptions use some types defined
    96  	// by manual descriptions (compiler.CollectUnused requires complete descriptions).
    97  	// This also canonicalizes them b/c new lines are added during parsing.
    98  	eh, errors := errorHandler()
    99  	desc := ast.ParseGlob(filepath.Join(filepath.Dir(cfg.autoFile), "*.txt"), eh)
   100  	if desc == nil {
   101  		return nil, fmt.Errorf("failed to parse descriptions\n%s", errors.Bytes())
   102  	}
   103  	// Need to clone descriptions b/c CollectUnused changes them slightly during type checking.
   104  	unusedNodes, err := compiler.CollectUnused(desc.Clone(), target, eh)
   105  	if err != nil {
   106  		return nil, fmt.Errorf("failed to typecheck descriptions: %w\n%s", err, errors.Bytes())
   107  	}
   108  	consts := compiler.ExtractConsts(desc.Clone(), target, eh)
   109  	if consts == nil {
   110  		return nil, fmt.Errorf("failed to typecheck descriptions: %w\n%s", err, errors.Bytes())
   111  	}
   112  	finishInterfaces(res.Interfaces, consts, cfg.autoFile)
   113  	if err := osutil.WriteFile(cfg.autoFile+".info", serialize(res.Interfaces)); err != nil {
   114  		return nil, err
   115  	}
   116  	removeUnused(desc, "", unusedNodes)
   117  	// Second pass to remove unused defines/includes. This needs to be done after removing
   118  	// other garbage b/c they may be used by other garbage.
   119  	unusedConsts, err := compiler.CollectUnusedConsts(desc.Clone(), target, res.IncludeUse, eh)
   120  	if err != nil {
   121  		return nil, fmt.Errorf("failed to typecheck descriptions: %w\n%s", err, errors.Bytes())
   122  	}
   123  	removeUnused(desc, cfg.autoFile, unusedConsts)
   124  	// We need re-parse them again b/c new lines are fixed up during parsing.
   125  	formatted := ast.Format(ast.Parse(ast.Format(desc), cfg.autoFile, nil))
   126  	if err := osutil.WriteFile(cfg.autoFile, formatted); err != nil {
   127  		return nil, err
   128  	}
   129  	return res, nil
   130  }
   131  
   132  func removeUnused(desc *ast.Description, autoFile string, unusedNodes []ast.Node) {
   133  	unused := make(map[string]bool)
   134  	for _, n := range unusedNodes {
   135  		_, typ, name := n.Info()
   136  		unused[typ+name] = true
   137  	}
   138  	desc.Nodes = slices.DeleteFunc(desc.Nodes, func(n ast.Node) bool {
   139  		pos, typ, name := n.Info()
   140  		return autoFile != "" && pos.File != autoFile || unused[typ+name]
   141  	})
   142  }
   143  
   144  func prepare(cfg *config) (*declextract.Output, *ifaceprobe.Info, []*cover.FileCoverage,
   145  	map[string][]string, error) {
   146  	arches, err := tool.ParseArchList(target.OS, cfg.archList)
   147  	if err != nil {
   148  		return nil, nil, nil, nil, fmt.Errorf("failed to parse arches flag: %w", err)
   149  	}
   150  	var eg errgroup.Group
   151  	var out *declextract.Output
   152  	eg.Go(func() error {
   153  		var err error
   154  		out, err = clangtool.Run[declextract.Output](cfg.Config)
   155  		if err != nil {
   156  			return err
   157  		}
   158  		return nil
   159  	})
   160  	var probeInfo *ifaceprobe.Info
   161  	eg.Go(func() error {
   162  		var err error
   163  		probeInfo, err = cfg.loadProbeInfo()
   164  		if err != nil {
   165  			return fmt.Errorf("kernel probing failed: %w", err)
   166  		}
   167  		return nil
   168  	})
   169  	var syscallRename map[string][]string
   170  	eg.Go(func() error {
   171  		var err error
   172  		syscallRename, err = buildSyscallRenameMap(cfg.KernelSrc, arches)
   173  		if err != nil {
   174  			return fmt.Errorf("failed to build syscall rename map: %w", err)
   175  		}
   176  		return nil
   177  	})
   178  	var coverage []*cover.FileCoverage
   179  	eg.Go(func() error {
   180  		if cfg.coverFile == "" {
   181  			return nil
   182  		}
   183  		var err error
   184  		coverage, err = loadCoverage(cfg.coverFile)
   185  		return err
   186  	})
   187  	err = eg.Wait()
   188  	return out, probeInfo, coverage, syscallRename, err
   189  }
   190  
   191  func loadCoverage(fileName string) ([]*cover.FileCoverage, error) {
   192  	f, err := os.Open(fileName)
   193  	if err != nil {
   194  		return nil, err
   195  	}
   196  	dec := json.NewDecoder(f)
   197  	dec.DisallowUnknownFields()
   198  	var coverage []*cover.FileCoverage
   199  	for {
   200  		elem := new(cover.FileCoverage)
   201  		if err := dec.Decode(elem); err != nil {
   202  			if err == io.EOF {
   203  				break
   204  			}
   205  			return nil, err
   206  		}
   207  		coverage = append(coverage, elem)
   208  	}
   209  	return coverage, nil
   210  }
   211  
   212  func probe(cfg *mgrconfig.Config, cfgFile string) (*ifaceprobe.Info, error) {
   213  	cacheFile := filepath.Join(cfg.Workdir, "interfaces.json")
   214  	info, err := readProbeResult(cacheFile)
   215  	if err == nil {
   216  		return info, nil
   217  	}
   218  	_, err = osutil.RunCmd(30*time.Minute, "", filepath.Join(cfg.Syzkaller, "bin", "syz-manager"),
   219  		"-config", cfgFile, "-mode", "iface-probe")
   220  	if err != nil {
   221  		return nil, err
   222  	}
   223  	return readProbeResult(cacheFile)
   224  }
   225  
   226  func readProbeResult(file string) (*ifaceprobe.Info, error) {
   227  	data, err := os.ReadFile(file)
   228  	if err != nil {
   229  		return nil, err
   230  	}
   231  	dec := json.NewDecoder(bytes.NewReader(data))
   232  	dec.DisallowUnknownFields()
   233  	info := new(ifaceprobe.Info)
   234  	if err := dec.Decode(info); err != nil {
   235  		return nil, fmt.Errorf("failed to unmarshal interfaces.json: %w", err)
   236  	}
   237  	return info, nil
   238  }
   239  
   240  func errorHandler() (func(pos ast.Pos, msg string), *bytes.Buffer) {
   241  	errors := new(bytes.Buffer)
   242  	eh := func(pos ast.Pos, msg string) {
   243  		pos.File = filepath.Base(pos.File)
   244  		fmt.Fprintf(errors, "%v: %v\n", pos, msg)
   245  	}
   246  	return eh, errors
   247  }
   248  
   249  func serialize(interfaces []*declextract.Interface) []byte {
   250  	w := new(bytes.Buffer)
   251  	for _, iface := range interfaces {
   252  		fmt.Fprintf(w, "%v\t%v\tfunc:%v\tloc:%v\tcoverage:%v\taccess:%v\tmanual_desc:%v\tauto_desc:%v",
   253  			iface.Type, iface.Name, iface.Func, iface.ReachableLOC,
   254  			cover.Percent(iface.CoveredBlocks, iface.TotalBlocks),
   255  			iface.Access, iface.ManualDescriptions, iface.AutoDescriptions)
   256  		for _, file := range iface.Files {
   257  			fmt.Fprintf(w, "\tfile:%v", file)
   258  		}
   259  		for _, subsys := range iface.Subsystems {
   260  			fmt.Fprintf(w, "\tsubsystem:%v", subsys)
   261  		}
   262  		fmt.Fprintf(w, "\n")
   263  	}
   264  	return w.Bytes()
   265  }
   266  
   267  func finishInterfaces(interfaces []*declextract.Interface, consts map[string]*compiler.ConstInfo, autoFile string) {
   268  	manual := make(map[string]bool)
   269  	for file, desc := range consts {
   270  		for _, c := range desc.Consts {
   271  			if file != autoFile {
   272  				manual[c.Name] = true
   273  			}
   274  		}
   275  	}
   276  	extractor := subsystem.MakeExtractor(subsystem.GetList(target.OS))
   277  	for _, iface := range interfaces {
   278  		if iface.IdentifyingConst != "" {
   279  			iface.ManualDescriptions = declextract.Tristate(manual[iface.IdentifyingConst])
   280  		}
   281  		var crashes []*subsystem.Crash
   282  		for _, file := range iface.Files {
   283  			crashes = append(crashes, &subsystem.Crash{GuiltyPath: file})
   284  		}
   285  		for _, s := range extractor.Extract(crashes) {
   286  			iface.Subsystems = append(iface.Subsystems, s.Name)
   287  		}
   288  		slices.Sort(iface.Subsystems)
   289  	}
   290  }
   291  
   292  func buildSyscallRenameMap(sourceDir string, arches []string) (map[string][]string, error) {
   293  	// Some syscalls have different names and entry points and thus need to be renamed.
   294  	// e.g. SYSCALL_DEFINE1(setuid16, old_uid_t, uid) is referred to in the .tbl file with setuid.
   295  	// Parse *.tbl files that map functions defined with SYSCALL_DEFINE macros to actual syscall names.
   296  	// Lines in the files look as follows:
   297  	//	288      common  accept4                 sys_accept4
   298  	// Total mapping is many-to-many, so we give preference to x86 arch, then to 64-bit syscalls,
   299  	// and then just order arches by name to have deterministic result.
   300  	// Note: some syscalls may have no record in the tables for the architectures we support.
   301  	syscalls := make(map[string][]tblSyscall)
   302  	tblFiles, err := findTblFiles(sourceDir, arches)
   303  	if err != nil {
   304  		return nil, err
   305  	}
   306  	if len(tblFiles) == 0 {
   307  		return nil, fmt.Errorf("found no *.tbl files in the kernel dir %v", sourceDir)
   308  	}
   309  	for file, arches := range tblFiles {
   310  		for _, arch := range arches {
   311  			data, err := os.ReadFile(file)
   312  			if err != nil {
   313  				return nil, err
   314  			}
   315  			parseTblFile(data, arch, syscalls)
   316  		}
   317  	}
   318  	rename := make(map[string][]string)
   319  	for syscall, descs := range syscalls {
   320  		slices.SortFunc(descs, func(a, b tblSyscall) int {
   321  			if (a.arch == target.Arch) != (b.arch == target.Arch) {
   322  				if a.arch == target.Arch {
   323  					return -1
   324  				}
   325  				return 1
   326  			}
   327  			if a.is64bit != b.is64bit {
   328  				if a.is64bit {
   329  					return -1
   330  				}
   331  				return 1
   332  			}
   333  			return strings.Compare(a.arch, b.arch)
   334  		})
   335  		fn := descs[0].fn
   336  		rename[fn] = append(rename[fn], syscall)
   337  	}
   338  	return rename, nil
   339  }
   340  
   341  type tblSyscall struct {
   342  	fn      string
   343  	arch    string
   344  	is64bit bool
   345  }
   346  
   347  func parseTblFile(data []byte, arch string, syscalls map[string][]tblSyscall) {
   348  	for s := bufio.NewScanner(bytes.NewReader(data)); s.Scan(); {
   349  		fields := strings.Fields(s.Text())
   350  		if len(fields) < 4 || fields[0] == "#" {
   351  			continue
   352  		}
   353  		group := fields[1]
   354  		syscall := fields[2]
   355  		fn := strings.TrimPrefix(fields[3], "sys_")
   356  		if strings.HasPrefix(syscall, "unused") || fn == "-" ||
   357  			// Powerpc spu group defines some syscalls (utimesat)
   358  			// that are not present on any of our arches.
   359  			group == "spu" ||
   360  			// llseek does not exist, it comes from:
   361  			//	arch/arm64/tools/syscall_64.tbl -> scripts/syscall.tbl
   362  			//	62  32      llseek                          sys_llseek
   363  			// So scripts/syscall.tbl is pulled for 64-bit arch, but the syscall
   364  			// is defined only for 32-bit arch in that file.
   365  			syscall == "llseek" ||
   366  			// Don't want to test it (but see issue 5308).
   367  			syscall == "reboot" {
   368  			continue
   369  		}
   370  		syscalls[syscall] = append(syscalls[syscall], tblSyscall{
   371  			fn:      fn,
   372  			arch:    arch,
   373  			is64bit: group == "common" || strings.Contains(group, "64"),
   374  		})
   375  	}
   376  }
   377  
   378  func findTblFiles(sourceDir string, arches []string) (map[string][]string, error) {
   379  	files := make(map[string][]string)
   380  	for _, name := range arches {
   381  		arch := targets.List[target.OS][name]
   382  		err := filepath.WalkDir(filepath.Join(sourceDir, "arch", arch.KernelHeaderArch),
   383  			func(file string, d fs.DirEntry, err error) error {
   384  				if err == nil && strings.HasSuffix(file, ".tbl") {
   385  					files[file] = append(files[file], arch.VMArch)
   386  				}
   387  				return err
   388  			})
   389  		if err != nil {
   390  			return nil, err
   391  		}
   392  	}
   393  	return files, nil
   394  }