github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/prog/target.go (about)

     1  // Copyright 2017 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package prog
     5  
     6  import (
     7  	"fmt"
     8  	"math/rand"
     9  	"slices"
    10  	"sort"
    11  	"strings"
    12  	"sync"
    13  	"sync/atomic"
    14  
    15  	"github.com/google/syzkaller/pkg/hash"
    16  )
    17  
// Target describes target OS/arch pair.
//
// Targets are registered with RegisterTarget and obtained with GetTarget or
// AllTargets, which run the one-time lazy initialization guarded by init.
type Target struct {
	// OS and Arch together form the registry key "OS/Arch".
	OS         string
	Arch       string
	Revision   string // unique hash representing revision of the descriptions
	PtrSize    uint64
	// MakeProgGen sizes the builder's memory allocator as NumPages*PageSize.
	PageSize   uint64
	NumPages   uint64
	DataOffset uint64
	BigEndian  bool

	// Raw descriptions of the target. initTarget derives the lookup maps below
	// from them and sets every Syscall.ID to the syscall's index in Syscalls.
	Syscalls  []*Syscall
	Resources []*ResourceDesc
	Consts    []ConstValue
	Flags     []FlagDesc
	Types     []Type

	// MakeDataMmap creates calls that mmaps target data memory range.
	MakeDataMmap func() []*Call

	// Neutralize neutralizes harmful calls by transforming them into non-harmful ones
	// (e.g. an ioctl that turns off console output is turned into ioctl that turns on output).
	// fixStructure determines whether it's allowed to make structural changes (e.g. add or
	// remove arguments). It is helpful e.g. when we do neutralization while iterating over the
	// arguments.
	// lazyInit installs a no-op default before the arch callbacks run.
	Neutralize func(c *Call, fixStructure bool) error

	// AnnotateCall annotates a syscall invocation in C reproducers.
	// The returned string will be placed inside a comment except for the
	// empty string which will omit the comment.
	// lazyInit installs a default that returns "" before the arch callbacks run.
	AnnotateCall func(c ExecCall) string

	// SpecialTypes allows a target to do custom generation/mutation for some structs and unions.
	// Map key is the struct/union name for which custom generation/mutation is required.
	// Map value is the custom generation/mutation function that will be called
	// for the corresponding type. g is a helper object that allows to generate random numbers,
	// allocate memory, etc. typ is the struct/union type. old is the old value of the struct/union
	// for mutation, or nil for generation. The function returns a new value of the struct/union,
	// and optionally any calls that need to be inserted before the arg reference.
	SpecialTypes map[string]func(g *Gen, typ Type, dir Dir, old Arg) (Arg, []*Call)

	// Resources that play an auxiliary role, but are widely used throughout all syscalls (e.g. pid/uid).
	AuxResources map[string]bool

	// Additional special invalid pointer values besides NULL to use.
	// lazyInit prepends three built-in values to this list; the combined length
	// must not exceed maxSpecialPointers.
	SpecialPointers []uint64

	// Special file name lengths that can provoke bugs (e.g. PATH_MAX).
	// If left empty, lazyInit sets it to {256, 512, 4096}. Every value must be
	// positive and below memAllocMaxMem, otherwise lazyInit panics.
	// (The misspelling in the field name is part of the exported API.)
	SpecialFileLenghts []int

	// Filled by prog package (rebuilt by initTarget from Syscalls, Consts and Flags):
	SyscallMap map[string]*Syscall
	ConstMap   map[string]uint64
	FlagsMap   map[string][]string

	// init guards lazyInit; fillArch and initArch are the callbacks passed to
	// RegisterTarget and are invoked from lazyInit.
	init        sync.Once
	fillArch    func(target *Target)
	initArch    func(target *Target)
	// Maps resource name to its description (the result of restoreLinks).
	resourceMap map[string]*ResourceDesc
	// Maps resource name to a list of calls that can create the resource.
	resourceCtors map[string][]ResourceCtor
	any           anyTypes

	// The default ChoiceTable is used only by tests and utilities, so we initialize it lazily.
	defaultOnce        sync.Once
	defaultChoiceTable *ChoiceTable
}
    85  
// maxSpecialPointers is the upper bound on len(Target.SpecialPointers) once
// lazyInit has prepended the built-in values; lazyInit panics if it is exceeded.
const maxSpecialPointers = 16

// targets holds every registered target keyed by "OS/Arch".
// It is written by RegisterTarget and read by GetTarget and AllTargets.
// NOTE(review): no locking is visible here; presumably all registration happens
// during package initialization -- confirm.
var targets = make(map[string]*Target)
    89  
    90  func RegisterTarget(target *Target, fill, init func(target *Target)) {
    91  	key := target.OS + "/" + target.Arch
    92  	if targets[key] != nil {
    93  		panic(fmt.Sprintf("duplicate target %v", key))
    94  	}
    95  	target.fillArch = fill
    96  	target.initArch = init
    97  	targets[key] = target
    98  }
    99  
   100  func GetTarget(OS, arch string) (*Target, error) {
   101  	key := OS + "/" + arch
   102  	target := targets[key]
   103  	if target == nil {
   104  		var supported []string
   105  		for _, t := range targets {
   106  			supported = append(supported, fmt.Sprintf("%v/%v", t.OS, t.Arch))
   107  		}
   108  		sort.Strings(supported)
   109  		return nil, fmt.Errorf("unknown target: %v (supported: %v)", key, supported)
   110  	}
   111  	target.init.Do(target.lazyInit)
   112  	return target, nil
   113  }
   114  
   115  func AllTargets() []*Target {
   116  	var res []*Target
   117  	for _, target := range targets {
   118  		target.init.Do(target.lazyInit)
   119  		res = append(res, target)
   120  	}
   121  	sort.Slice(res, func(i, j int) bool {
   122  		if res[i].OS != res[j].OS {
   123  			return res[i].OS < res[j].OS
   124  		}
   125  		return res[i].Arch < res[j].Arch
   126  	})
   127  	return res
   128  }
   129  
   130  // Extend extends a target with a new set of syscalls, types, and resources.
   131  // It is assumed that all new syscalls, types, and resources do not conflict
   132  // with those already present in the target.
   133  func (target *Target) Extend(syscalls []*Syscall, types []Type, resources []*ResourceDesc) {
   134  	target.Syscalls = append(target.Syscalls, syscalls...)
   135  	target.Types = append(target.Types, types...)
   136  	target.Resources = append(target.Resources, resources...)
   137  	// Updates the system call map and restores any links.
   138  	target.initTarget()
   139  }
   140  
   141  func (target *Target) lazyInit() {
   142  	target.Neutralize = func(c *Call, fixStructure bool) error { return nil }
   143  	target.AnnotateCall = func(c ExecCall) string { return "" }
   144  	target.fillArch(target)
   145  	target.initTarget()
   146  	target.initUselessHints()
   147  	target.initRelatedFields()
   148  	target.initArch(target)
   149  	// We ignore the return value here as they are cached, and it makes more
   150  	// sense to react to them when we attempt to execute a KFuzzTest call.
   151  	_, _ = target.KFuzzTestRunID()
   152  
   153  	// Give these 2 known addresses fixed positions and prepend target-specific ones at the end.
   154  	target.SpecialPointers = append([]uint64{
   155  		0x0000000000000000, // NULL pointer (keep this first because code uses special index=0 as NULL)
   156  		0xffffffffffffffff, // unmapped kernel address (keep second because serialized value will match actual pointer value)
   157  		0x9999999999999999, // non-canonical address
   158  	}, target.SpecialPointers...)
   159  	if len(target.SpecialPointers) > maxSpecialPointers {
   160  		panic("too many special pointers")
   161  	}
   162  	if len(target.SpecialFileLenghts) == 0 {
   163  		// Just some common lengths that can be used as PATH_MAX/MAX_NAME.
   164  		target.SpecialFileLenghts = []int{256, 512, 4096}
   165  	}
   166  	for _, ln := range target.SpecialFileLenghts {
   167  		if ln <= 0 || ln >= memAllocMaxMem {
   168  			panic(fmt.Sprintf("bad special file length %v", ln))
   169  		}
   170  	}
   171  }
   172  
   173  func (target *Target) initTarget() {
   174  	checkMaxCallID(len(target.Syscalls) - 1)
   175  	target.ConstMap = make(map[string]uint64)
   176  	for _, c := range target.Consts {
   177  		target.ConstMap[c.Name] = c.Value
   178  	}
   179  
   180  	target.resourceMap = restoreLinks(target.Syscalls, target.Resources, target.Types)
   181  	target.initAnyTypes()
   182  
   183  	target.SyscallMap = make(map[string]*Syscall)
   184  	for i, c := range target.Syscalls {
   185  		c.ID = i
   186  		target.SyscallMap[c.Name] = c
   187  	}
   188  
   189  	target.FlagsMap = make(map[string][]string)
   190  	for _, c := range target.Flags {
   191  		target.FlagsMap[c.Name] = c.Values
   192  	}
   193  
   194  	target.populateResourceCtors()
   195  	target.resourceCtors = make(map[string][]ResourceCtor)
   196  	for _, res := range target.Resources {
   197  		target.resourceCtors[res.Name] = target.calcResourceCtors(res, false)
   198  	}
   199  }
   200  
   201  func (target *Target) initUselessHints() {
   202  	// Pre-compute useless hints for each type and deduplicate resulting maps
   203  	// (there will be lots of duplicates).
   204  	computed := make(map[Type]bool)
   205  	dedup := make(map[string]map[uint64]struct{})
   206  	ForeachType(target.Syscalls, func(t Type, ctx *TypeCtx) {
   207  		hinter, ok := t.(uselessHinter)
   208  		if !ok || computed[t] {
   209  			return
   210  		}
   211  		computed[t] = true
   212  		hints := hinter.calcUselessHints()
   213  		if len(hints) == 0 {
   214  			return
   215  		}
   216  		slices.Sort(hints)
   217  		hints = slices.Compact(hints)
   218  		sig := hash.String(hints)
   219  		m := dedup[sig]
   220  		if m == nil {
   221  			m = make(map[uint64]struct{})
   222  			for _, v := range hints {
   223  				m[v] = struct{}{}
   224  			}
   225  			dedup[sig] = m
   226  		}
   227  		hinter.setUselessHints(m)
   228  	})
   229  }
   230  
   231  func (target *Target) initRelatedFields() {
   232  	// Compute sets of related fields that are used to reduce amount of produced hint replacements.
   233  	// Related fields are sets of arguments to the same syscall, in the same position, that operate
   234  	// on the same resource. The best example of related fields is a set of ioctl commands on the same fd:
   235  	//
   236  	//	ioctl$FOO1(fd fd_foo, cmd const[FOO1], ...)
   237  	//	ioctl$FOO2(fd fd_foo, cmd const[FOO2], ...)
   238  	//	ioctl$FOO3(fd fd_foo, cmd const[FOO3], ...)
   239  	//
   240  	// All cmd args related and we should not try to replace them with each other
   241  	// (e.g. try to morph ioctl$FOO1 into ioctl$FOO2). This is both unnecessary, leads to confusing reproducers,
   242  	// and in some cases to badly confused argument types, see e.g.:
   243  	// https://github.com/google/syzkaller/issues/502
   244  	// https://github.com/google/syzkaller/issues/4939
   245  	//
   246  	// However, notion of related fields is wider and includes e.g. socket syscall family/type/proto,
   247  	// setsockopt consts, and in some cases even openat flags/mode.
   248  	//
   249  	// Related fields can include const, flags and int types.
   250  	//
   251  	// Notion of "same resource" is also quite generic b/c syscalls can accept several resource types,
   252  	// and filenames/strings are also considered as a resource in this context. For example, openat syscalls
   253  	// that operate on the same file are related, but are not related to openat calls that operate on other files.
   254  	groups := make(map[string]map[Type]struct{})
   255  	for _, call := range target.Syscalls {
   256  		// Id is used to identify related syscalls.
   257  		// We first collect all resources/strings/files. This needs to be done first b/c e.g. mmap has
   258  		// fd resource at the end, so we need to do this before the next loop.
   259  		id := call.CallName
   260  		for i, field := range call.Args {
   261  			switch arg := field.Type.(type) {
   262  			case *ResourceType:
   263  				id += fmt.Sprintf("-%v:%v", i, arg.Name())
   264  			case *PtrType:
   265  				if typ, ok := arg.Elem.(*BufferType); ok && typ.Kind == BufferString && len(typ.Values) == 1 {
   266  					id += fmt.Sprintf("-%v:%v", i, typ.Values[0])
   267  				}
   268  			}
   269  		}
   270  		// Now we group const/flags args together.
   271  		// But also if we see a const, we update id to include it. This is required for e.g.
   272  		// socket/socketpair/setsockopt calls. For these calls all families can be groups, but types should be
   273  		// grouped only for the same family, and protocols should be grouped only for the same family+type.
   274  		// We assume the "more important" discriminating arguments come first (this is not necessary true,
   275  		// but seems to be the case in real syscalls as it's unreasonable to pass less important things first).
   276  		for i, field := range call.Args {
   277  			switch field.Type.(type) {
   278  			case *ConstType:
   279  			case *FlagsType:
   280  			case *IntType:
   281  			default:
   282  				continue
   283  			}
   284  			argID := fmt.Sprintf("%v/%v", id, i)
   285  			group := groups[argID]
   286  			if group == nil {
   287  				group = make(map[Type]struct{})
   288  				groups[argID] = group
   289  			}
   290  			call.Args[i].relatedFields = group
   291  			group[field.Type] = struct{}{}
   292  			switch arg := field.Type.(type) {
   293  			case *ConstType:
   294  				id += fmt.Sprintf("-%v:%v", i, arg.Val)
   295  			}
   296  		}
   297  	}
   298  	// Drop groups that consist of only a single field as they are not useful.
   299  	for _, call := range target.Syscalls {
   300  		for i := range call.Args {
   301  			if len(call.Args[i].relatedFields) == 1 {
   302  				call.Args[i].relatedFields = nil
   303  			}
   304  		}
   305  	}
   306  }
   307  
   308  func (target *Target) GetConst(name string) uint64 {
   309  	v, ok := target.ConstMap[name]
   310  	if !ok {
   311  		panic(fmt.Sprintf("const %v is not defined for %v/%v", name, target.OS, target.Arch))
   312  	}
   313  	return v
   314  }
   315  
   316  func (target *Target) sanitize(c *Call, fix bool) error {
   317  	// For now, even though we accept the fix argument, it does not have the full effect.
   318  	// It de facto only denies structural changes, e.g. deletions of arguments.
   319  	// TODO: rewrite the corresponding sys/*/init.go code.
   320  	return target.Neutralize(c, fix)
   321  }
   322  
   323  func RestoreLinks(syscalls []*Syscall, resources []*ResourceDesc, types []Type) {
   324  	restoreLinks(syscalls, resources, types)
   325  }
   326  
// Process-wide table of types registered by restoreLinks.
// typeRefs stores a []Type whose element 0 is a nil placeholder; every type
// passed to restoreLinks is appended to it and told its index via setRef.
// typeRefMu is held by restoreLinks for the whole update of the table.
var (
	typeRefMu sync.Mutex
	typeRefs  atomic.Value // []Type
)
   331  
   332  func restoreLinks(syscalls []*Syscall, resources []*ResourceDesc, types []Type) map[string]*ResourceDesc {
   333  	typeRefMu.Lock()
   334  	defer typeRefMu.Unlock()
   335  	refs := []Type{nil}
   336  	if old := typeRefs.Load(); old != nil {
   337  		refs = old.([]Type)
   338  	}
   339  	for _, typ := range types {
   340  		typ.setRef(Ref(len(refs)))
   341  		refs = append(refs, typ)
   342  	}
   343  	typeRefs.Store(refs)
   344  
   345  	resourceMap := make(map[string]*ResourceDesc)
   346  	for _, res := range resources {
   347  		resourceMap[res.Name] = res
   348  	}
   349  
   350  	ForeachType(syscalls, func(typ Type, ctx *TypeCtx) {
   351  		if ref, ok := typ.(Ref); ok {
   352  			typ = types[ref]
   353  			*ctx.Ptr = typ
   354  		}
   355  		switch t := typ.(type) {
   356  		case *ResourceType:
   357  			t.Desc = resourceMap[t.TypeName]
   358  			if t.Desc == nil {
   359  				panic("no resource desc")
   360  			}
   361  		}
   362  	})
   363  	return resourceMap
   364  }
   365  
   366  func (target *Target) DefaultChoiceTable() *ChoiceTable {
   367  	target.defaultOnce.Do(func() {
   368  		target.defaultChoiceTable = target.BuildChoiceTable(nil, nil)
   369  	})
   370  	return target.defaultChoiceTable
   371  }
   372  
   373  func (target *Target) NoAutoChoiceTable() *ChoiceTable {
   374  	calls := map[*Syscall]bool{}
   375  	for _, c := range target.Syscalls {
   376  		if c.Attrs.Automatic {
   377  			continue
   378  		}
   379  		calls[c] = true
   380  	}
   381  	return target.BuildChoiceTable(nil, calls)
   382  }
   383  
   384  func (target *Target) RequiredGlobs() []string {
   385  	globs := make(map[string]bool)
   386  	ForeachType(target.Syscalls, func(typ Type, ctx *TypeCtx) {
   387  		switch a := typ.(type) {
   388  		case *BufferType:
   389  			if a.Kind == BufferGlob {
   390  				for _, glob := range requiredGlobs(a.SubKind) {
   391  					globs[glob] = true
   392  				}
   393  			}
   394  		}
   395  	})
   396  	return stringMapToSlice(globs)
   397  }
   398  
   399  func (target *Target) UpdateGlobs(globFiles map[string][]string) {
   400  	// TODO: make host.DetectSupportedSyscalls below filter out globs with no values.
   401  	// Also make prog package more strict with respect to generation/mutation of globs
   402  	// with no values (they still can appear in tests and tools). We probably should
   403  	// generate an empty string for these and never mutate.
   404  	ForeachType(target.Syscalls, func(typ Type, ctx *TypeCtx) {
   405  		switch a := typ.(type) {
   406  		case *BufferType:
   407  			if a.Kind == BufferGlob {
   408  				a.Values = populateGlob(a.SubKind, globFiles)
   409  			}
   410  		}
   411  	})
   412  }
   413  
   414  func requiredGlobs(pattern string) []string {
   415  	var res []string
   416  	for _, tok := range strings.Split(pattern, ":") {
   417  		if tok[0] != '-' {
   418  			res = append(res, tok)
   419  		}
   420  	}
   421  	return res
   422  }
   423  
   424  func populateGlob(pattern string, globFiles map[string][]string) []string {
   425  	files := make(map[string]bool)
   426  	parts := strings.Split(pattern, ":")
   427  	for _, tok := range parts {
   428  		if tok[0] != '-' {
   429  			for _, file := range globFiles[tok] {
   430  				files[file] = true
   431  			}
   432  		}
   433  	}
   434  	for _, tok := range parts {
   435  		if tok[0] == '-' {
   436  			delete(files, tok[1:])
   437  		}
   438  	}
   439  	return stringMapToSlice(files)
   440  }
   441  
   442  func stringMapToSlice(m map[string]bool) []string {
   443  	var res []string
   444  	for k := range m {
   445  		res = append(res, k)
   446  	}
   447  	sort.Strings(res)
   448  	return res
   449  }
   450  
// Gen is the helper object handed to Target.SpecialTypes callbacks.
// Its exported methods delegate to the random generator r, using the
// generation state s where one is needed.
type Gen struct {
	r *randGen
	s *state
}
   455  
   456  func (g *Gen) Target() *Target {
   457  	return g.r.target
   458  }
   459  
   460  func (g *Gen) Rand() *rand.Rand {
   461  	return g.r.Rand
   462  }
   463  
   464  func (g *Gen) NOutOf(n, outOf int) bool {
   465  	return g.r.nOutOf(n, outOf)
   466  }
   467  
   468  func (g *Gen) Alloc(ptrType Type, dir Dir, data Arg) (Arg, []*Call) {
   469  	return g.r.allocAddr(g.s, ptrType, dir, data.Size(), data), nil
   470  }
   471  
   472  func (g *Gen) GenerateArg(typ Type, dir Dir, pcalls *[]*Call) Arg {
   473  	return g.generateArg(typ, dir, pcalls, false)
   474  }
   475  
   476  func (g *Gen) GenerateSpecialArg(typ Type, dir Dir, pcalls *[]*Call) Arg {
   477  	return g.generateArg(typ, dir, pcalls, true)
   478  }
   479  
   480  func (g *Gen) generateArg(typ Type, dir Dir, pcalls *[]*Call, ignoreSpecial bool) Arg {
   481  	arg, calls := g.r.generateArgImpl(g.s, typ, dir, ignoreSpecial)
   482  	*pcalls = append(*pcalls, calls...)
   483  	g.r.target.assignSizesArray([]Arg{arg}, []Field{{Name: "", Type: arg.Type()}}, nil)
   484  	return arg
   485  }
   486  
   487  func (g *Gen) MutateArg(arg0 Arg) (calls []*Call) {
   488  	updateSizes := true
   489  	for stop := false; !stop; stop = g.r.oneOf(3) {
   490  		ma := &mutationArgs{target: g.r.target, ignoreSpecial: true}
   491  		ForeachSubArg(arg0, ma.collectArg)
   492  		if len(ma.args) == 0 {
   493  			// TODO(dvyukov): probably need to return this condition
   494  			// and updateSizes to caller so that Mutate can act accordingly.
   495  			return
   496  		}
   497  		arg, ctx := ma.chooseArg(g.r.Rand)
   498  		newCalls, ok := g.r.target.mutateArg(g.r, g.s, arg, ctx, &updateSizes)
   499  		if !ok {
   500  			continue
   501  		}
   502  		calls = append(calls, newCalls...)
   503  	}
   504  	return calls
   505  }
   506  
// Builder incrementally assembles a Prog for a target.
// Calls are added with Append, memory is reserved with Allocate and
// AllocateVMA, and Finalize validates the program and hands it out.
type Builder struct {
	target *Target
	ma     *memAlloc
	p      *Prog // set to nil by a successful Finalize
}
   512  
   513  func MakeProgGen(target *Target) *Builder {
   514  	return &Builder{
   515  		target: target,
   516  		ma:     newMemAlloc(target.NumPages * target.PageSize),
   517  		p: &Prog{
   518  			Target: target,
   519  		},
   520  	}
   521  }
   522  
   523  func (pg *Builder) Append(c *Call) error {
   524  	pg.target.assignSizesCall(c)
   525  	pg.target.sanitize(c, true)
   526  	pg.p.Calls = append(pg.p.Calls, c)
   527  	return nil
   528  }
   529  
   530  func (pg *Builder) Allocate(size, alignment uint64) uint64 {
   531  	return pg.ma.alloc(nil, size, alignment)
   532  }
   533  
   534  func (pg *Builder) AllocateVMA(npages uint64) uint64 {
   535  	return pg.ma.alloc(nil, npages*pg.target.PageSize, pg.target.PageSize)
   536  }
   537  
   538  func (pg *Builder) Finalize() (*Prog, error) {
   539  	if err := pg.p.validate(); err != nil {
   540  		return nil, err
   541  	}
   542  	if _, err := pg.p.SerializeForExec(); err != nil {
   543  		return nil, err
   544  	}
   545  	p := pg.p
   546  	pg.p = nil
   547  	return p, nil
   548  }
   549  
   550  var kFuzzTestIDCache struct {
   551  	sync.Once
   552  	id  int
   553  	err error
   554  }
   555  
   556  // KFuzzTestRunID returns the ID for the syz_kfuzztest_run pseudo-syscall,
   557  // or an error if it is not found in the target.
   558  func (t *Target) KFuzzTestRunID() (int, error) {
   559  	kFuzzTestIDCache.Do(func() {
   560  		for _, call := range t.Syscalls {
   561  			if call.Attrs.KFuzzTest {
   562  				kFuzzTestIDCache.id = call.ID
   563  				return
   564  			}
   565  		}
   566  		kFuzzTestIDCache.err = fmt.Errorf("could not find ID for syz_kfuzztest_run - does it exist?")
   567  	})
   568  	return kFuzzTestIDCache.id, kFuzzTestIDCache.err
   569  }