github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/prog/hints.go (about)

     1  // Copyright 2017 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package prog
     5  
     6  // A hint is basically a tuple consisting of a pointer to an argument
     7  // in one of the syscalls of a program and a value, which should be
     8  // assigned to that argument (we call it a replacer).
     9  
    10  // A simplified version of hints workflow looks like this:
    11  //		1. Fuzzer launches a program (we call it a hint seed) and collects all
    12  // the comparisons' data for every syscall in the program.
    13  //		2. Next it tries to match the obtained comparison operands' values
    14  // vs. the input arguments' values.
    15  //		3. For every such match the fuzzer mutates the program by
    16  // replacing the pointed argument with the saved value.
    17  //		4. If a valid program is obtained, then fuzzer launches it and
    18  // checks if new coverage is obtained.
    19  // For more insights on particular mutations please see prog/hints_test.go.
    20  
    21  import (
    22  	"bytes"
    23  	"encoding/binary"
    24  	"fmt"
    25  	"sort"
    26  	"sync"
    27  
    28  	"github.com/google/syzkaller/pkg/image"
    29  )
    30  
// CompMap maps comparison operand that could come from the input to the second operand to the PC.
// The layout is CompMap[operand1][operand2][pc] == true (see Add).
type CompMap map[uint64]map[uint64]map[uint64]bool

const (
	// maxDataLength caps the number of leading byte offsets of a data argument
	// that checkDataArg scans for hint matches.
	maxDataLength = 100
)

// specialIntsSet is the set form of specialInts. It is filled in init and
// consulted by shrinkExpand to skip replacements with such values.
var specialIntsSet map[uint64]bool
    39  
    40  func (m CompMap) Add(pc, arg1, arg2 uint64, isConst bool) {
    41  	if _, ok := m[arg1]; !ok {
    42  		m[arg1] = make(map[uint64]map[uint64]bool)
    43  	}
    44  	if _, ok := m[arg1][arg2]; !ok {
    45  		m[arg1][arg2] = make(map[uint64]bool)
    46  	}
    47  	m[arg1][arg2][pc] = true
    48  	if !isConst {
    49  		// Both operands could come from the input.
    50  		m.Add(pc, arg2, arg1, true)
    51  	}
    52  }
    53  
    54  func (m CompMap) String() string {
    55  	buf := new(bytes.Buffer)
    56  	for v, comps := range m {
    57  		if len(buf.Bytes()) != 0 {
    58  			fmt.Fprintf(buf, ", ")
    59  		}
    60  		fmt.Fprintf(buf, "0x%x:", v)
    61  		for c := range comps {
    62  			fmt.Fprintf(buf, " 0x%x", c)
    63  		}
    64  	}
    65  	return buf.String()
    66  }
    67  
    68  func (m CompMap) Len() int {
    69  	var count int
    70  	for _, nested := range m {
    71  		for _, nested2 := range nested {
    72  			count += len(nested2)
    73  		}
    74  	}
    75  	return count
    76  }
    77  
    78  // InplaceIntersect() only leaves the value pairs that are also present in other.
    79  func (m CompMap) InplaceIntersect(other CompMap) {
    80  	for val1, nested := range m {
    81  		for val2, pcs := range nested {
    82  			for pc := range pcs {
    83  				if !other[val1][val2][pc] {
    84  					delete(pcs, pc)
    85  				}
    86  			}
    87  			if len(pcs) == 0 {
    88  				delete(nested, val2)
    89  			}
    90  		}
    91  		if len(nested) == 0 {
    92  			delete(m, val1)
    93  		}
    94  	}
    95  }
    96  
    97  // Mutates the program using the comparison operands stored in compMaps.
    98  // For each of the mutants executes the exec callback.
    99  // The callback must return whether we should continue substitution (true)
   100  // or abort the process (false).
   101  func (p *Prog) MutateWithHints(callIndex int, comps CompMap, exec func(p *Prog) bool) {
   102  	p = p.Clone()
   103  	c := p.Calls[callIndex]
   104  	doMore := true
   105  	execValidate := func() bool {
   106  		// Don't try to fix the candidate program.
   107  		// Assuming the original call was sanitized, we've got a bad call
   108  		// as the result of hint substitution, so just throw it away.
   109  		if p.Target.sanitize(c, false) != nil {
   110  			return true
   111  		}
   112  		if p.checkConditions() != nil {
   113  			// Patching unions that no longer satisfy conditions would
   114  			// require much deeped changes to prog arguments than
   115  			// generateHints() expects.
   116  			// Let's just ignore such mutations.
   117  			return true
   118  		}
   119  		p.debugValidate()
   120  		doMore = exec(p)
   121  		return doMore
   122  	}
   123  	ForeachArg(c, func(arg Arg, ctx *ArgCtx) {
   124  		if !doMore {
   125  			ctx.Stop = true
   126  			return
   127  		}
   128  		generateHints(comps, arg, ctx.Field, execValidate)
   129  	})
   130  }
   131  
   132  func generateHints(compMap CompMap, arg Arg, field *Field, exec func() bool) {
   133  	typ := arg.Type()
   134  	if typ == nil || arg.Dir() == DirOut {
   135  		return
   136  	}
   137  	switch t := typ.(type) {
   138  	case *ProcType:
   139  		// Random proc will not pass validation.
   140  		// We can mutate it, but only if the resulting value is within the legal range.
   141  		return
   142  	case *ConstType:
   143  		if IsPad(typ) {
   144  			return
   145  		}
   146  	case *CsumType:
   147  		// Csum will not pass validation and is always computed.
   148  		return
   149  	case *BufferType:
   150  		switch t.Kind {
   151  		case BufferFilename:
   152  			// This can generate escaping paths and is probably not too useful anyway.
   153  			return
   154  		case BufferString, BufferGlob:
   155  			if len(t.Values) != 0 {
   156  				// These are frequently file names or complete enumerations.
   157  				// Mutating these may be useful iff we intercept strcmp
   158  				// (and filter out file names).
   159  				return
   160  			}
   161  		}
   162  	}
   163  
   164  	switch a := arg.(type) {
   165  	case *ConstArg:
   166  		if arg.Type().TypeBitSize() <= 8 {
   167  			// Very small arg, hopefully we can guess it w/o hints help.
   168  			return
   169  		}
   170  		checkConstArg(a, field, compMap, exec)
   171  	case *DataArg:
   172  		if arg.Size() <= 3 {
   173  			// Let's assume it either does not contain anything interesting,
   174  			// or we can guess everything eventually by brute force.
   175  			return
   176  		}
   177  		if typ.(*BufferType).Kind == BufferCompressed {
   178  			checkCompressedArg(a, compMap, exec)
   179  		} else {
   180  			checkDataArg(a, compMap, exec)
   181  		}
   182  	}
   183  }
   184  
   185  func checkConstArg(arg *ConstArg, field *Field, compMap CompMap, exec func() bool) {
   186  	original := arg.Val
   187  	// Note: because shrinkExpand returns a map, order of programs is non-deterministic.
   188  	// This can affect test coverage reports.
   189  replacerLoop:
   190  	for _, replacer := range shrinkExpand(original, compMap, arg.Type().TypeBitSize(), false) {
   191  		if field != nil && len(field.relatedFields) != 0 {
   192  			for related := range field.relatedFields {
   193  				if related.(uselessHinter).uselessHint(replacer) {
   194  					continue replacerLoop
   195  				}
   196  			}
   197  		} else if arg.Type().(uselessHinter).uselessHint(replacer) {
   198  			continue
   199  		}
   200  		arg.Val = replacer
   201  		if !exec() {
   202  			break
   203  		}
   204  	}
   205  	arg.Val = original
   206  }
   207  
   208  func checkDataArg(arg *DataArg, compMap CompMap, exec func() bool) {
   209  	bytes := make([]byte, 8)
   210  	data := arg.Data()
   211  	size := min(len(data), maxDataLength)
   212  	for i := 0; i < size; i++ {
   213  		original := make([]byte, 8)
   214  		copy(original, data[i:])
   215  		val := binary.LittleEndian.Uint64(original)
   216  		for _, replacer := range shrinkExpand(val, compMap, 64, false) {
   217  			binary.LittleEndian.PutUint64(bytes, replacer)
   218  			copy(data[i:], bytes)
   219  			if !exec() {
   220  				break
   221  			}
   222  		}
   223  		copy(data[i:], original)
   224  	}
   225  }
   226  
   227  func checkCompressedArg(arg *DataArg, compMap CompMap, exec func() bool) {
   228  	data0 := arg.Data()
   229  	data, dtor := image.MustDecompress(data0)
   230  	// Images are very large so the generic algorithm for data arguments
   231  	// can produce too many mutants. For images we consider only
   232  	// 4/8-byte aligned ints. This is enough to handle all magic
   233  	// numbers and checksums. We also ignore 0 and ^uint64(0) source bytes,
   234  	// because there are too many of these in lots of images.
   235  	bytes := make([]byte, 8)
   236  	doMore := true
   237  	for i := 0; i < len(data) && doMore; i += 4 {
   238  		original := make([]byte, 8)
   239  		copy(original, data[i:])
   240  		val := binary.LittleEndian.Uint64(original)
   241  		for _, replacer := range shrinkExpand(val, compMap, 64, true) {
   242  			binary.LittleEndian.PutUint64(bytes, replacer)
   243  			copy(data[i:], bytes)
   244  			arg.SetData(image.Compress(data))
   245  			// Unmap the image for the duration of the execution.
   246  			// Execution can take a while and uncompressed images are large,
   247  			// since hints jobs are executed round-robin, we can have thousands of them running.
   248  			dtor()
   249  			doMore = exec()
   250  			data, dtor = image.MustDecompress(data0)
   251  			if !doMore {
   252  				break
   253  			}
   254  		}
   255  		copy(data[i:], original)
   256  	}
   257  	dtor()
   258  	arg.SetData(data0)
   259  }
   260  
// Shrink and expand mutations model the cases when the syscall arguments
// are cast to narrower (and wider) integer types.
//
// Motivation for shrink:
//
//	void f(u16 x) {
//			u8 y = (u8)x;
//			if (y == 0xab) {...}
//	}
//
// If we call f(0x1234), then we'll see a comparison 0x34 vs 0xab and we'll
// be unable to match the argument 0x1234 with any of the comparison operands.
// Thus we shrink 0x1234 to 0x34 and try to match 0x34.
// If there's a match for the shrunk value, then we replace the corresponding
// bytes of the input (in the given example we'll get 0x12ab).
// Sometimes the other comparison operand will be wider than the shrunk value
// (in the example above consider comparison if (y == 0xdeadbeef) {...}).
// In this case we ignore such comparison because we couldn't come up with
// any valid code example that does similar things. To avoid such comparisons
// we require the bits of the other operand above the shrunk size to be
// either all zeros or all ones.
//
// Motivation for expand:
//
//	void f(i8 x) {
//			i16 y = (i16)x;
//			if (y == -2) {...}
//	}
//
// Suppose we call f(-1), then we'll see a comparison 0xffff vs 0xfffe and be
// unable to match input vs any operands. Thus we sign extend the input and
// check the extension.
// As with shrink we ignore cases when the other operand is wider.
// Note that executor sign extends all the comparison operands to int64.
//
// Parameters: v is the current value of the argument, compMap holds the
// observed comparisons, bitsize is the width of the argument in bits and
// image selects the stricter filtering used for compressed images.
// The result is the deduplicated list of replacement values for v sorted in
// ascending order, or nil if there are none.
func shrinkExpand(v uint64, compMap CompMap, bitsize uint64, image bool) []uint64 {
	v = truncateToBitSize(v, bitsize)
	// All ones in the low bitsize bits.
	limit := uint64(1<<bitsize - 1)
	var replacers map[uint64]bool
	// Positive widths shrink v to that many low bytes,
	// negative widths sign extend v from that many bytes.
	for _, iwidth := range []int{8, 4, 2, 1, -4, -2, -1} {
		var width int
		var size, mutant uint64
		if iwidth > 0 {
			width = iwidth
			size = uint64(width) * 8
			mutant = v & ((1 << size) - 1)
		} else {
			width = -iwidth
			size = min(uint64(width)*8, bitsize)
			// Sign extension makes a difference only if the sign bit is set.
			if v&(1<<(size-1)) == 0 {
				continue
			}
			mutant = v | ^((1 << size) - 1)
		}
		if image {
			// For images we can produce too many mutants for small integers.
			if width < 4 {
				continue
			}
			// Skip sources that are zero or have all ones in the low size bits.
			if mutant == 0 || (mutant|^((1<<size)-1)) == ^uint64(0) {
				continue
			}
		}
		// Use big-endian match/replace for both blobs and ints.
		// Sometimes we have unmarked blobs (no little/big-endian info);
		// for ANYBLOBs we intentionally lose all marking;
		// but even for marked ints we may need this too.
		// Consider that kernel code does not convert the data
		// (i.e. not ntohs(pkt->proto) == ETH_P_BATMAN),
		// but instead converts the constant (i.e. pkt->proto == htons(ETH_P_BATMAN)).
		// In such case we will see dynamic operand that does not match what we have in the program.
		for _, bigendian := range []bool{false, true} {
			if bigendian {
				// A big-endian pass makes no sense for a single byte.
				if width == 1 {
					continue
				}
				// NOTE(review): swapInt presumably reverses the byte order
				// of the low width bytes — confirm against its definition.
				mutant = swapInt(mutant, width)
			}
			for newV := range compMap[mutant] {
				// Check the limit for negative numbers:
				// a value above limit is accepted only if all of its bits
				// starting from bit bitsize-1 are set.
				if newV > limit && ((^(limit >> 1) & newV) != ^(limit >> 1)) {
					continue
				}
				mask := uint64(1<<size - 1)
				newHi := newV & ^mask
				newV = newV & mask
				// Ignore operands that are wider than size:
				// the high bits must be all zeros or all ones.
				if newHi != 0 && newHi^^mask != 0 {
					continue
				}
				if bigendian {
					// Convert the matched operand back with the same swap.
					newV = swapInt(newV, width)
				}
				// We insert special ints (like 0) with high probability,
				// so we don't try to replace with special ints here.
				// Images are large so it's hard to guess even special
				// ints with random mutations.
				if !image && specialIntsSet[newV] {
					continue
				}
				// Replace size least significant bits of v with
				// corresponding bits of newV. Leave the rest of v as it was.
				replacer := (v &^ mask) | newV
				if replacer == v {
					continue
				}
				replacer = truncateToBitSize(replacer, bitsize)
				// TODO(dvyukov): should we try replacing with arg+/-1?
				// This could trigger some off-by-ones.
				if replacers == nil {
					replacers = make(map[uint64]bool)
				}
				replacers[replacer] = true
			}
		}
	}
	if replacers == nil {
		return nil
	}
	// Return the replacers in sorted order to make the result deterministic.
	res := make([]uint64, 0, len(replacers))
	for v := range replacers {
		res = append(res, v)
	}
	sort.Slice(res, func(i, j int) bool {
		return res[i] < res[j]
	})
	return res
}
   386  
   387  type HintsLimiter struct {
   388  	mu       sync.Mutex
   389  	attempts map[uint64]int // replacement attempts per PC
   390  }
   391  
   392  // Limit restricts hints to at most N replacement attempts per single kernel PC
   393  // (globally, across all hints mutations for all programs).
   394  // We are getting too many generated candidates, the fuzzer may not keep up
   395  // with them at all (hints jobs keep growing infinitely). If a hint indeed came
   396  // from the input w/o transformation, then we should guess it on the first
   397  // attempt (or at least after few attempts). If it did not come from the input,
   398  // or came with a non-trivial transformation, then any number of attempts won't
   399  // help. So limit the total number of attempts (until the next restart).
   400  func (limiter *HintsLimiter) Limit(comps CompMap) {
   401  	const N = 10
   402  	limiter.mu.Lock()
   403  	defer limiter.mu.Unlock()
   404  	if limiter.attempts == nil {
   405  		limiter.attempts = make(map[uint64]int)
   406  	}
   407  	for op1, ops2 := range comps {
   408  		for op2, pcs := range ops2 {
   409  			for pc := range pcs {
   410  				limiter.attempts[pc]++
   411  				if limiter.attempts[pc] > N {
   412  					delete(pcs, pc)
   413  				}
   414  			}
   415  			if len(pcs) == 0 {
   416  				delete(ops2, op2)
   417  			}
   418  		}
   419  		if len(ops2) == 0 {
   420  			delete(comps, op1)
   421  		}
   422  	}
   423  }
   424  
   425  func init() {
   426  	specialIntsSet = make(map[uint64]bool)
   427  	for _, v := range specialInts {
   428  		specialIntsSet[v] = true
   429  	}
   430  }