github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/tools/syz-trace2syz/proggen/proggen.go (about)

     1  // Copyright 2018 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  //go:build !codeanalysis
     5  
     6  package proggen
     7  
     8  import (
     9  	"encoding/binary"
    10  	"fmt"
    11  	"math/rand"
    12  	"os"
    13  
    14  	"github.com/google/syzkaller/pkg/log"
    15  	"github.com/google/syzkaller/prog"
    16  	"github.com/google/syzkaller/tools/syz-trace2syz/parser"
    17  )
    18  
    19  func ParseFile(filename string, target *prog.Target) ([]*prog.Prog, error) {
    20  	data, err := os.ReadFile(filename)
    21  	if err != nil {
    22  		return nil, fmt.Errorf("error reading file: %v", err)
    23  	}
    24  	return ParseData(data, target)
    25  }
    26  
    27  func ParseData(data []byte, target *prog.Target) ([]*prog.Prog, error) {
    28  	tree, err := parser.ParseData(data)
    29  	if err != nil {
    30  		return nil, err
    31  	}
    32  	if tree == nil {
    33  		return nil, nil
    34  	}
    35  	var progs []*prog.Prog
    36  	parseTree(tree, tree.RootPid, target, &progs)
    37  	return progs, nil
    38  }
    39  
    40  // parseTree groups system calls in the trace by process id.
    41  // The tree preserves process hierarchy i.e. parent->[]child
    42  func parseTree(tree *parser.TraceTree, pid int64, target *prog.Target, progs *[]*prog.Prog) {
    43  	log.Logf(2, "parsing trace pid %v", pid)
    44  	if p := genProg(tree.TraceMap[pid], target); p != nil {
    45  		*progs = append(*progs, p)
    46  	}
    47  	for _, childPid := range tree.Ptree[pid] {
    48  		if tree.TraceMap[childPid] != nil {
    49  			parseTree(tree, childPid, target, progs)
    50  		}
    51  	}
    52  }
    53  
    54  // Context stores metadata related to a syzkaller program
    55  type context struct {
    56  	builder           *prog.Builder
    57  	target            *prog.Target
    58  	selectors         []callSelector
    59  	returnCache       returnCache
    60  	currentStraceCall *parser.Syscall
    61  	currentSyzCall    *prog.Call
    62  }
    63  
    64  // genProg converts a trace to one of our programs.
    65  func genProg(trace *parser.Trace, target *prog.Target) *prog.Prog {
    66  	retCache := newRCache()
    67  	ctx := &context{
    68  		builder:     prog.MakeProgGen(target),
    69  		target:      target,
    70  		selectors:   newSelectors(target, retCache),
    71  		returnCache: retCache,
    72  	}
    73  	for _, sCall := range trace.Calls {
    74  		if sCall.Paused {
    75  			// Probably a case where the call was killed by a signal like the following
    76  			// 2179  wait4(2180,  <unfinished ...>
    77  			// 2179  <... wait4 resumed> 0x7fff28981bf8, 0, NULL) = ? ERESTARTSYS
    78  			// 2179  --- SIGUSR1 {si_signo=SIGUSR1, si_code=SI_USER, si_pid=2180, si_uid=0} ---
    79  			continue
    80  		}
    81  		if shouldSkip(sCall) {
    82  			log.Logf(2, "skipping call: %s", sCall.CallName)
    83  			continue
    84  		}
    85  		ctx.currentStraceCall = sCall
    86  		call := ctx.genCall()
    87  		if call == nil {
    88  			continue
    89  		}
    90  		if err := ctx.builder.Append(call); err != nil {
    91  			log.Fatalf("%v", err)
    92  		}
    93  	}
    94  	p, err := ctx.builder.Finalize()
    95  	if err != nil {
    96  		log.Fatalf("error validating program: %v", err)
    97  	}
    98  	return p
    99  }
   100  
   101  func (ctx *context) genCall() *prog.Call {
   102  	log.Logf(3, "parsing call: %s", ctx.currentStraceCall.CallName)
   103  	straceCall := ctx.currentStraceCall
   104  	meta := ctx.Select(straceCall)
   105  	if meta == nil {
   106  		log.Logf(2, "skipping call: %s which has no matching description", ctx.currentStraceCall.CallName)
   107  		return nil
   108  	}
   109  	ctx.currentSyzCall = prog.MakeCall(meta, nil)
   110  	syzCall := ctx.currentSyzCall
   111  
   112  	for i := range syzCall.Meta.Args {
   113  		var strArg parser.IrType
   114  		if i < len(straceCall.Args) {
   115  			strArg = straceCall.Args[i]
   116  		}
   117  		res := ctx.genArg(syzCall.Meta.Args[i].Type, prog.DirIn, strArg)
   118  		syzCall.Args = append(syzCall.Args, res)
   119  	}
   120  	ctx.genResult(syzCall.Meta.Ret, straceCall.Ret)
   121  	return syzCall
   122  }
   123  
   124  func (ctx *context) Select(syscall *parser.Syscall) *prog.Syscall {
   125  	for _, selector := range ctx.selectors {
   126  		if variant := selector.Select(syscall); variant != nil {
   127  			return variant
   128  		}
   129  	}
   130  	return ctx.target.SyscallMap[syscall.CallName]
   131  }
   132  
   133  func (ctx *context) genResult(syzType prog.Type, straceRet int64) {
   134  	if straceRet <= 0 {
   135  		return
   136  	}
   137  	straceExpr := parser.Constant(uint64(straceRet))
   138  	switch syzType.(type) {
   139  	case *prog.ResourceType:
   140  		log.Logf(2, "call: %s returned a resource type with val: %s",
   141  			ctx.currentStraceCall.CallName, straceExpr.String())
   142  		ctx.returnCache.cache(syzType, straceExpr, ctx.currentSyzCall.Ret)
   143  	}
   144  }
   145  
   146  func (ctx *context) genArg(syzType prog.Type, dir prog.Dir, traceArg parser.IrType) prog.Arg {
   147  	if traceArg == nil {
   148  		log.Logf(3, "parsing syzType: %s, traceArg is nil. generating default arg...", syzType.Name())
   149  		return syzType.DefaultArg(dir)
   150  	}
   151  	log.Logf(3, "parsing arg of syz type: %s, ir type: %#v", syzType.Name(), traceArg)
   152  
   153  	if dir == prog.DirOut {
   154  		switch syzType.(type) {
   155  		case *prog.PtrType, *prog.StructType, *prog.ResourceType, *prog.BufferType:
   156  			// Resource Types need special care. Pointers, Structs can have resource fields e.g. pipe, socketpair
   157  			// Buffer may need special care in out direction
   158  		default:
   159  			return syzType.DefaultArg(dir)
   160  		}
   161  	}
   162  
   163  	switch a := syzType.(type) {
   164  	case *prog.IntType, *prog.ConstType, *prog.FlagsType, *prog.CsumType:
   165  		return ctx.genConst(a, dir, traceArg)
   166  	case *prog.LenType:
   167  		return syzType.DefaultArg(dir)
   168  	case *prog.ProcType:
   169  		return ctx.parseProc(a, dir, traceArg)
   170  	case *prog.ResourceType:
   171  		return ctx.genResource(a, dir, traceArg)
   172  	case *prog.PtrType:
   173  		return ctx.genPtr(a, dir, traceArg)
   174  	case *prog.BufferType:
   175  		return ctx.genBuffer(a, dir, traceArg)
   176  	case *prog.StructType:
   177  		return ctx.genStruct(a, dir, traceArg)
   178  	case *prog.ArrayType:
   179  		return ctx.genArray(a, dir, traceArg)
   180  	case *prog.UnionType:
   181  		return ctx.genUnionArg(a, dir, traceArg)
   182  	case *prog.VmaType:
   183  		return ctx.genVma(a, dir, traceArg)
   184  	default:
   185  		log.Fatalf("unsupported type: %#v", syzType)
   186  	}
   187  	return nil
   188  }
   189  
   190  func (ctx *context) genVma(syzType *prog.VmaType, dir prog.Dir, _ parser.IrType) prog.Arg {
   191  	npages := uint64(1)
   192  	if syzType.RangeBegin != 0 || syzType.RangeEnd != 0 {
   193  		npages = syzType.RangeEnd
   194  	}
   195  	return prog.MakeVmaPointerArg(syzType, dir, ctx.builder.AllocateVMA(npages), npages)
   196  }
   197  
   198  func (ctx *context) genArray(syzType *prog.ArrayType, dir prog.Dir, traceType parser.IrType) prog.Arg {
   199  	var args []prog.Arg
   200  	switch a := traceType.(type) {
   201  	case *parser.GroupType:
   202  		for i := 0; i < len(a.Elems); i++ {
   203  			args = append(args, ctx.genArg(syzType.Elem, dir, a.Elems[i]))
   204  		}
   205  	default:
   206  		log.Fatalf("unsupported type for array: %#v", traceType)
   207  	}
   208  	return prog.MakeGroupArg(syzType, dir, args)
   209  }
   210  
   211  func (ctx *context) genStruct(syzType *prog.StructType, dir prog.Dir, traceType parser.IrType) prog.Arg {
   212  	var args []prog.Arg
   213  	switch a := traceType.(type) {
   214  	case *parser.GroupType:
   215  		j := 0
   216  		if ret, recursed := ctx.recurseStructs(syzType, dir, a); recursed {
   217  			return ret
   218  		}
   219  		for i := range syzType.Fields {
   220  			fldDir := syzType.Fields[i].Dir(dir)
   221  			if prog.IsPad(syzType.Fields[i].Type) {
   222  				args = append(args, syzType.Fields[i].DefaultArg(fldDir))
   223  				continue
   224  			}
   225  			// If the last n fields of a struct are zero or NULL, strace will occasionally omit those values
   226  			// this creates a mismatch in the number of elements in the ir type and in
   227  			// our descriptions. We generate default values for omitted fields
   228  			if j >= len(a.Elems) {
   229  				args = append(args, syzType.Fields[i].DefaultArg(fldDir))
   230  			} else {
   231  				args = append(args, ctx.genArg(syzType.Fields[i].Type, fldDir, a.Elems[j]))
   232  			}
   233  			j++
   234  		}
   235  	case *parser.BufferType:
   236  		// We could have a case like the following:
   237  		// ioctl(3, 35111, {ifr_name="\x6c\x6f", ifr_hwaddr=00:00:00:00:00:00}) = 0
   238  		// if_hwaddr gets parsed as a BufferType but our syscall descriptions have it as a struct type
   239  		return syzType.DefaultArg(dir)
   240  	default:
   241  		log.Fatalf("unsupported type for struct: %#v", a)
   242  	}
   243  	return prog.MakeGroupArg(syzType, dir, args)
   244  }
   245  
   246  // recurseStructs handles cases where syzType corresponds to struct descriptions like
   247  //
   248  //	sockaddr_storage_in6 {
   249  //	       addr    sockaddr_in6
   250  //	} [size[SOCKADDR_STORAGE_SIZE], align_ptr]
   251  //
   252  // which need to be recursively generated. It returns true if we needed to recurse
   253  // along with the generated argument and false otherwise.
   254  func (ctx *context) recurseStructs(syzType *prog.StructType, dir prog.Dir, traceType *parser.GroupType) (prog.Arg, bool) {
   255  	// only consider structs with one non-padded field
   256  	numFields := 0
   257  	for _, field := range syzType.Fields {
   258  		if prog.IsPad(field.Type) {
   259  			continue
   260  		}
   261  		numFields++
   262  	}
   263  	if numFields != 1 {
   264  		return nil, false
   265  	}
   266  	// the strace group type needs to have more one field (a mismatch)
   267  	if len(traceType.Elems) == 1 {
   268  		return nil, false
   269  	}
   270  	// first field needs to be a struct
   271  	switch t := syzType.Fields[0].Type.(type) {
   272  	case *prog.StructType:
   273  		var args []prog.Arg
   274  		// first element and traceType should have the same number of elements
   275  		if len(t.Fields) != len(traceType.Elems) {
   276  			return nil, false
   277  		}
   278  		args = append(args, ctx.genStruct(t, dir, traceType))
   279  		for _, field := range syzType.Fields[1:] {
   280  			args = append(args, field.DefaultArg(field.Dir(dir)))
   281  		}
   282  		return prog.MakeGroupArg(syzType, dir, args), true
   283  	}
   284  	return nil, false
   285  }
   286  
   287  func (ctx *context) genUnionArg(syzType *prog.UnionType, dir prog.Dir, straceType parser.IrType) prog.Arg {
   288  	if straceType == nil {
   289  		log.Logf(1, "generating union arg. straceType is nil")
   290  		return syzType.DefaultArg(dir)
   291  	}
   292  	log.Logf(4, "generating union arg: %s %#v", syzType.TypeName, straceType)
   293  
   294  	// Unions are super annoying because they sometimes need to be handled case by case
   295  	// We might need to lookinto a matching algorithm to identify the union type that most closely
   296  	// matches our strace type.
   297  
   298  	switch syzType.TypeName {
   299  	case "sockaddr_storage":
   300  		return ctx.genSockaddrStorage(syzType, dir, straceType)
   301  	case "sockaddr_nl":
   302  		return ctx.genSockaddrNetlink(syzType, dir, straceType)
   303  	case "ifr_ifru":
   304  		return ctx.genIfrIfru(syzType, dir, straceType)
   305  	}
   306  	return prog.MakeUnionArg(syzType, dir, ctx.genArg(syzType.Fields[0].Type, syzType.Fields[0].Dir(dir), straceType), 0)
   307  }
   308  
   309  func (ctx *context) genBuffer(syzType *prog.BufferType, dir prog.Dir, traceType parser.IrType) prog.Arg {
   310  	if dir == prog.DirOut {
   311  		if !syzType.Varlen() {
   312  			return prog.MakeOutDataArg(syzType, dir, syzType.Size())
   313  		}
   314  		switch a := traceType.(type) {
   315  		case *parser.BufferType:
   316  			return prog.MakeOutDataArg(syzType, dir, uint64(len(a.Val)))
   317  		default:
   318  			switch syzType.Kind {
   319  			case prog.BufferBlobRand:
   320  				size := rand.Intn(256)
   321  				return prog.MakeOutDataArg(syzType, dir, uint64(size))
   322  
   323  			case prog.BufferBlobRange:
   324  				max := rand.Intn(int(syzType.RangeEnd) - int(syzType.RangeBegin) + 1)
   325  				size := max + int(syzType.RangeBegin)
   326  				return prog.MakeOutDataArg(syzType, dir, uint64(size))
   327  			default:
   328  				log.Fatalf("unexpected buffer type kind: %v. call %v arg %#v", syzType.Kind, ctx.currentSyzCall, traceType)
   329  			}
   330  		}
   331  	}
   332  	var bufVal []byte
   333  	switch a := traceType.(type) {
   334  	case *parser.BufferType:
   335  		bufVal = []byte(a.Val)
   336  	case parser.Constant:
   337  		val := a.Val()
   338  		bArr := make([]byte, 8)
   339  		binary.LittleEndian.PutUint64(bArr, val)
   340  		bufVal = bArr
   341  	default:
   342  		log.Fatalf("unsupported type for buffer: %#v", traceType)
   343  	}
   344  	// strace always drops the null byte for buffer types but we only need to add it back for filenames and strings
   345  	switch syzType.Kind {
   346  	case prog.BufferFilename, prog.BufferString:
   347  		bufVal = append(bufVal, '\x00')
   348  	}
   349  	if !syzType.Varlen() {
   350  		size := syzType.Size()
   351  		for uint64(len(bufVal)) < size {
   352  			bufVal = append(bufVal, 0)
   353  		}
   354  		bufVal = bufVal[:size]
   355  	}
   356  	return prog.MakeDataArg(syzType, dir, bufVal)
   357  }
   358  
   359  func (ctx *context) genPtr(syzType *prog.PtrType, dir prog.Dir, traceType parser.IrType) prog.Arg {
   360  	switch a := traceType.(type) {
   361  	case parser.Constant:
   362  		if a.Val() == 0 {
   363  			return prog.MakeSpecialPointerArg(syzType, dir, 0)
   364  		}
   365  		// Likely have a type of the form bind(3, 0xfffffffff, [3]);
   366  		res := syzType.Elem.DefaultArg(syzType.ElemDir)
   367  		return ctx.addr(syzType, dir, res.Size(), res)
   368  	default:
   369  		res := ctx.genArg(syzType.Elem, syzType.ElemDir, a)
   370  		return ctx.addr(syzType, dir, res.Size(), res)
   371  	}
   372  }
   373  
   374  func (ctx *context) genConst(syzType prog.Type, dir prog.Dir, traceType parser.IrType) prog.Arg {
   375  	switch a := traceType.(type) {
   376  	case parser.Constant:
   377  		return prog.MakeConstArg(syzType, dir, a.Val())
   378  	case *parser.GroupType:
   379  		// Sometimes strace represents a pointer to int as [0] which gets parsed
   380  		// as Array([0], len=1). A good example is ioctl(3, FIONBIO, [1]). We may also have an union int type that
   381  		// is a represented as a struct in strace e.g.
   382  		// sigev_value={sival_int=-2123636944, sival_ptr=0x7ffd816bdf30}
   383  		// For now we choose the first option
   384  		if len(a.Elems) == 0 {
   385  			log.Logf(2, "parsing const type, got array type with len 0")
   386  			return syzType.DefaultArg(dir)
   387  		}
   388  		return ctx.genConst(syzType, dir, a.Elems[0])
   389  	case *parser.BufferType:
   390  		// strace decodes some arguments as hex strings because those values are network ordered
   391  		// e.g. sin_port or sin_addr fields of sockaddr_in.
   392  		// network order is big endian byte order so if the len of byte array is 1, 2, 4, or 8 then
   393  		// it is a good chance that we are decoding one of those fields. If it isn't, then most likely
   394  		// we have an error i.e. a sockaddr_un struct passed to a connect call with an inet file descriptor
   395  		var val uint64
   396  		toUint64 := binary.LittleEndian.Uint64
   397  		toUint32 := binary.LittleEndian.Uint32
   398  		toUint16 := binary.LittleEndian.Uint16
   399  		if syzType.Format() == prog.FormatBigEndian {
   400  			toUint64 = binary.BigEndian.Uint64
   401  			toUint32 = binary.BigEndian.Uint32
   402  			toUint16 = binary.BigEndian.Uint16
   403  		}
   404  		switch len(a.Val) {
   405  		case 8:
   406  			val = toUint64([]byte(a.Val))
   407  		case 4:
   408  			val = uint64(toUint32([]byte(a.Val)))
   409  		case 2:
   410  			val = uint64(toUint16([]byte(a.Val)))
   411  		case 1:
   412  			val = uint64(a.Val[0])
   413  		default:
   414  			return syzType.DefaultArg(dir)
   415  		}
   416  		return prog.MakeConstArg(syzType, dir, val)
   417  	default:
   418  		log.Fatalf("unsupported type for const: %#v", traceType)
   419  	}
   420  	return nil
   421  }
   422  
   423  func (ctx *context) genResource(syzType *prog.ResourceType, dir prog.Dir, traceType parser.IrType) prog.Arg {
   424  	if dir == prog.DirOut {
   425  		log.Logf(2, "resource returned by call argument: %s", traceType.String())
   426  		res := prog.MakeResultArg(syzType, dir, nil, syzType.Default())
   427  		ctx.returnCache.cache(syzType, traceType, res)
   428  		return res
   429  	}
   430  	switch a := traceType.(type) {
   431  	case parser.Constant:
   432  		val := a.Val()
   433  		if arg := ctx.returnCache.get(syzType, traceType); arg != nil {
   434  			res := prog.MakeResultArg(syzType, dir, arg.(*prog.ResultArg), syzType.Default())
   435  			return res
   436  		}
   437  		res := prog.MakeResultArg(syzType, dir, nil, val)
   438  		return res
   439  	case *parser.GroupType:
   440  		if len(a.Elems) == 1 {
   441  			// For example: 5028  ioctl(3, SIOCSPGRP, [0])          = 0
   442  			// last argument is a pointer to a resource. Strace will output a pointer to
   443  			// a number x as [x].
   444  			res := prog.MakeResultArg(syzType, dir, nil, syzType.Default())
   445  			ctx.returnCache.cache(syzType, a.Elems[0], res)
   446  			return res
   447  		}
   448  		log.Fatalf("generating resource type from GroupType with %d elements", len(a.Elems))
   449  	default:
   450  		log.Fatalf("unsupported type for resource: %#v", traceType)
   451  	}
   452  	return nil
   453  }
   454  
   455  func (ctx *context) parseProc(syzType *prog.ProcType, dir prog.Dir, traceType parser.IrType) prog.Arg {
   456  	switch a := traceType.(type) {
   457  	case parser.Constant:
   458  		val := a.Val()
   459  		if val >= syzType.ValuesPerProc {
   460  			return prog.MakeConstArg(syzType, dir, syzType.ValuesPerProc-1)
   461  		}
   462  		return prog.MakeConstArg(syzType, dir, val)
   463  	case *parser.BufferType:
   464  		// Again probably an error case
   465  		// Something like the following will trigger this
   466  		// bind(3, {sa_family=AF_INET, sa_data="\xac"}, 3) = -1 EINVAL(Invalid argument)
   467  		return syzType.DefaultArg(dir)
   468  	default:
   469  		log.Fatalf("unsupported type for proc: %#v", traceType)
   470  	}
   471  	return nil
   472  }
   473  
   474  func (ctx *context) addr(syzType prog.Type, dir prog.Dir, size uint64, data prog.Arg) prog.Arg {
   475  	return prog.MakePointerArg(syzType, dir, ctx.builder.Allocate(size, data.Type().Alignment()), data)
   476  }
   477  
   478  func shouldSkip(c *parser.Syscall) bool {
   479  	switch c.CallName {
   480  	case "write":
   481  		// We skip all writes to stdout and stderr because they can corrupt our crash summary.
   482  		// Also there will be nothing on stdin, so any reads will hang.
   483  		switch a := c.Args[0].(type) {
   484  		case parser.Constant:
   485  			if a.Val() <= 2 {
   486  				return true
   487  			}
   488  		}
   489  	}
   490  	return unsupportedCalls[c.CallName]
   491  }