github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/prog/rotation.go (about)

     1  // Copyright 2019 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package prog
     5  
     6  import (
     7  	"math/rand"
     8  	"sort"
     9  )
    10  
    11  // Rotator selects a random subset of syscalls for corpus rotation.
    12  type Rotator struct {
    13  	target        *Target
    14  	calls         map[*Syscall]bool
    15  	rnd           *rand.Rand
    16  	resourceless  []*Syscall
    17  	resources     map[*ResourceDesc]rotatorResource
    18  	goal          int
    19  	nresourceless int
    20  }
    21  
// rotatorResource groups the syscalls related to a single resource:
// the calls that create it (ctors) and the calls that consume it (uses).
type rotatorResource struct {
	// ctors is indexed by constructor class:
	// 0 - precise ctors that don't require other resources as inputs (e.g. socket).
	// 1 - precise ctors that require other resources (e.g. accept).
	// 2 - all imprecise ctors (calls that create a more specific child resource).
	ctors [3][]*Syscall
	// uses is indexed by use class:
	// 0 - precise uses of this resource.
	// 1 - uses of parent resources (e.g. close for sock).
	uses [2][]*Syscall
}
    31  
    32  func MakeRotator(target *Target, calls map[*Syscall]bool, rnd *rand.Rand) *Rotator {
    33  	r := &Rotator{
    34  		target:    target,
    35  		calls:     calls,
    36  		rnd:       rnd,
    37  		resources: make(map[*ResourceDesc]rotatorResource),
    38  	}
    39  	var sorted []*Syscall
    40  	for call := range calls {
    41  		sorted = append(sorted, call)
    42  	}
    43  	sort.Slice(sorted, func(i, j int) bool {
    44  		return sorted[i].Name < sorted[j].Name
    45  	})
    46  	for _, call := range sorted {
    47  		var inputs []*ResourceDesc
    48  		for _, res := range call.inputResources {
    49  			// Don't take into account pid/uid/etc, they create too many links.
    50  			if !target.AuxResources[res.Name] {
    51  				inputs = append(inputs, res)
    52  			}
    53  		}
    54  		// VMAs and filenames are effectively resources for our purposes
    55  		// (but they don't have ctors).
    56  		ForeachCallType(call, func(t Type, _ *TypeCtx) {
    57  			switch a := t.(type) {
    58  			case *BufferType:
    59  				switch a.Kind {
    60  				case BufferFilename:
    61  					inputs = append(inputs, filenameRes)
    62  				}
    63  			case *VmaType:
    64  				inputs = append(inputs, vmaRes)
    65  			}
    66  		})
    67  
    68  		inputDedup := make(map[string]bool, len(inputs))
    69  		for _, res := range inputs {
    70  			if inputDedup[res.Name] {
    71  				continue
    72  			}
    73  			inputDedup[res.Name] = true
    74  			info := r.resources[res]
    75  			info.uses[0] = append(info.uses[0], call)
    76  			r.resources[res] = info
    77  
    78  			for _, kind := range res.Kind[:len(res.Kind)-1] {
    79  				parent := target.resourceMap[kind]
    80  				info := r.resources[parent]
    81  				info.uses[1] = append(info.uses[1], call)
    82  				r.resources[parent] = info
    83  			}
    84  		}
    85  		outputDedup := make(map[string]bool, len(call.createsResources))
    86  		for _, res := range call.createsResources {
    87  			if outputDedup[res.Name] {
    88  				continue
    89  			}
    90  			outputDedup[res.Name] = true
    91  			info := r.resources[res]
    92  			class := 0
    93  			if len(inputs) != 0 {
    94  				class = 1
    95  			}
    96  			info.ctors[class] = append(info.ctors[class], call)
    97  			r.resources[res] = info
    98  			for _, kind := range res.Kind[:len(res.Kind)-1] {
    99  				parent := target.resourceMap[kind]
   100  				info := r.resources[parent]
   101  				info.ctors[2] = append(info.ctors[2], call)
   102  				r.resources[parent] = info
   103  			}
   104  		}
   105  		if len(inputs)+len(call.createsResources) == 0 {
   106  			r.resourceless = append(r.resourceless, call)
   107  		}
   108  	}
   109  	// For smaller syscall sets we drop ~5% of syscalls.
   110  	// However, we assume that 200 syscalls is enough for a fuzzing session,
   111  	// so we cap at that level to make fuzzing more targeted.
   112  	r.goal = len(calls) * 19 / 20
   113  	if r.goal < 1 {
   114  		r.goal = 1
   115  	}
   116  	if max := 200; r.goal > max {
   117  		r.goal = max
   118  	}
   119  	// How many syscalls that don't use any resources we want to add?
   120  	r.nresourceless = r.goal * len(r.resourceless) / len(calls)
   121  	if r.nresourceless < 1 {
   122  		r.nresourceless = 1
   123  	}
   124  	return r
   125  }
   126  
   127  func (r *Rotator) Select() map[*Syscall]bool {
   128  	rs := rotatorState{
   129  		Rotator: r,
   130  		calls:   make(map[*Syscall]bool, 3*r.goal),
   131  	}
   132  	return rs.Select()
   133  }
   134  
   135  type rotatorState struct {
   136  	*Rotator
   137  	calls      map[*Syscall]bool
   138  	topQueue   []*ResourceDesc
   139  	depQueue   []*ResourceDesc
   140  	topHandled map[*ResourceDesc]bool
   141  	depHandled map[*ResourceDesc]bool
   142  }
   143  
   144  func (rs *rotatorState) Select() map[*Syscall]bool {
   145  	// The algorithm is centered around resources.
   146  	// But first we add some syscalls that don't use any resources at all
   147  	// Otherwise we will never add them in the loop.
   148  	// Then, we select a resource and add some ctors for this resources
   149  	// and some calls that use it. That's handled by topQueue.
   150  	// If any of the calls require other resources as inputs, we also add
   151  	// some ctors for these resources, but don't add calls that use them.
   152  	// That's handled by depQueue.
   153  	// However, a resource can be handled as dependency first, but then
   154  	// handled as top resource again. In such case we will still add calls
   155  	// that use this resource.
   156  	if len(rs.resources) == 0 {
   157  		return rs.Rotator.calls
   158  	}
   159  	for {
   160  		if len(rs.depQueue) == 0 && len(rs.calls) >= rs.goal || len(rs.calls) >= 2*rs.goal {
   161  			rs.calls, _ = rs.target.transitivelyEnabled(rs.calls)
   162  			if len(rs.calls) >= rs.goal {
   163  				return rs.calls
   164  			}
   165  		}
   166  		if len(rs.depQueue) != 0 {
   167  			// Handle a dependent resource, add only ctors for these.
   168  			// Pick a random one, this gives a mix of DFS and BFS.
   169  			idx := rs.rnd.Intn(len(rs.depQueue))
   170  			res := rs.depQueue[idx]
   171  			rs.depQueue[idx] = rs.depQueue[len(rs.depQueue)-1]
   172  			rs.depQueue = rs.depQueue[:len(rs.depQueue)-1]
   173  			info := rs.resources[res]
   174  			nctors0 := len(info.ctors[0]) != 0
   175  			nctors1 := nctors0 || len(info.ctors[1]) != 0
   176  			rs.selectCalls(info.ctors[0], 2, true)
   177  			if nctors0 {
   178  				continue
   179  			}
   180  			rs.selectCalls(info.ctors[1], 2, true)
   181  			if nctors1 {
   182  				continue
   183  			}
   184  			rs.selectCalls(info.ctors[2], 2, true)
   185  			continue
   186  		}
   187  		if len(rs.topQueue) == 0 {
   188  			// We either just started selection or we handled all resources,
   189  			// but did not gather enough syscalls. In both cases we need
   190  			// to reset all queues.
   191  			rs.topQueue = make([]*ResourceDesc, 0, len(rs.resources))
   192  			rs.depQueue = make([]*ResourceDesc, 0, len(rs.resources))
   193  			rs.topHandled = make(map[*ResourceDesc]bool, len(rs.resources))
   194  			rs.depHandled = make(map[*ResourceDesc]bool, len(rs.resources))
   195  			for res := range rs.resources {
   196  				rs.topQueue = append(rs.topQueue, res)
   197  			}
   198  			sort.Slice(rs.topQueue, func(i, j int) bool {
   199  				return rs.topQueue[i].Name < rs.topQueue[j].Name
   200  			})
   201  			rs.rnd.Shuffle(len(rs.topQueue), func(i, j int) {
   202  				rs.topQueue[i], rs.topQueue[j] = rs.topQueue[j], rs.topQueue[i]
   203  			})
   204  			rs.selectCalls(rs.resourceless, rs.nresourceless+1, false)
   205  		}
   206  		// Handle a top resource, add more syscalls for these.
   207  		res := rs.topQueue[0]
   208  		rs.topQueue = rs.topQueue[1:]
   209  		if rs.topHandled[res] {
   210  			panic("top queue already handled")
   211  		}
   212  		rs.topHandled[res] = true
   213  		info := rs.resources[res]
   214  		nctors0 := len(info.ctors[0]) != 0
   215  		nctors1 := nctors0 || len(info.ctors[1]) != 0
   216  		rs.selectCalls(info.ctors[0], 5, true)
   217  		rs.selectCalls(info.ctors[1], 3, !nctors0)
   218  		rs.selectCalls(info.ctors[2], 2, !nctors1)
   219  		rs.selectCalls(info.uses[0], 20, true)
   220  		rs.selectCalls(info.uses[1], 2, len(info.uses[0]) == 0)
   221  	}
   222  }
   223  
   224  func (rs *rotatorState) addCall(call *Syscall) {
   225  	if rs.calls[call] {
   226  		return
   227  	}
   228  	rs.calls[call] = true
   229  	for _, res := range call.usesResources {
   230  		if rs.topHandled[res] || rs.depHandled[res] {
   231  			continue
   232  		}
   233  		rs.depHandled[res] = true
   234  		rs.depQueue = append(rs.depQueue, res)
   235  	}
   236  }
   237  
   238  func (rs *rotatorState) selectCalls(set []*Syscall, probability int, force bool) {
   239  	if !force && probability < 2 {
   240  		panic("will never select anything")
   241  	}
   242  	for ; len(set) != 0 && (force || rs.rnd.Intn(probability) != 0); force = false {
   243  		call := set[rs.rnd.Intn(len(set))]
   244  		rs.addCall(call)
   245  	}
   246  }