github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/prog/rotation.go (about)

     1  // Copyright 2019 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package prog
     5  
     6  import (
     7  	"math/rand"
     8  	"sort"
     9  )
    10  
    11  // Rotator selects a random subset of syscalls for corpus rotation.
    12  type Rotator struct {
    13  	target        *Target
    14  	calls         map[*Syscall]bool
    15  	rnd           *rand.Rand
    16  	resourceless  []*Syscall
    17  	resources     map[*ResourceDesc]rotatorResource
    18  	goal          int
    19  	nresourceless int
    20  }
    21  
    22  type rotatorResource struct {
    23  	// 0 - precise ctors that don't require other resources as inputs (e.g. socket).
    24  	// 1 - precise ctors that require other resources (e.g. accept).
    25  	// 2 - all imprecise ctors.
    26  	ctors [3][]*Syscall
    27  	// 0 - precise uses of this resource.
    28  	// 1 - uses of parent resources (e.g. close for sock).
    29  	uses [2][]*Syscall
    30  }
    31  
    32  func MakeRotator(target *Target, calls map[*Syscall]bool, rnd *rand.Rand) *Rotator {
    33  	r := &Rotator{
    34  		target:    target,
    35  		calls:     calls,
    36  		rnd:       rnd,
    37  		resources: make(map[*ResourceDesc]rotatorResource),
    38  	}
    39  	var sorted []*Syscall
    40  	for call := range calls {
    41  		sorted = append(sorted, call)
    42  	}
    43  	sort.Slice(sorted, func(i, j int) bool {
    44  		return sorted[i].Name < sorted[j].Name
    45  	})
    46  	for _, call := range sorted {
    47  		var inputs []*ResourceDesc
    48  		for _, res := range call.inputResources {
    49  			// Don't take into account pid/uid/etc, they create too many links.
    50  			if !target.AuxResources[res.Name] {
    51  				inputs = append(inputs, res)
    52  			}
    53  		}
    54  		// VMAs and filenames are effectively resources for our purposes
    55  		// (but they don't have ctors).
    56  		ForeachCallType(call, func(t Type, _ *TypeCtx) {
    57  			switch a := t.(type) {
    58  			case *BufferType:
    59  				switch a.Kind {
    60  				case BufferFilename:
    61  					inputs = append(inputs, filenameRes)
    62  				}
    63  			case *VmaType:
    64  				inputs = append(inputs, vmaRes)
    65  			}
    66  		})
    67  
    68  		inputDedup := make(map[string]bool, len(inputs))
    69  		for _, res := range inputs {
    70  			if inputDedup[res.Name] {
    71  				continue
    72  			}
    73  			inputDedup[res.Name] = true
    74  			info := r.resources[res]
    75  			info.uses[0] = append(info.uses[0], call)
    76  			r.resources[res] = info
    77  
    78  			for _, kind := range res.Kind[:len(res.Kind)-1] {
    79  				parent := target.resourceMap[kind]
    80  				info := r.resources[parent]
    81  				info.uses[1] = append(info.uses[1], call)
    82  				r.resources[parent] = info
    83  			}
    84  		}
    85  		outputDedup := make(map[string]bool, len(call.createsResources))
    86  		for _, res := range call.createsResources {
    87  			if outputDedup[res.Name] {
    88  				continue
    89  			}
    90  			outputDedup[res.Name] = true
    91  			info := r.resources[res]
    92  			class := 0
    93  			if len(inputs) != 0 {
    94  				class = 1
    95  			}
    96  			info.ctors[class] = append(info.ctors[class], call)
    97  			r.resources[res] = info
    98  			for _, kind := range res.Kind[:len(res.Kind)-1] {
    99  				parent := target.resourceMap[kind]
   100  				info := r.resources[parent]
   101  				info.ctors[2] = append(info.ctors[2], call)
   102  				r.resources[parent] = info
   103  			}
   104  		}
   105  		if len(inputs)+len(call.createsResources) == 0 {
   106  			r.resourceless = append(r.resourceless, call)
   107  		}
   108  	}
   109  	// For smaller syscall sets we drop ~5% of syscalls.
   110  	// However, we assume that 200 syscalls is enough for a fuzzing session,
   111  	// so we cap at that level to make fuzzing more targeted.
   112  	r.goal = len(calls) * 19 / 20
   113  	r.goal = max(r.goal, 1)
   114  	r.goal = min(r.goal, 200)
   115  	// How many syscalls that don't use any resources we want to add?
   116  	r.nresourceless = max(1, r.goal*len(r.resourceless)/len(calls))
   117  	return r
   118  }
   119  
   120  func (r *Rotator) Select() map[*Syscall]bool {
   121  	rs := rotatorState{
   122  		Rotator: r,
   123  		calls:   make(map[*Syscall]bool, 3*r.goal),
   124  	}
   125  	return rs.Select()
   126  }
   127  
   128  type rotatorState struct {
   129  	*Rotator
   130  	calls      map[*Syscall]bool
   131  	topQueue   []*ResourceDesc
   132  	depQueue   []*ResourceDesc
   133  	topHandled map[*ResourceDesc]bool
   134  	depHandled map[*ResourceDesc]bool
   135  }
   136  
   137  func (rs *rotatorState) Select() map[*Syscall]bool {
   138  	// The algorithm is centered around resources.
   139  	// But first we add some syscalls that don't use any resources at all
   140  	// Otherwise we will never add them in the loop.
   141  	// Then, we select a resource and add some ctors for this resources
   142  	// and some calls that use it. That's handled by topQueue.
   143  	// If any of the calls require other resources as inputs, we also add
   144  	// some ctors for these resources, but don't add calls that use them.
   145  	// That's handled by depQueue.
   146  	// However, a resource can be handled as dependency first, but then
   147  	// handled as top resource again. In such case we will still add calls
   148  	// that use this resource.
   149  	if len(rs.resources) == 0 {
   150  		return rs.Rotator.calls
   151  	}
   152  	for {
   153  		if len(rs.depQueue) == 0 && len(rs.calls) >= rs.goal || len(rs.calls) >= 2*rs.goal {
   154  			rs.calls, _ = rs.target.transitivelyEnabled(rs.calls)
   155  			if len(rs.calls) >= rs.goal {
   156  				return rs.calls
   157  			}
   158  		}
   159  		if len(rs.depQueue) != 0 {
   160  			// Handle a dependent resource, add only ctors for these.
   161  			// Pick a random one, this gives a mix of DFS and BFS.
   162  			idx := rs.rnd.Intn(len(rs.depQueue))
   163  			res := rs.depQueue[idx]
   164  			rs.depQueue[idx] = rs.depQueue[len(rs.depQueue)-1]
   165  			rs.depQueue = rs.depQueue[:len(rs.depQueue)-1]
   166  			info := rs.resources[res]
   167  			nctors0 := len(info.ctors[0]) != 0
   168  			nctors1 := nctors0 || len(info.ctors[1]) != 0
   169  			rs.selectCalls(info.ctors[0], 2, true)
   170  			if nctors0 {
   171  				continue
   172  			}
   173  			rs.selectCalls(info.ctors[1], 2, true)
   174  			if nctors1 {
   175  				continue
   176  			}
   177  			rs.selectCalls(info.ctors[2], 2, true)
   178  			continue
   179  		}
   180  		if len(rs.topQueue) == 0 {
   181  			// We either just started selection or we handled all resources,
   182  			// but did not gather enough syscalls. In both cases we need
   183  			// to reset all queues.
   184  			rs.topQueue = make([]*ResourceDesc, 0, len(rs.resources))
   185  			rs.depQueue = make([]*ResourceDesc, 0, len(rs.resources))
   186  			rs.topHandled = make(map[*ResourceDesc]bool, len(rs.resources))
   187  			rs.depHandled = make(map[*ResourceDesc]bool, len(rs.resources))
   188  			for res := range rs.resources {
   189  				rs.topQueue = append(rs.topQueue, res)
   190  			}
   191  			sort.Slice(rs.topQueue, func(i, j int) bool {
   192  				return rs.topQueue[i].Name < rs.topQueue[j].Name
   193  			})
   194  			rs.rnd.Shuffle(len(rs.topQueue), func(i, j int) {
   195  				rs.topQueue[i], rs.topQueue[j] = rs.topQueue[j], rs.topQueue[i]
   196  			})
   197  			rs.selectCalls(rs.resourceless, rs.nresourceless+1, false)
   198  		}
   199  		// Handle a top resource, add more syscalls for these.
   200  		res := rs.topQueue[0]
   201  		rs.topQueue = rs.topQueue[1:]
   202  		if rs.topHandled[res] {
   203  			panic("top queue already handled")
   204  		}
   205  		rs.topHandled[res] = true
   206  		info := rs.resources[res]
   207  		nctors0 := len(info.ctors[0]) != 0
   208  		nctors1 := nctors0 || len(info.ctors[1]) != 0
   209  		rs.selectCalls(info.ctors[0], 5, true)
   210  		rs.selectCalls(info.ctors[1], 3, !nctors0)
   211  		rs.selectCalls(info.ctors[2], 2, !nctors1)
   212  		rs.selectCalls(info.uses[0], 20, true)
   213  		rs.selectCalls(info.uses[1], 2, len(info.uses[0]) == 0)
   214  	}
   215  }
   216  
   217  func (rs *rotatorState) addCall(call *Syscall) {
   218  	if rs.calls[call] {
   219  		return
   220  	}
   221  	rs.calls[call] = true
   222  	for _, res := range call.usesResources {
   223  		if rs.topHandled[res] || rs.depHandled[res] {
   224  			continue
   225  		}
   226  		rs.depHandled[res] = true
   227  		rs.depQueue = append(rs.depQueue, res)
   228  	}
   229  }
   230  
   231  func (rs *rotatorState) selectCalls(set []*Syscall, probability int, force bool) {
   232  	if !force && probability < 2 {
   233  		panic("will never select anything")
   234  	}
   235  	for ; len(set) != 0 && (force || rs.rnd.Intn(probability) != 0); force = false {
   236  		call := set[rs.rnd.Intn(len(set))]
   237  		rs.addCall(call)
   238  	}
   239  }