github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/bisect/minimize/slice.go (about) 1 // Copyright 2023 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package minimize 5 6 import ( 7 "errors" 8 "fmt" 9 "math" 10 "slices" 11 "strings" 12 ) 13 14 type Config[T any] struct { 15 // The original slice is minimized with respect to this predicate. 16 // If Pred(X) returns true, X is assumed to contain all elements that must stay. 17 Pred func([]T) (bool, error) 18 // MaxSteps is a limit on the number of predicate calls during bisection. 19 // If it's hit, the bisection continues as if Pred() begins to return false. 20 // If it's set to 0 (by default), no limit is applied. 21 MaxSteps int 22 // MaxChunks sets a limit on the number of chunks pursued by the bisection algorithm. 23 // If we hit the limit, bisection is stopped and Array() returns ErrTooManyChunks 24 // anongside the intermediate bisection result (a valid, but not fully minimized slice). 25 MaxChunks int 26 // Logf is used for sharing debugging output. 27 Logf func(string, ...interface{}) 28 } 29 30 // Slice() finds a minimal subsequence of slice elements that still gives Pred() == true. 31 // The algorithm works by sequentially splitting the slice into smaller-size chunks and running 32 // Pred() witout those chunks. Slice() receives the original slice chunks. 33 // The expected number of Pred() runs is O(|result|*log2(|elements|)). 34 func Slice[T any](config Config[T], slice []T) ([]T, error) { 35 if config.Logf == nil { 36 config.Logf = func(string, ...interface{}) {} 37 } 38 ctx := &sliceCtx[T]{ 39 Config: config, 40 chunks: []*arrayChunk[T]{ 41 { 42 elements: slice, 43 }, 44 }, 45 } 46 return ctx.bisect() 47 } 48 49 // SliceWithFixed behaves like Slice, but also allows to designate the elements that 50 // must always remain (the "fixed" ones). 51 func SliceWithFixed[T any](config Config[T], slice []T, fixed func(T) bool) ([]T, error) { 52 var freeIdx, fixedIdx []int 53 for i := 0; i < len(slice); i++ { 54 if fixed(slice[i]) { 55 fixedIdx = append(fixedIdx, i) 56 } else { 57 freeIdx = append(freeIdx, i) 58 } 59 } 60 if len(freeIdx) == 0 { 61 return slice, nil 62 } 63 convert := func(idx []int) []T { 64 ret := make([]T, 0, len(idx)+len(fixedIdx)) 65 idx, fixedIdx := slices.Clone(idx), slices.Clone(fixedIdx) 66 for len(idx)+len(fixedIdx) > 0 { 67 if len(idx) > 0 && (len(fixedIdx) == 0 || 68 len(fixedIdx) > 0 && idx[0] < fixedIdx[0]) { 69 ret = append(ret, slice[idx[0]]) 70 idx = idx[1:] 71 } else { 72 ret = append(ret, slice[fixedIdx[0]]) 73 fixedIdx = fixedIdx[1:] 74 } 75 } 76 return ret 77 } 78 newConfig := Config[int]{ 79 MaxSteps: config.MaxSteps, 80 MaxChunks: config.MaxChunks, 81 Pred: func(idx []int) (bool, error) { 82 return config.Pred(convert(idx)) 83 }, 84 Logf: config.Logf, 85 } 86 result, err := Slice[int](newConfig, freeIdx) 87 if err != nil { 88 return nil, err 89 } 90 return convert(result), nil 91 } 92 93 type sliceCtx[T any] struct { 94 Config[T] 95 chunks []*arrayChunk[T] 96 predRuns int 97 } 98 99 type arrayChunk[T any] struct { 100 elements []T 101 final bool // There's no way to further split this chunk. 102 } 103 104 // ErrTooManyChunks is returned if the number of necessary chunks surpassed MaxChunks. 105 var ErrTooManyChunks = errors.New("the bisection process is following too many necessary chunks") 106 107 func (ctx *sliceCtx[T]) bisect() ([]T, error) { 108 // At first, we don't know if the original chunks are really necessary. 109 err := ctx.splitChunks(false) 110 // Then, keep on splitting the chunks layer by layer until we have identified 111 // all necessary elements. 112 // This way we ensure that we always go from larger to smaller chunks. 113 for err == nil && !ctx.done() { 114 if ctx.MaxChunks > 0 && len(ctx.chunks) > ctx.MaxChunks { 115 err = ErrTooManyChunks 116 break 117 } 118 err = ctx.splitChunks(true) 119 } 120 if err != nil && err != ErrTooManyChunks { 121 return nil, err 122 } 123 return ctx.elements(), err 124 } 125 126 // splitChunks() splits each chunk in two and only leaves the necessary sub-parts. 127 func (ctx *sliceCtx[T]) splitChunks(someNeeded bool) error { 128 ctx.Logf("split chunks (needed=%v): %s", someNeeded, ctx.chunkInfo()) 129 splitInto := 2 130 if !someNeeded && len(ctx.chunks) == 1 { 131 // It's our first iteration. 132 splitInto = ctx.initialSplit(len(ctx.chunks[0].elements)) 133 } 134 var newChunks []*arrayChunk[T] 135 for i, chunk := range ctx.chunks { 136 if chunk.final { 137 newChunks = append(newChunks, chunk) 138 continue 139 } 140 ctx.Logf("split chunk #%d of len %d into %d parts", i, len(chunk.elements), splitInto) 141 chunks := splitChunk[T](chunk.elements, splitInto) 142 if len(chunks) == 1 && someNeeded { 143 ctx.Logf("no way to further split the chunk") 144 chunk.final = true 145 newChunks = append(newChunks, chunk) 146 continue 147 } 148 foundNeeded := false 149 for j := range chunks { 150 ctx.Logf("testing without sub-chunk %d/%d", j+1, len(chunks)) 151 if j < len(chunks)-1 || foundNeeded || !someNeeded { 152 ret, err := ctx.predRun( 153 newChunks, 154 mergeRawChunks(chunks[j+1:]), 155 ctx.chunks[i+1:], 156 ) 157 if err != nil { 158 return err 159 } 160 if ret { 161 ctx.Logf("the chunk can be dropped") 162 continue 163 } 164 } else { 165 ctx.Logf("no need to test this chunk, it's definitely needed") 166 } 167 foundNeeded = true 168 newChunks = append(newChunks, &arrayChunk[T]{ 169 elements: chunks[j], 170 }) 171 } 172 } 173 ctx.chunks = newChunks 174 return nil 175 } 176 177 // Since Pred() runs can be costly, the objective is to get the most out of the 178 // limited number of Pred() calls. 179 // We try to achieve it by splitting the initial array in more than 2 elements. 180 func (ctx *sliceCtx[T]) initialSplit(size int) int { 181 // If the number of steps is small and the number of elements is big, 182 // let's just split the initial array into MaxSteps chunks. 183 // There's no solid reasoning behind the condition below, so feel free to 184 // change it if you have better ideas. 185 if ctx.MaxSteps > 0 && math.Log2(float64(size)) > float64(ctx.MaxSteps) { 186 return ctx.MaxSteps 187 } 188 // Otherwise let's split in 3. 189 return 3 190 } 191 192 // predRun() determines whether (before + mid + after) covers the necessary elements. 193 func (ctx *sliceCtx[T]) predRun(before []*arrayChunk[T], mid []T, after []*arrayChunk[T]) (bool, error) { 194 if ctx.MaxSteps > 0 && ctx.predRuns >= ctx.MaxSteps { 195 ctx.Logf("we have reached the limit on predicate runs (%d); pretend it returns false", 196 ctx.MaxSteps) 197 return false, nil 198 } 199 ctx.predRuns++ 200 return ctx.Pred(mergeChunks(before, mid, after)) 201 } 202 203 // The bisection process is done once every chunk is marked as final. 204 func (ctx *sliceCtx[T]) done() bool { 205 if ctx.MaxSteps > 0 && ctx.predRuns >= ctx.MaxSteps { 206 // No reason to continue. 207 return true 208 } 209 for _, chunk := range ctx.chunks { 210 if !chunk.final { 211 return false 212 } 213 } 214 return true 215 } 216 217 func (ctx *sliceCtx[T]) elements() []T { 218 return mergeChunks(ctx.chunks, nil, nil) 219 } 220 221 func (ctx *sliceCtx[T]) chunkInfo() string { 222 var parts []string 223 for _, chunk := range ctx.chunks { 224 str := "" 225 if chunk.final { 226 str = ", final" 227 } 228 parts = append(parts, fmt.Sprintf("<%d%s>", len(chunk.elements), str)) 229 } 230 return strings.Join(parts, ", ") 231 } 232 233 func mergeChunks[T any](before []*arrayChunk[T], mid []T, after []*arrayChunk[T]) []T { 234 var ret []T 235 for _, chunk := range before { 236 ret = append(ret, chunk.elements...) 237 } 238 ret = append(ret, mid...) 239 for _, chunk := range after { 240 ret = append(ret, chunk.elements...) 241 } 242 return ret 243 } 244 245 func mergeRawChunks[T any](chunks [][]T) []T { 246 var ret []T 247 for _, chunk := range chunks { 248 ret = append(ret, chunk...) 249 } 250 return ret 251 } 252 253 func splitChunk[T any](chunk []T, parts int) [][]T { 254 chunkSize := (len(chunk) + parts - 1) / parts 255 if chunkSize == 0 { 256 chunkSize = 1 257 } 258 var ret [][]T 259 for i := 0; i < len(chunk); i += chunkSize { 260 end := min(i+chunkSize, len(chunk)) 261 ret = append(ret, chunk[i:end]) 262 } 263 return ret 264 }