github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/bisect/minimize/slice.go (about)

     1  // Copyright 2023 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package minimize
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"math"
    10  	"slices"
    11  	"strings"
    12  )
    13  
// Config controls a single slice minimization run.
type Config[T any] struct {
	// The original slice is minimized with respect to this predicate.
	// If Pred(X) returns true, X is assumed to contain all elements that must stay.
	Pred func([]T) (bool, error)
	// MaxSteps is a limit on the number of predicate calls during bisection.
	// If it's hit, the bisection continues as if Pred() begins to return false.
	// If it's set to 0 (by default), no limit is applied.
	MaxSteps int
	// MaxChunks sets a limit on the number of chunks pursued by the bisection algorithm.
	// If we hit the limit, bisection is stopped and Slice() returns ErrTooManyChunks
	// alongside the intermediate bisection result (a valid, but not fully minimized slice).
	MaxChunks int
	// Logf is used for sharing debugging output.
	Logf func(string, ...interface{})
}
    29  
    30  // Slice() finds a minimal subsequence of slice elements that still gives Pred() == true.
    31  // The algorithm works by sequentially splitting the slice into smaller-size chunks and running
    32  // Pred() witout those chunks. Slice() receives the original slice chunks.
    33  // The expected number of Pred() runs is O(|result|*log2(|elements|)).
    34  func Slice[T any](config Config[T], slice []T) ([]T, error) {
    35  	if config.Logf == nil {
    36  		config.Logf = func(string, ...interface{}) {}
    37  	}
    38  	ctx := &sliceCtx[T]{
    39  		Config: config,
    40  		chunks: []*arrayChunk[T]{
    41  			{
    42  				elements: slice,
    43  			},
    44  		},
    45  	}
    46  	return ctx.bisect()
    47  }
    48  
    49  // SliceWithFixed behaves like Slice, but also allows to designate the elements that
    50  // must always remain (the "fixed" ones).
    51  func SliceWithFixed[T any](config Config[T], slice []T, fixed func(T) bool) ([]T, error) {
    52  	var freeIdx, fixedIdx []int
    53  	for i := 0; i < len(slice); i++ {
    54  		if fixed(slice[i]) {
    55  			fixedIdx = append(fixedIdx, i)
    56  		} else {
    57  			freeIdx = append(freeIdx, i)
    58  		}
    59  	}
    60  	if len(freeIdx) == 0 {
    61  		return slice, nil
    62  	}
    63  	convert := func(idx []int) []T {
    64  		ret := make([]T, 0, len(idx)+len(fixedIdx))
    65  		idx, fixedIdx := slices.Clone(idx), slices.Clone(fixedIdx)
    66  		for len(idx)+len(fixedIdx) > 0 {
    67  			if len(idx) > 0 && (len(fixedIdx) == 0 ||
    68  				len(fixedIdx) > 0 && idx[0] < fixedIdx[0]) {
    69  				ret = append(ret, slice[idx[0]])
    70  				idx = idx[1:]
    71  			} else {
    72  				ret = append(ret, slice[fixedIdx[0]])
    73  				fixedIdx = fixedIdx[1:]
    74  			}
    75  		}
    76  		return ret
    77  	}
    78  	newConfig := Config[int]{
    79  		MaxSteps:  config.MaxSteps,
    80  		MaxChunks: config.MaxChunks,
    81  		Pred: func(idx []int) (bool, error) {
    82  			return config.Pred(convert(idx))
    83  		},
    84  		Logf: config.Logf,
    85  	}
    86  	result, err := Slice[int](newConfig, freeIdx)
    87  	if err != nil {
    88  		return nil, err
    89  	}
    90  	return convert(result), nil
    91  }
    92  
// sliceCtx holds the state of a single Slice() bisection run.
type sliceCtx[T any] struct {
	Config[T]
	// chunks is the current partition of the kept elements, in original order.
	chunks []*arrayChunk[T]
	// predRuns counts the Pred() invocations made so far (compared against MaxSteps).
	predRuns int
}
    98  
// arrayChunk is a contiguous group of slice elements tracked by the bisection.
type arrayChunk[T any] struct {
	elements []T
	final    bool // There's no way to further split this chunk.
}
   103  
// ErrTooManyChunks is returned if the number of necessary chunks surpassed Config.MaxChunks.
// It is returned alongside a valid, but possibly not fully minimized, result slice.
var ErrTooManyChunks = errors.New("the bisection process is following too many necessary chunks")
   106  
   107  func (ctx *sliceCtx[T]) bisect() ([]T, error) {
   108  	// At first, we don't know if the original chunks are really necessary.
   109  	err := ctx.splitChunks(false)
   110  	// Then, keep on splitting the chunks layer by layer until we have identified
   111  	// all necessary elements.
   112  	// This way we ensure that we always go from larger to smaller chunks.
   113  	for err == nil && !ctx.done() {
   114  		if ctx.MaxChunks > 0 && len(ctx.chunks) > ctx.MaxChunks {
   115  			err = ErrTooManyChunks
   116  			break
   117  		}
   118  		err = ctx.splitChunks(true)
   119  	}
   120  	if err != nil && err != ErrTooManyChunks {
   121  		return nil, err
   122  	}
   123  	return ctx.elements(), err
   124  }
   125  
// splitChunks() splits every non-final chunk into smaller parts (two on all but
// possibly the first iteration) and only keeps the sub-parts without which
// Pred() no longer returns true.
// If someNeeded is true, every current chunk is known to contain at least one
// necessary element, which lets us skip one Pred() run per chunk.
func (ctx *sliceCtx[T]) splitChunks(someNeeded bool) error {
	ctx.Logf("split chunks (needed=%v): %s", someNeeded, ctx.chunkInfo())
	splitInto := 2
	if !someNeeded && len(ctx.chunks) == 1 {
		// It's our first iteration.
		splitInto = ctx.initialSplit(len(ctx.chunks[0].elements))
	}
	var newChunks []*arrayChunk[T]
	for i, chunk := range ctx.chunks {
		if chunk.final {
			// Final chunks cannot be split further; carry them over as is.
			newChunks = append(newChunks, chunk)
			continue
		}
		ctx.Logf("split chunk #%d of len %d into %d parts", i, len(chunk.elements), splitInto)
		chunks := splitChunk[T](chunk.elements, splitInto)
		if len(chunks) == 1 && someNeeded {
			// The chunk could not be split (splitInto==2 yields a single part
			// only for a one-element chunk), and it's known to be needed.
			ctx.Logf("no way to further split the chunk")
			chunk.final = true
			newChunks = append(newChunks, chunk)
			continue
		}
		foundNeeded := false
		for j := range chunks {
			ctx.Logf("testing without sub-chunk %d/%d", j+1, len(chunks))
			if j < len(chunks)-1 || foundNeeded || !someNeeded {
				// Run Pred() on: sub-parts kept so far (in newChunks), the
				// not-yet-tested sub-parts of the current chunk, and all the
				// chunks after the current one.
				ret, err := ctx.predRun(
					newChunks,
					mergeRawChunks(chunks[j+1:]),
					ctx.chunks[i+1:],
				)
				if err != nil {
					return err
				}
				if ret {
					ctx.Logf("the chunk can be dropped")
					continue
				}
			} else {
				// This is the last sub-part, no earlier sub-part was needed,
				// and the chunk definitely contains needed elements — so
				// they must all be here; no reason to waste a Pred() run.
				ctx.Logf("no need to test this chunk, it's definitely needed")
			}
			foundNeeded = true
			newChunks = append(newChunks, &arrayChunk[T]{
				elements: chunks[j],
			})
		}
	}
	ctx.chunks = newChunks
	return nil
}
   176  
   177  // Since Pred() runs can be costly, the objective is to get the most out of the
   178  // limited number of Pred() calls.
   179  // We try to achieve it by splitting the initial array in more than 2 elements.
   180  func (ctx *sliceCtx[T]) initialSplit(size int) int {
   181  	// If the number of steps is small and the number of elements is big,
   182  	// let's just split the initial array into MaxSteps chunks.
   183  	// There's no solid reasoning behind the condition below, so feel free to
   184  	// change it if you have better ideas.
   185  	if ctx.MaxSteps > 0 && math.Log2(float64(size)) > float64(ctx.MaxSteps) {
   186  		return ctx.MaxSteps
   187  	}
   188  	// Otherwise let's split in 3.
   189  	return 3
   190  }
   191  
   192  // predRun() determines whether (before + mid + after) covers the necessary elements.
   193  func (ctx *sliceCtx[T]) predRun(before []*arrayChunk[T], mid []T, after []*arrayChunk[T]) (bool, error) {
   194  	if ctx.MaxSteps > 0 && ctx.predRuns >= ctx.MaxSteps {
   195  		ctx.Logf("we have reached the limit on predicate runs (%d); pretend it returns false",
   196  			ctx.MaxSteps)
   197  		return false, nil
   198  	}
   199  	ctx.predRuns++
   200  	return ctx.Pred(mergeChunks(before, mid, after))
   201  }
   202  
   203  // The bisection process is done once every chunk is marked as final.
   204  func (ctx *sliceCtx[T]) done() bool {
   205  	if ctx.MaxSteps > 0 && ctx.predRuns >= ctx.MaxSteps {
   206  		// No reason to continue.
   207  		return true
   208  	}
   209  	for _, chunk := range ctx.chunks {
   210  		if !chunk.final {
   211  			return false
   212  		}
   213  	}
   214  	return true
   215  }
   216  
   217  func (ctx *sliceCtx[T]) elements() []T {
   218  	return mergeChunks(ctx.chunks, nil, nil)
   219  }
   220  
   221  func (ctx *sliceCtx[T]) chunkInfo() string {
   222  	var parts []string
   223  	for _, chunk := range ctx.chunks {
   224  		str := ""
   225  		if chunk.final {
   226  			str = ", final"
   227  		}
   228  		parts = append(parts, fmt.Sprintf("<%d%s>", len(chunk.elements), str))
   229  	}
   230  	return strings.Join(parts, ", ")
   231  }
   232  
   233  func mergeChunks[T any](before []*arrayChunk[T], mid []T, after []*arrayChunk[T]) []T {
   234  	var ret []T
   235  	for _, chunk := range before {
   236  		ret = append(ret, chunk.elements...)
   237  	}
   238  	ret = append(ret, mid...)
   239  	for _, chunk := range after {
   240  		ret = append(ret, chunk.elements...)
   241  	}
   242  	return ret
   243  }
   244  
   245  func mergeRawChunks[T any](chunks [][]T) []T {
   246  	var ret []T
   247  	for _, chunk := range chunks {
   248  		ret = append(ret, chunk...)
   249  	}
   250  	return ret
   251  }
   252  
   253  func splitChunk[T any](chunk []T, parts int) [][]T {
   254  	chunkSize := (len(chunk) + parts - 1) / parts
   255  	if chunkSize == 0 {
   256  		chunkSize = 1
   257  	}
   258  	var ret [][]T
   259  	for i := 0; i < len(chunk); i += chunkSize {
   260  		end := min(i+chunkSize, len(chunk))
   261  		ret = append(ret, chunk[i:end])
   262  	}
   263  	return ret
   264  }