github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/prog/heatmap.go (about)

     1  // Copyright 2022 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package prog
     5  
     6  import (
     7  	"fmt"
     8  	"math/rand"
     9  )
    10  
// Our heatmaps are a flexible mechanism to assign a probability distribution to
// some collection of bytes. Usage:
//  1. Choose a heatmap and initialize it with the data and a random source:
//     `hm := MakeXYZHeatmap(data, r)`.
//     Different heatmaps implement different probability distributions
//     (for now there is only one).
//  2. Select random indices according to the probability distribution:
//     `idx := hm.ChooseLocation()`.
type Heatmap interface {
	// NumMutations returns the number of mutations to perform.
	NumMutations() int
	// ChooseLocation returns a randomly selected index into the data the
	// heatmap was built from.
	ChooseLocation() int
}
    22  
    23  // Generic heatmaps model a probability distribution based on sparse data,
    24  // prioritising selection of regions which are not a single repeated byte. It
    25  // views data as a series of chunks of length `granularity`, ignoring chunks
    26  // which are a single repeated byte. Indices are chosen uniformly amongst the
    27  // remaining "interesting" segments.
    28  func MakeGenericHeatmap(data []byte, r *rand.Rand) Heatmap {
    29  	if len(data) == 0 {
    30  		panic("cannot create a GenericHeatmap with no data")
    31  	}
    32  	hm := &GenericHeatmap{
    33  		r: r,
    34  	}
    35  	hm.length, hm.segments = calculateLengthAndSegments(data, granularity)
    36  	return hm
    37  }
    38  
    39  func (hm *GenericHeatmap) NumMutations() int {
    40  	// At least one mutation.
    41  	n := 1
    42  	// + up to about one mutation every 4 KB of heatmap size.
    43  	n += hm.r.Intn(hm.length/(4<<10) + 1)
    44  	// + up to 4 mutations at random so that even small images can get more than one.
    45  	n += hm.r.Intn(5)
    46  	// But don't do too many as it will most likely corrupt the image.
    47  	if max := 10; n > max {
    48  		n = max
    49  	}
    50  	return n
    51  }
    52  
    53  func (hm *GenericHeatmap) ChooseLocation() int {
    54  	// Uniformly choose an index within one of the segments.
    55  	heatmapIdx := hm.r.Intn(hm.length)
    56  	rawIdx := translateIdx(heatmapIdx, hm.segments)
    57  	return rawIdx
    58  }
    59  
// GenericHeatmap is the Heatmap implementation returned by MakeGenericHeatmap.
// It selects locations uniformly within the "interesting" segments of the data.
type GenericHeatmap struct {
	// Random source used by NumMutations and ChooseLocation.
	r        *rand.Rand
	segments []segment // "Interesting" parts of the data.
	length   int       // Sum of all segment lengths.
}
    65  
// segment describes one contiguous range of the raw data.
type segment struct {
	offset int // Index in the raw data at which the range starts.
	length int // Number of bytes in the range.
}
    70  
// granularity is the chunk size in bytes for processing the data; it is the
// value MakeGenericHeatmap passes to calculateLengthAndSegments.
const granularity = 64
    72  
    73  // Determine the "interesting" segments of data, also returning their combined length.
    74  func calculateLengthAndSegments(data []byte, granularity int) (int, []segment) {
    75  	// Offset and length of current segment, total length of all segments, length of original data.
    76  	offset, currentLength, totalLength, rawLength := 0, 0, 0, len(data)
    77  	segments := []segment{}
    78  
    79  	// Save a segment.
    80  	saveSegment := func() {
    81  		if currentLength != 0 {
    82  			segments = append(segments, segment{offset: offset, length: currentLength})
    83  			offset, totalLength, currentLength = offset+currentLength, totalLength+currentLength, 0
    84  		}
    85  	}
    86  
    87  	for len(data) > 0 {
    88  		var chunk []byte
    89  		if len(data) < granularity {
    90  			chunk, data = data, nil
    91  		} else {
    92  			chunk, data = data[:granularity], data[granularity:]
    93  		}
    94  
    95  		// Check if buffer contains only a single value.
    96  		byt0, isConstant := chunk[0], true
    97  		for _, byt := range chunk {
    98  			if byt != byt0 {
    99  				isConstant = false
   100  				break
   101  			}
   102  		}
   103  
   104  		if !isConstant {
   105  			// Non-constant - extend the current segment.
   106  			currentLength += len(chunk)
   107  		} else {
   108  			// Save current segment.
   109  			saveSegment()
   110  			// Skip past the constant bytes.
   111  			offset += len(chunk)
   112  		}
   113  	}
   114  
   115  	// Save final segment.
   116  	saveSegment()
   117  
   118  	if len(segments) == 0 {
   119  		// We found no segments, i.e. the data is all "boring". Fall back to a
   120  		// uniform probability distribution over the original data by considering it
   121  		// as one long segment.
   122  		return rawLength, append(segments, segment{offset: 0, length: rawLength})
   123  	}
   124  
   125  	return totalLength, segments
   126  }
   127  
   128  // Convert from an index into "interesting" segments to an index into raw data.
   129  // I.e. view `idx` as an index into the concatenated segments, and translate
   130  // this to an index into the original underlying data. E.g.:
   131  //
   132  //	segs = []segment{{offset: 10, length: 20}, {offset: 50, length: 10}}
   133  //	translateIdx(25, segs) = 5
   134  //
   135  // I.e. we index element 5 of the second segment, so element 55 of the raw data.
   136  func translateIdx(idx int, segs []segment) int {
   137  	if idx < 0 {
   138  		panic(fmt.Sprintf("translateIdx: negative index %v", idx))
   139  	}
   140  	savedIdx := idx
   141  	for _, seg := range segs {
   142  		if idx < seg.length {
   143  			return seg.offset + idx
   144  		}
   145  		idx -= seg.length
   146  	}
   147  	panic(fmt.Sprintf("translateIdx: index out of range %v", savedIdx))
   148  }