github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/prog/heatmap.go (about)

     1  // Copyright 2022 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package prog
     5  
     6  import (
     7  	"fmt"
     8  	"math/rand"
     9  )
    10  
// Our heatmaps are a flexible mechanism to assign a probability distribution to
// some collection of bytes. Usage:
//  1. Choose a heatmap and initialize it: `hm := MakeXYZHeatmap(data, r)`.
//     Different heatmaps implement different probability distributions
//     (for now there is only one).
//  2. Select random indices according to the probability distribution:
//     `idx := hm.ChooseLocation()`.
type Heatmap interface {
	// NumMutations returns a number of mutations; the generic heatmap picks
	// a random count between 1 and 10.
	NumMutations() int
	// ChooseLocation returns an index into the underlying data, drawn at
	// random according to the heatmap's probability distribution.
	ChooseLocation() int
}
    22  
    23  // Generic heatmaps model a probability distribution based on sparse data,
    24  // prioritising selection of regions which are not a single repeated byte. It
    25  // views data as a series of chunks of length `granularity`, ignoring chunks
    26  // which are a single repeated byte. Indices are chosen uniformly amongst the
    27  // remaining "interesting" segments.
    28  func MakeGenericHeatmap(data []byte, r *rand.Rand) Heatmap {
    29  	if len(data) == 0 {
    30  		panic("cannot create a GenericHeatmap with no data")
    31  	}
    32  	hm := &GenericHeatmap{
    33  		r: r,
    34  	}
    35  	hm.length, hm.segments = calculateLengthAndSegments(data, granularity)
    36  	return hm
    37  }
    38  
    39  func (hm *GenericHeatmap) NumMutations() int {
    40  	// At least one mutation.
    41  	n := 1
    42  	// + up to about one mutation every 4 KB of heatmap size.
    43  	n += hm.r.Intn(hm.length/(4<<10) + 1)
    44  	// + up to 4 mutations at random so that even small images can get more than one.
    45  	n += hm.r.Intn(5)
    46  	// But don't do too many as it will most likely corrupt the image.
    47  	return min(n, 10)
    48  }
    49  
    50  func (hm *GenericHeatmap) ChooseLocation() int {
    51  	// Uniformly choose an index within one of the segments.
    52  	heatmapIdx := hm.r.Intn(hm.length)
    53  	rawIdx := translateIdx(heatmapIdx, hm.segments)
    54  	return rawIdx
    55  }
    56  
// GenericHeatmap is the Heatmap implementation returned by MakeGenericHeatmap.
// It chooses locations uniformly among the bytes covered by its segments.
type GenericHeatmap struct {
	// Random source used by NumMutations and ChooseLocation.
	r        *rand.Rand
	segments []segment // "Interesting" parts of the data.
	length   int       // Sum of all segment lengths.
}
    62  
// segment describes one contiguous range of the raw data.
type segment struct {
	offset int // Index of the range's first byte within the raw data.
	length int // Number of bytes in the range.
}
    67  
// granularity is the chunk size that MakeGenericHeatmap passes to
// calculateLengthAndSegments.
const granularity = 64 // Chunk size in bytes for processing the data.
    69  
    70  // Determine the "interesting" segments of data, also returning their combined length.
    71  func calculateLengthAndSegments(data []byte, granularity int) (int, []segment) {
    72  	// Offset and length of current segment, total length of all segments, length of original data.
    73  	offset, currentLength, totalLength, rawLength := 0, 0, 0, len(data)
    74  	segments := []segment{}
    75  
    76  	// Save a segment.
    77  	saveSegment := func() {
    78  		if currentLength != 0 {
    79  			segments = append(segments, segment{offset: offset, length: currentLength})
    80  			offset, totalLength, currentLength = offset+currentLength, totalLength+currentLength, 0
    81  		}
    82  	}
    83  
    84  	for len(data) > 0 {
    85  		var chunk []byte
    86  		if len(data) < granularity {
    87  			chunk, data = data, nil
    88  		} else {
    89  			chunk, data = data[:granularity], data[granularity:]
    90  		}
    91  
    92  		// Check if buffer contains only a single value.
    93  		byt0, isConstant := chunk[0], true
    94  		for _, byt := range chunk {
    95  			if byt != byt0 {
    96  				isConstant = false
    97  				break
    98  			}
    99  		}
   100  
   101  		if !isConstant {
   102  			// Non-constant - extend the current segment.
   103  			currentLength += len(chunk)
   104  		} else {
   105  			// Save current segment.
   106  			saveSegment()
   107  			// Skip past the constant bytes.
   108  			offset += len(chunk)
   109  		}
   110  	}
   111  
   112  	// Save final segment.
   113  	saveSegment()
   114  
   115  	if len(segments) == 0 {
   116  		// We found no segments, i.e. the data is all "boring". Fall back to a
   117  		// uniform probability distribution over the original data by considering it
   118  		// as one long segment.
   119  		return rawLength, append(segments, segment{offset: 0, length: rawLength})
   120  	}
   121  
   122  	return totalLength, segments
   123  }
   124  
   125  // Convert from an index into "interesting" segments to an index into raw data.
   126  // I.e. view `idx` as an index into the concatenated segments, and translate
   127  // this to an index into the original underlying data. E.g.:
   128  //
   129  //	segs = []segment{{offset: 10, length: 20}, {offset: 50, length: 10}}
   130  //	translateIdx(25, segs) = 5
   131  //
   132  // I.e. we index element 5 of the second segment, so element 55 of the raw data.
   133  func translateIdx(idx int, segs []segment) int {
   134  	if idx < 0 {
   135  		panic(fmt.Sprintf("translateIdx: negative index %v", idx))
   136  	}
   137  	savedIdx := idx
   138  	for _, seg := range segs {
   139  		if idx < seg.length {
   140  			return seg.offset + idx
   141  		}
   142  		idx -= seg.length
   143  	}
   144  	panic(fmt.Sprintf("translateIdx: index out of range %v", savedIdx))
   145  }