github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/prog/heatmap.go (about) 1 // Copyright 2022 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package prog 5 6 import ( 7 "fmt" 8 "math/rand" 9 ) 10 11 // Our heatmaps are a flexible mechanism to assign a probability distribution to 12 // some collection of bytes. Usage: 13 // 1. Choose a heatmap and initialize it: `hm := MakeXYZHeatmap(data)`. 14 // Different heatmaps implement different probability distributions 15 // (for now there is only one). 16 // 2. Select random indices according to the probability distribution: 17 // `idx := hm.ChooseLocation(r)`. 18 type Heatmap interface { 19 NumMutations() int 20 ChooseLocation() int 21 } 22 23 // Generic heatmaps model a probability distribution based on sparse data, 24 // prioritising selection of regions which are not a single repeated byte. It 25 // views data as a series of chunks of length `granularity`, ignoring chunks 26 // which are a single repeated byte. Indices are chosen uniformly amongst the 27 // remaining "interesting" segments. 28 func MakeGenericHeatmap(data []byte, r *rand.Rand) Heatmap { 29 if len(data) == 0 { 30 panic("cannot create a GenericHeatmap with no data") 31 } 32 hm := &GenericHeatmap{ 33 r: r, 34 } 35 hm.length, hm.segments = calculateLengthAndSegments(data, granularity) 36 return hm 37 } 38 39 func (hm *GenericHeatmap) NumMutations() int { 40 // At least one mutation. 41 n := 1 42 // + up to about one mutation every 4 KB of heatmap size. 43 n += hm.r.Intn(hm.length/(4<<10) + 1) 44 // + up to 4 mutations at random so that even small images can get more than one. 45 n += hm.r.Intn(5) 46 // But don't do too many as it will most likely corrupt the image. 47 return min(n, 10) 48 } 49 50 func (hm *GenericHeatmap) ChooseLocation() int { 51 // Uniformly choose an index within one of the segments. 52 heatmapIdx := hm.r.Intn(hm.length) 53 rawIdx := translateIdx(heatmapIdx, hm.segments) 54 return rawIdx 55 } 56 57 type GenericHeatmap struct { 58 r *rand.Rand 59 segments []segment // "Interesting" parts of the data. 60 length int // Sum of all segment lengths. 61 } 62 63 type segment struct { 64 offset int 65 length int 66 } 67 68 const granularity = 64 // Chunk size in bytes for processing the data. 69 70 // Determine the "interesting" segments of data, also returning their combined length. 71 func calculateLengthAndSegments(data []byte, granularity int) (int, []segment) { 72 // Offset and length of current segment, total length of all segments, length of original data. 73 offset, currentLength, totalLength, rawLength := 0, 0, 0, len(data) 74 segments := []segment{} 75 76 // Save a segment. 77 saveSegment := func() { 78 if currentLength != 0 { 79 segments = append(segments, segment{offset: offset, length: currentLength}) 80 offset, totalLength, currentLength = offset+currentLength, totalLength+currentLength, 0 81 } 82 } 83 84 for len(data) > 0 { 85 var chunk []byte 86 if len(data) < granularity { 87 chunk, data = data, nil 88 } else { 89 chunk, data = data[:granularity], data[granularity:] 90 } 91 92 // Check if buffer contains only a single value. 93 byt0, isConstant := chunk[0], true 94 for _, byt := range chunk { 95 if byt != byt0 { 96 isConstant = false 97 break 98 } 99 } 100 101 if !isConstant { 102 // Non-constant - extend the current segment. 103 currentLength += len(chunk) 104 } else { 105 // Save current segment. 106 saveSegment() 107 // Skip past the constant bytes. 108 offset += len(chunk) 109 } 110 } 111 112 // Save final segment. 113 saveSegment() 114 115 if len(segments) == 0 { 116 // We found no segments, i.e. the data is all "boring". Fall back to a 117 // uniform probability distribution over the original data by considering it 118 // as one long segment. 119 return rawLength, append(segments, segment{offset: 0, length: rawLength}) 120 } 121 122 return totalLength, segments 123 } 124 125 // Convert from an index into "interesting" segments to an index into raw data. 126 // I.e. view `idx` as an index into the concatenated segments, and translate 127 // this to an index into the original underlying data. E.g.: 128 // 129 // segs = []segment{{offset: 10, length: 20}, {offset: 50, length: 10}} 130 // translateIdx(25, segs) = 5 131 // 132 // I.e. we index element 5 of the second segment, so element 55 of the raw data. 133 func translateIdx(idx int, segs []segment) int { 134 if idx < 0 { 135 panic(fmt.Sprintf("translateIdx: negative index %v", idx)) 136 } 137 savedIdx := idx 138 for _, seg := range segs { 139 if idx < seg.length { 140 return seg.offset + idx 141 } 142 idx -= seg.length 143 } 144 panic(fmt.Sprintf("translateIdx: index out of range %v", savedIdx)) 145 }