github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/prog/heatmap.go (about) 1 // Copyright 2022 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package prog 5 6 import ( 7 "fmt" 8 "math/rand" 9 ) 10 11 // Our heatmaps are a flexible mechanism to assign a probability distribution to 12 // some collection of bytes. Usage: 13 // 1. Choose a heatmap and initialize it: `hm := MakeXYZHeatmap(data)`. 14 // Different heatmaps implement different probability distributions 15 // (for now there is only one). 16 // 2. Select random indices according to the probability distribution: 17 // `idx := hm.ChooseLocation(r)`. 18 type Heatmap interface { 19 NumMutations() int 20 ChooseLocation() int 21 } 22 23 // Generic heatmaps model a probability distribution based on sparse data, 24 // prioritising selection of regions which are not a single repeated byte. It 25 // views data as a series of chunks of length `granularity`, ignoring chunks 26 // which are a single repeated byte. Indices are chosen uniformly amongst the 27 // remaining "interesting" segments. 28 func MakeGenericHeatmap(data []byte, r *rand.Rand) Heatmap { 29 if len(data) == 0 { 30 panic("cannot create a GenericHeatmap with no data") 31 } 32 hm := &GenericHeatmap{ 33 r: r, 34 } 35 hm.length, hm.segments = calculateLengthAndSegments(data, granularity) 36 return hm 37 } 38 39 func (hm *GenericHeatmap) NumMutations() int { 40 // At least one mutation. 41 n := 1 42 // + up to about one mutation every 4 KB of heatmap size. 43 n += hm.r.Intn(hm.length/(4<<10) + 1) 44 // + up to 4 mutations at random so that even small images can get more than one. 45 n += hm.r.Intn(5) 46 // But don't do too many as it will most likely corrupt the image. 47 if max := 10; n > max { 48 n = max 49 } 50 return n 51 } 52 53 func (hm *GenericHeatmap) ChooseLocation() int { 54 // Uniformly choose an index within one of the segments. 55 heatmapIdx := hm.r.Intn(hm.length) 56 rawIdx := translateIdx(heatmapIdx, hm.segments) 57 return rawIdx 58 } 59 60 type GenericHeatmap struct { 61 r *rand.Rand 62 segments []segment // "Interesting" parts of the data. 63 length int // Sum of all segment lengths. 64 } 65 66 type segment struct { 67 offset int 68 length int 69 } 70 71 const granularity = 64 // Chunk size in bytes for processing the data. 72 73 // Determine the "interesting" segments of data, also returning their combined length. 74 func calculateLengthAndSegments(data []byte, granularity int) (int, []segment) { 75 // Offset and length of current segment, total length of all segments, length of original data. 76 offset, currentLength, totalLength, rawLength := 0, 0, 0, len(data) 77 segments := []segment{} 78 79 // Save a segment. 80 saveSegment := func() { 81 if currentLength != 0 { 82 segments = append(segments, segment{offset: offset, length: currentLength}) 83 offset, totalLength, currentLength = offset+currentLength, totalLength+currentLength, 0 84 } 85 } 86 87 for len(data) > 0 { 88 var chunk []byte 89 if len(data) < granularity { 90 chunk, data = data, nil 91 } else { 92 chunk, data = data[:granularity], data[granularity:] 93 } 94 95 // Check if buffer contains only a single value. 96 byt0, isConstant := chunk[0], true 97 for _, byt := range chunk { 98 if byt != byt0 { 99 isConstant = false 100 break 101 } 102 } 103 104 if !isConstant { 105 // Non-constant - extend the current segment. 106 currentLength += len(chunk) 107 } else { 108 // Save current segment. 109 saveSegment() 110 // Skip past the constant bytes. 111 offset += len(chunk) 112 } 113 } 114 115 // Save final segment. 116 saveSegment() 117 118 if len(segments) == 0 { 119 // We found no segments, i.e. the data is all "boring". Fall back to a 120 // uniform probability distribution over the original data by considering it 121 // as one long segment. 122 return rawLength, append(segments, segment{offset: 0, length: rawLength}) 123 } 124 125 return totalLength, segments 126 } 127 128 // Convert from an index into "interesting" segments to an index into raw data. 129 // I.e. view `idx` as an index into the concatenated segments, and translate 130 // this to an index into the original underlying data. E.g.: 131 // 132 // segs = []segment{{offset: 10, length: 20}, {offset: 50, length: 10}} 133 // translateIdx(25, segs) = 5 134 // 135 // I.e. we index element 5 of the second segment, so element 55 of the raw data. 136 func translateIdx(idx int, segs []segment) int { 137 if idx < 0 { 138 panic(fmt.Sprintf("translateIdx: negative index %v", idx)) 139 } 140 savedIdx := idx 141 for _, seg := range segs { 142 if idx < seg.length { 143 return seg.offset + idx 144 } 145 idx -= seg.length 146 } 147 panic(fmt.Sprintf("translateIdx: index out of range %v", savedIdx)) 148 }