github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/corpus/corpus.go (about) 1 // Copyright 2024 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package corpus 5 6 import ( 7 "context" 8 "fmt" 9 "maps" 10 "sync" 11 12 "github.com/google/syzkaller/pkg/cover" 13 "github.com/google/syzkaller/pkg/hash" 14 "github.com/google/syzkaller/pkg/signal" 15 "github.com/google/syzkaller/pkg/stat" 16 "github.com/google/syzkaller/prog" 17 ) 18 19 // Corpus object represents a set of syzkaller-found programs that 20 // cover the kernel up to the currently reached frontiers. 21 type Corpus struct { 22 ctx context.Context 23 mu sync.RWMutex 24 progsMap map[string]*Item 25 signal signal.Signal // total signal of all items 26 cover cover.Cover // total coverage of all items 27 updates chan<- NewItemEvent 28 29 *ProgramsList 30 StatProgs *stat.Val 31 StatSignal *stat.Val 32 StatCover *stat.Val 33 34 focusAreas []*focusAreaState 35 } 36 37 type focusAreaState struct { 38 FocusArea 39 *ProgramsList 40 } 41 42 type FocusArea struct { 43 Name string // can be empty 44 CoverPCs map[uint64]struct{} 45 Weight float64 46 } 47 48 func NewCorpus(ctx context.Context) *Corpus { 49 return NewMonitoredCorpus(ctx, nil) 50 } 51 52 func NewMonitoredCorpus(ctx context.Context, updates chan<- NewItemEvent) *Corpus { 53 return NewFocusedCorpus(ctx, updates, nil) 54 } 55 56 func NewFocusedCorpus(ctx context.Context, updates chan<- NewItemEvent, areas []FocusArea) *Corpus { 57 corpus := &Corpus{ 58 ctx: ctx, 59 progsMap: make(map[string]*Item), 60 updates: updates, 61 ProgramsList: &ProgramsList{}, 62 } 63 corpus.StatProgs = stat.New("corpus", "Number of test programs in the corpus", stat.Console, 64 stat.Link("/corpus"), stat.Graph("corpus"), stat.LenOf(&corpus.progsMap, &corpus.mu)) 65 corpus.StatSignal = stat.New("signal", "Fuzzing signal in the corpus", 66 stat.LenOf(&corpus.signal, &corpus.mu)) 67 corpus.StatCover = stat.New("coverage", "Source coverage in the corpus", stat.Console, 68 stat.Link("/cover"), stat.Prometheus("syz_corpus_cover"), stat.LenOf(&corpus.cover, &corpus.mu)) 69 for _, area := range areas { 70 obj := &ProgramsList{} 71 if len(areas) > 1 && area.Name != "" { 72 // Only show extra statistics if there's more than one area. 73 stat.New("corpus ["+area.Name+"]", 74 fmt.Sprintf("Corpus programs of the focus area %q", area.Name), 75 stat.Console, stat.Graph("corpus"), 76 stat.LenOf(&obj.progs, &corpus.mu)) 77 } 78 corpus.focusAreas = append(corpus.focusAreas, &focusAreaState{ 79 FocusArea: area, 80 ProgramsList: obj, 81 }) 82 } 83 return corpus 84 } 85 86 // It may happen that a single program is relevant because of several 87 // sysalls. In that case, there will be several ItemUpdate entities. 88 type ItemUpdate struct { 89 Call int 90 RawCover []uint64 91 } 92 93 // Item objects are to be treated as immutable, otherwise it's just 94 // too hard to synchonize accesses to them across the whole project. 95 // When Corpus updates one of its items, it saves a copy of it. 96 type Item struct { 97 Sig string 98 Call int 99 Prog *prog.Prog 100 HasAny bool // whether the prog contains squashed arguments 101 Signal signal.Signal 102 Cover []uint64 103 Updates []ItemUpdate 104 105 areas map[*focusAreaState]struct{} 106 } 107 108 func (item Item) StringCall() string { 109 return item.Prog.CallName(item.Call) 110 } 111 112 type NewInput struct { 113 Prog *prog.Prog 114 Call int 115 Signal signal.Signal 116 Cover []uint64 117 RawCover []uint64 118 } 119 120 type NewItemEvent struct { 121 Sig string 122 Exists bool 123 ProgData []byte 124 NewCover []uint64 125 } 126 127 func (corpus *Corpus) Save(inp NewInput) { 128 progData := inp.Prog.Serialize() 129 sig := hash.String(progData) 130 131 corpus.mu.Lock() 132 defer corpus.mu.Unlock() 133 134 update := ItemUpdate{ 135 Call: inp.Call, 136 RawCover: inp.RawCover, 137 } 138 exists := false 139 if old, ok := corpus.progsMap[sig]; ok { 140 exists = true 141 newSignal := old.Signal.Copy() 142 newSignal.Merge(inp.Signal) 143 var newCover cover.Cover 144 newCover.Merge(old.Cover) 145 newCover.Merge(inp.Cover) 146 newItem := &Item{ 147 Sig: sig, 148 Prog: old.Prog, 149 Call: old.Call, 150 HasAny: old.HasAny, 151 Signal: newSignal, 152 Cover: newCover.Serialize(), 153 Updates: append([]ItemUpdate{}, old.Updates...), 154 areas: maps.Clone(old.areas), 155 } 156 const maxUpdates = 32 157 if len(newItem.Updates) < maxUpdates { 158 newItem.Updates = append(newItem.Updates, update) 159 } 160 corpus.progsMap[sig] = newItem 161 corpus.applyFocusAreas(newItem, inp.Cover) 162 } else { 163 item := &Item{ 164 Sig: sig, 165 Call: inp.Call, 166 Prog: inp.Prog, 167 HasAny: inp.Prog.ContainsAny(), 168 Signal: inp.Signal, 169 Cover: inp.Cover, 170 Updates: []ItemUpdate{update}, 171 } 172 corpus.progsMap[sig] = item 173 corpus.applyFocusAreas(item, inp.Cover) 174 corpus.saveProgram(inp.Prog, inp.Signal) 175 } 176 corpus.signal.Merge(inp.Signal) 177 newCover := corpus.cover.MergeDiff(inp.Cover) 178 if corpus.updates != nil { 179 select { 180 case <-corpus.ctx.Done(): 181 case corpus.updates <- NewItemEvent{ 182 Sig: sig, 183 Exists: exists, 184 ProgData: progData, 185 NewCover: newCover, 186 }: 187 } 188 } 189 } 190 191 func (corpus *Corpus) applyFocusAreas(item *Item, coverDelta []uint64) { 192 for _, area := range corpus.focusAreas { 193 matches := false 194 for _, pc := range coverDelta { 195 if _, ok := area.CoverPCs[pc]; ok { 196 matches = true 197 break 198 } 199 } 200 if !matches { 201 continue 202 } 203 area.saveProgram(item.Prog, item.Signal) 204 if item.areas == nil { 205 item.areas = make(map[*focusAreaState]struct{}) 206 item.areas[area] = struct{}{} 207 } 208 } 209 } 210 211 func (corpus *Corpus) Signal() signal.Signal { 212 corpus.mu.RLock() 213 defer corpus.mu.RUnlock() 214 return corpus.signal.Copy() 215 } 216 217 func (corpus *Corpus) Items() []*Item { 218 corpus.mu.RLock() 219 defer corpus.mu.RUnlock() 220 ret := make([]*Item, 0, len(corpus.progsMap)) 221 for _, item := range corpus.progsMap { 222 ret = append(ret, item) 223 } 224 return ret 225 } 226 227 func (corpus *Corpus) Item(sig string) *Item { 228 corpus.mu.RLock() 229 defer corpus.mu.RUnlock() 230 return corpus.progsMap[sig] 231 } 232 233 type CallCov struct { 234 Count int 235 Cover cover.Cover 236 } 237 238 func (corpus *Corpus) CallCover() map[string]*CallCov { 239 corpus.mu.RLock() 240 defer corpus.mu.RUnlock() 241 calls := make(map[string]*CallCov) 242 for _, inp := range corpus.progsMap { 243 call := inp.StringCall() 244 if calls[call] == nil { 245 calls[call] = new(CallCov) 246 } 247 cc := calls[call] 248 cc.Count++ 249 cc.Cover.Merge(inp.Cover) 250 } 251 return calls 252 } 253 254 func (corpus *Corpus) ProgsPerArea() map[string]int { 255 corpus.mu.RLock() 256 defer corpus.mu.RUnlock() 257 ret := map[string]int{} 258 for _, item := range corpus.focusAreas { 259 ret[item.Name] = len(item.progs) 260 } 261 return ret 262 } 263 264 func (corpus *Corpus) Cover() []uint64 { 265 return corpus.cover.Serialize() 266 }