github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/syz-hub/state/state.go (about) 1 // Copyright 2016 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package state 5 6 import ( 7 "fmt" 8 "os" 9 "path/filepath" 10 "sort" 11 "strconv" 12 "time" 13 14 "github.com/google/syzkaller/pkg/db" 15 "github.com/google/syzkaller/pkg/hash" 16 "github.com/google/syzkaller/pkg/log" 17 "github.com/google/syzkaller/pkg/osutil" 18 "github.com/google/syzkaller/pkg/rpctype" 19 "github.com/google/syzkaller/prog" 20 ) 21 22 // State holds all internal syz-hub state including corpus, 23 // reproducers and information about managers. 24 // It is persisted to and can be restored from a directory. 25 type State struct { 26 corpusSeq uint64 27 reproSeq uint64 28 dir string 29 Corpus *db.DB 30 Repros *db.DB 31 Managers map[string]*Manager 32 } 33 34 // Manager represents one syz-manager instance. 35 type Manager struct { 36 name string 37 Domain string 38 corpusSeq uint64 39 reproSeq uint64 40 corpusFile string 41 corpusSeqFile string 42 reproSeqFile string 43 domainFile string 44 ownRepros map[string]bool 45 Connected time.Time 46 Added int 47 Deleted int 48 New int 49 SentRepros int 50 RecvRepros int 51 Calls map[string]struct{} 52 Corpus *db.DB 53 } 54 55 // Make creates State and initializes it from dir. 56 func Make(dir string) (*State, error) { 57 st := &State{ 58 dir: dir, 59 Managers: make(map[string]*Manager), 60 } 61 62 osutil.MkdirAll(st.dir) 63 var err error 64 st.Corpus, st.corpusSeq, err = loadDB(filepath.Join(st.dir, "corpus.db"), "corpus", true) 65 if err != nil { 66 log.Fatal(err) 67 } 68 st.Repros, st.reproSeq, err = loadDB(filepath.Join(st.dir, "repro.db"), "repro", true) 69 if err != nil { 70 log.Fatal(err) 71 } 72 73 managersDir := filepath.Join(st.dir, "manager") 74 osutil.MkdirAll(managersDir) 75 managers, err := os.ReadDir(managersDir) 76 if err != nil { 77 return nil, fmt.Errorf("failed to read %v dir: %w", managersDir, err) 78 } 79 for _, manager := range managers { 80 _, err := st.createManager(manager.Name()) 81 if err != nil { 82 return nil, err 83 } 84 } 85 log.Logf(0, "purging corpus...") 86 st.purgeCorpus() 87 log.Logf(0, "done, %v programs", len(st.Corpus.Records)) 88 return st, err 89 } 90 91 func (st *State) Flush() { 92 if err := st.Corpus.Flush(); err != nil { 93 log.Logf(0, "failed to flush corpus database: %v", err) 94 } 95 for _, mgr := range st.Managers { 96 if err := mgr.Corpus.Flush(); err != nil { 97 log.Logf(0, "failed to flush corpus database: %v", err) 98 } 99 } 100 } 101 102 func loadDB(file, name string, progs bool) (*db.DB, uint64, error) { 103 log.Logf(0, "reading %v...", name) 104 db, err := db.Open(file, true) 105 if err != nil { 106 return nil, 0, fmt.Errorf("failed to open %v database: %w", name, err) 107 } 108 log.Logf(0, "read %v programs", len(db.Records)) 109 var maxSeq uint64 110 for key, rec := range db.Records { 111 if progs { 112 _, ncalls, err := prog.CallSet(rec.Val) 113 if err != nil { 114 log.Logf(0, "bad file: can't parse call set: %v\n%q", err, rec.Val) 115 db.Delete(key) 116 continue 117 } 118 if ncalls > prog.MaxCalls { 119 log.Logf(0, "bad file: too many calls: %v", ncalls) 120 db.Delete(key) 121 continue 122 } 123 if sig := hash.Hash(rec.Val); sig.String() != key { 124 log.Logf(0, "bad file: hash %v, want hash %v", key, sig.String()) 125 db.Delete(key) 126 continue 127 } 128 } 129 if maxSeq < rec.Seq { 130 maxSeq = rec.Seq 131 } 132 } 133 if err := db.Flush(); err != nil { 134 return nil, 0, fmt.Errorf("failed to flush corpus database: %w", err) 135 } 136 return db, maxSeq, nil 137 } 138 139 func (st *State) createManager(name string) (*Manager, error) { 140 dir := filepath.Join(st.dir, "manager", name) 141 osutil.MkdirAll(dir) 142 mgr := &Manager{ 143 name: name, 144 corpusFile: filepath.Join(dir, "corpus.db"), 145 corpusSeqFile: filepath.Join(dir, "seq"), 146 reproSeqFile: filepath.Join(dir, "repro.seq"), 147 domainFile: filepath.Join(dir, "domain"), 148 ownRepros: make(map[string]bool), 149 } 150 mgr.corpusSeq = loadSeqFile(mgr.corpusSeqFile) 151 if st.corpusSeq < mgr.corpusSeq { 152 st.corpusSeq = mgr.corpusSeq 153 } 154 mgr.reproSeq = loadSeqFile(mgr.reproSeqFile) 155 if mgr.reproSeq == 0 { 156 mgr.reproSeq = st.reproSeq 157 } 158 if st.reproSeq < mgr.reproSeq { 159 st.reproSeq = mgr.reproSeq 160 } 161 domainData, _ := os.ReadFile(mgr.domainFile) 162 mgr.Domain = string(domainData) 163 corpus, _, err := loadDB(mgr.corpusFile, name, false) 164 if err != nil { 165 return nil, fmt.Errorf("failed to open manager corpus %v: %w", mgr.corpusFile, err) 166 } 167 mgr.Corpus = corpus 168 log.Logf(0, "created manager %v: domain=%v corpus=%v, corpusSeq=%v, reproSeq=%v", 169 mgr.name, mgr.Domain, len(mgr.Corpus.Records), mgr.corpusSeq, mgr.reproSeq) 170 st.Managers[name] = mgr 171 return mgr, nil 172 } 173 174 func (st *State) Connect(name, domain string, fresh bool, calls []string, corpus [][]byte) error { 175 mgr := st.Managers[name] 176 if mgr == nil { 177 var err error 178 mgr, err = st.createManager(name) 179 if err != nil { 180 return err 181 } 182 } 183 mgr.Connected = time.Now() 184 mgr.Domain = domain 185 writeFile(mgr.domainFile, []byte(mgr.Domain)) 186 if fresh { 187 mgr.corpusSeq = 0 188 mgr.reproSeq = st.reproSeq 189 } 190 saveSeqFile(mgr.corpusSeqFile, mgr.corpusSeq) 191 saveSeqFile(mgr.reproSeqFile, mgr.reproSeq) 192 193 mgr.Calls = make(map[string]struct{}) 194 for _, c := range calls { 195 mgr.Calls[c] = struct{}{} 196 } 197 198 os.Remove(mgr.corpusFile) 199 var err error 200 mgr.Corpus, err = db.Open(mgr.corpusFile, true) 201 if err != nil { 202 log.Logf(0, "failed to open corpus database: %v", err) 203 return err 204 } 205 st.addInputs(mgr, corpus) 206 st.purgeCorpus() 207 return nil 208 } 209 210 func (st *State) Sync(name string, add [][]byte, del []string) (string, []rpctype.HubInput, int, error) { 211 mgr := st.Managers[name] 212 if mgr == nil || mgr.Connected.IsZero() { 213 return "", nil, 0, fmt.Errorf("unconnected manager %v", name) 214 } 215 if len(del) != 0 { 216 for _, sig := range del { 217 mgr.Corpus.Delete(sig) 218 } 219 if err := mgr.Corpus.Flush(); err != nil { 220 log.Logf(0, "failed to flush corpus database: %v", err) 221 } 222 st.purgeCorpus() 223 } 224 st.addInputs(mgr, add) 225 progs, more, err := st.pendingInputs(mgr) 226 mgr.Added += len(add) 227 mgr.Deleted += len(del) 228 mgr.New += len(progs) 229 return mgr.Domain, progs, more, err 230 } 231 232 func (st *State) AddRepro(name string, repro []byte) error { 233 mgr := st.Managers[name] 234 if mgr == nil || mgr.Connected.IsZero() { 235 return fmt.Errorf("unconnected manager %v", name) 236 } 237 if _, _, err := prog.CallSet(repro); err != nil { 238 log.Logf(0, "manager %v: failed to extract call set: %v, program:\n%v", 239 mgr.name, err, string(repro)) 240 return nil 241 } 242 sig := hash.String(repro) 243 if _, ok := st.Repros.Records[sig]; ok { 244 return nil 245 } 246 mgr.ownRepros[sig] = true 247 mgr.SentRepros++ 248 if mgr.reproSeq == st.reproSeq { 249 mgr.reproSeq++ 250 saveSeqFile(mgr.reproSeqFile, mgr.reproSeq) 251 } 252 st.reproSeq++ 253 st.Repros.Save(sig, repro, st.reproSeq) 254 if err := st.Repros.Flush(); err != nil { 255 log.Logf(0, "failed to flush repro database: %v", err) 256 } 257 return nil 258 } 259 260 func (st *State) PendingRepro(name string) ([]byte, error) { 261 mgr := st.Managers[name] 262 if mgr == nil || mgr.Connected.IsZero() { 263 return nil, fmt.Errorf("unconnected manager %v", name) 264 } 265 if mgr.reproSeq == st.reproSeq { 266 return nil, nil 267 } 268 var repro []byte 269 minSeq := ^uint64(0) 270 for key, rec := range st.Repros.Records { 271 if mgr.reproSeq >= rec.Seq { 272 continue 273 } 274 if mgr.ownRepros[key] { 275 continue 276 } 277 calls, _, err := prog.CallSet(rec.Val) 278 if err != nil { 279 return nil, fmt.Errorf("failed to extract call set: %w\nprogram: %s", err, rec.Val) 280 } 281 if !managerSupportsAllCalls(mgr.Calls, calls) { 282 continue 283 } 284 if minSeq > rec.Seq { 285 minSeq = rec.Seq 286 repro = rec.Val 287 } 288 } 289 if repro == nil { 290 mgr.reproSeq = st.reproSeq 291 saveSeqFile(mgr.reproSeqFile, mgr.reproSeq) 292 return nil, nil 293 } 294 mgr.RecvRepros++ 295 mgr.reproSeq = minSeq 296 saveSeqFile(mgr.reproSeqFile, mgr.reproSeq) 297 return repro, nil 298 } 299 300 func (st *State) pendingInputs(mgr *Manager) ([]rpctype.HubInput, int, error) { 301 if mgr.corpusSeq == st.corpusSeq { 302 return nil, 0, nil 303 } 304 type Record struct { 305 Key string 306 Val []byte 307 Seq uint64 308 } 309 var records []Record 310 for key, rec := range st.Corpus.Records { 311 if mgr.corpusSeq >= rec.Seq { 312 continue 313 } 314 if _, ok := mgr.Corpus.Records[key]; ok { 315 continue 316 } 317 calls, _, err := prog.CallSet(rec.Val) 318 if err != nil { 319 return nil, 0, fmt.Errorf("failed to extract call set: %w\nprogram: %s", err, rec.Val) 320 } 321 if !managerSupportsAllCalls(mgr.Calls, calls) { 322 continue 323 } 324 records = append(records, Record{key, rec.Val, rec.Seq}) 325 } 326 maxSeq := st.corpusSeq 327 more := 0 328 const ( 329 // Send at most that many records (rounded up to next seq number). 330 maxRecords = 100 331 // If we have way too many records to send (more than capRecords), 332 // cap total number to capRecords and give up sending all. 333 // Otherwise new managers will never chew all this on a busy hub. 334 capRecords = 100000 335 ) 336 if len(records) > maxRecords { 337 sort.Slice(records, func(i, j int) bool { 338 return records[i].Seq < records[j].Seq 339 }) 340 if len(records) > capRecords { 341 records = records[len(records)-capRecords:] 342 } 343 pos := maxRecords 344 maxSeq = records[pos].Seq 345 for pos+1 < len(records) && records[pos+1].Seq == maxSeq { 346 pos++ 347 } 348 pos++ 349 more = len(records) - pos 350 records = records[:pos] 351 } 352 progs := make([]rpctype.HubInput, 0, len(records)) 353 for _, rec := range records { 354 progs = append(progs, rpctype.HubInput{ 355 Domain: st.inputDomain(rec.Key, mgr.Domain), 356 Prog: rec.Val, 357 }) 358 } 359 mgr.corpusSeq = maxSeq 360 saveSeqFile(mgr.corpusSeqFile, mgr.corpusSeq) 361 return progs, more, nil 362 } 363 364 func (st *State) inputDomain(key, self string) string { 365 domain := "" 366 for _, mgr := range st.Managers { 367 same := mgr.Domain == self 368 if !same && domain != "" { 369 continue 370 } 371 if _, ok := mgr.Corpus.Records[key]; !ok { 372 continue 373 } 374 domain = mgr.Domain 375 if same { 376 break 377 } 378 } 379 return domain 380 } 381 382 func (st *State) addInputs(mgr *Manager, inputs [][]byte) { 383 if len(inputs) == 0 { 384 return 385 } 386 st.corpusSeq++ 387 for _, input := range inputs { 388 st.addInput(mgr, input) 389 } 390 if err := mgr.Corpus.Flush(); err != nil { 391 log.Logf(0, "failed to flush corpus database: %v", err) 392 } 393 if err := st.Corpus.Flush(); err != nil { 394 log.Logf(0, "failed to flush corpus database: %v", err) 395 } 396 } 397 398 func (st *State) addInput(mgr *Manager, input []byte) { 399 _, ncalls, err := prog.CallSet(input) 400 if err != nil { 401 log.Logf(0, "manager %v: failed to extract call set: %v, program:\n%v", mgr.name, err, string(input)) 402 return 403 } 404 if want := prog.MaxCalls; ncalls > want { 405 log.Logf(0, "manager %v: too long program, ignoring (%v/%v)", mgr.name, ncalls, want) 406 return 407 } 408 sig := hash.String(input) 409 mgr.Corpus.Save(sig, nil, 0) 410 if _, ok := st.Corpus.Records[sig]; !ok { 411 st.Corpus.Save(sig, input, st.corpusSeq) 412 } 413 } 414 415 func (st *State) purgeCorpus() { 416 used := make(map[string]bool) 417 for _, mgr := range st.Managers { 418 for sig := range mgr.Corpus.Records { 419 used[sig] = true 420 } 421 } 422 for key := range st.Corpus.Records { 423 if used[key] { 424 continue 425 } 426 st.Corpus.Delete(key) 427 } 428 if err := st.Corpus.Flush(); err != nil { 429 log.Logf(0, "failed to flush corpus database: %v", err) 430 } 431 } 432 433 func managerSupportsAllCalls(mgr, prog map[string]struct{}) bool { 434 for c := range prog { 435 if _, ok := mgr[c]; !ok { 436 return false 437 } 438 } 439 return true 440 } 441 442 func writeFile(name string, data []byte) { 443 if err := osutil.WriteFile(name, data); err != nil { 444 log.Logf(0, "failed to write file %v: %v", name, err) 445 } 446 } 447 448 func saveSeqFile(filename string, seq uint64) { 449 writeFile(filename, []byte(fmt.Sprint(seq))) 450 } 451 452 func loadSeqFile(filename string) uint64 { 453 str, _ := os.ReadFile(filename) 454 seq, _ := strconv.ParseUint(string(str), 10, 64) 455 return seq 456 }