github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/syz-hub/state/state.go (about) 1 // Copyright 2016 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package state 5 6 import ( 7 "fmt" 8 "os" 9 "path/filepath" 10 "sort" 11 "strconv" 12 "strings" 13 "time" 14 15 "github.com/google/syzkaller/pkg/db" 16 "github.com/google/syzkaller/pkg/hash" 17 "github.com/google/syzkaller/pkg/log" 18 "github.com/google/syzkaller/pkg/osutil" 19 "github.com/google/syzkaller/pkg/rpctype" 20 "github.com/google/syzkaller/prog" 21 ) 22 23 // State holds all internal syz-hub state including corpus, 24 // reproducers and information about managers. 25 // It is persisted to and can be restored from a directory. 26 type State struct { 27 corpusSeq uint64 28 reproSeq uint64 29 dir string 30 Corpus *db.DB 31 Repros *db.DB 32 Managers map[string]*Manager 33 } 34 35 // Manager represents one syz-manager instance. 36 type Manager struct { 37 name string 38 dir string 39 HTTP string 40 Domain string 41 corpusSeq uint64 42 reproSeq uint64 43 corpusFile string 44 corpusSeqFile string 45 reproSeqFile string 46 domainFile string 47 ownRepros map[string]bool 48 Connected time.Time 49 Added int 50 Deleted int 51 New int 52 SentRepros int 53 RecvRepros int 54 Calls map[string]struct{} 55 Corpus *db.DB 56 } 57 58 // Make creates State and initializes it from dir. 59 func Make(dir string) (*State, error) { 60 st := &State{ 61 dir: dir, 62 Managers: make(map[string]*Manager), 63 } 64 65 osutil.MkdirAll(st.dir) 66 var err error 67 st.Corpus, st.corpusSeq, err = loadDB(filepath.Join(st.dir, "corpus.db"), "corpus", true) 68 if err != nil { 69 log.Fatal(err) 70 } 71 st.Repros, st.reproSeq, err = loadDB(filepath.Join(st.dir, "repro.db"), "repro", true) 72 if err != nil { 73 log.Fatal(err) 74 } 75 76 managersDir := filepath.Join(st.dir, "manager") 77 osutil.MkdirAll(managersDir) 78 managers, err := os.ReadDir(managersDir) 79 if err != nil { 80 return nil, fmt.Errorf("failed to read %v dir: %w", managersDir, err) 81 } 82 for _, manager := range managers { 83 if strings.HasSuffix(manager.Name(), purgedSuffix) { 84 continue 85 } 86 _, err := st.createManager(manager.Name()) 87 if err != nil { 88 return nil, err 89 } 90 } 91 if err := st.PurgeOldManagers(); err != nil { 92 return nil, err 93 } 94 log.Logf(0, "purging corpus...") 95 st.purgeCorpus() 96 log.Logf(0, "done, %v programs", len(st.Corpus.Records)) 97 return st, err 98 } 99 100 func (st *State) Flush() { 101 if err := st.Corpus.Flush(); err != nil { 102 log.Logf(0, "failed to flush corpus database: %v", err) 103 } 104 for _, mgr := range st.Managers { 105 if err := mgr.Corpus.Flush(); err != nil { 106 log.Logf(0, "failed to flush corpus database: %v", err) 107 } 108 } 109 } 110 111 func loadDB(file, name string, progs bool) (*db.DB, uint64, error) { 112 log.Logf(0, "reading %v...", name) 113 db, err := db.Open(file, true) 114 if err != nil { 115 return nil, 0, fmt.Errorf("failed to open %v database: %w", name, err) 116 } 117 log.Logf(0, "read %v programs", len(db.Records)) 118 var maxSeq uint64 119 for key, rec := range db.Records { 120 if progs { 121 _, ncalls, err := prog.CallSet(rec.Val) 122 if err != nil { 123 log.Logf(0, "bad file: can't parse call set: %v\n%q", err, rec.Val) 124 db.Delete(key) 125 continue 126 } 127 if ncalls > prog.MaxCalls { 128 log.Logf(0, "bad file: too many calls: %v", ncalls) 129 db.Delete(key) 130 continue 131 } 132 if sig := hash.Hash(rec.Val); sig.String() != key { 133 log.Logf(0, "bad file: hash %v, want hash %v", key, sig.String()) 134 db.Delete(key) 135 continue 136 } 137 } 138 maxSeq = max(maxSeq, rec.Seq) 139 } 140 if err := db.Flush(); err != nil { 141 return nil, 0, fmt.Errorf("failed to flush corpus database: %w", err) 142 } 143 return db, maxSeq, nil 144 } 145 146 func (st *State) createManager(name string) (*Manager, error) { 147 dir := filepath.Join(st.dir, "manager", name) 148 osutil.MkdirAll(dir) 149 mgr := &Manager{ 150 name: name, 151 dir: dir, 152 corpusFile: filepath.Join(dir, "corpus.db"), 153 corpusSeqFile: filepath.Join(dir, "seq"), 154 reproSeqFile: filepath.Join(dir, "repro.seq"), 155 domainFile: filepath.Join(dir, "domain"), 156 ownRepros: make(map[string]bool), 157 } 158 mgr.corpusSeq = loadSeqFile(mgr.corpusSeqFile) 159 st.corpusSeq = max(st.corpusSeq, mgr.corpusSeq) 160 mgr.reproSeq = loadSeqFile(mgr.reproSeqFile) 161 if mgr.reproSeq == 0 { 162 mgr.reproSeq = st.reproSeq 163 } 164 st.reproSeq = max(st.reproSeq, mgr.reproSeq) 165 domainData, _ := os.ReadFile(mgr.domainFile) 166 mgr.Domain = string(domainData) 167 corpus, _, err := loadDB(mgr.corpusFile, name, false) 168 if err != nil { 169 return nil, fmt.Errorf("failed to open manager corpus %v: %w", mgr.corpusFile, err) 170 } 171 mgr.Corpus = corpus 172 log.Logf(0, "created manager %v: domain=%v corpus=%v, corpusSeq=%v, reproSeq=%v", 173 mgr.name, mgr.Domain, len(mgr.Corpus.Records), mgr.corpusSeq, mgr.reproSeq) 174 st.Managers[name] = mgr 175 return mgr, nil 176 } 177 178 const purgedSuffix = ".purged" 179 180 func (st *State) PurgeOldManagers() error { 181 const ( 182 timeDay = 24 * time.Hour 183 purgePeriod = 30 * timeDay 184 ) 185 purgedSomething := false 186 for _, mgr := range st.Managers { 187 info, err := os.Stat(mgr.corpusSeqFile) 188 if err != nil { 189 return err 190 } 191 if time.Since(info.ModTime()) < purgePeriod { 192 continue 193 } 194 log.Logf(0, "purging manager %v as it was inactive for %v days", mgr.name, int(purgePeriod/timeDay)) 195 oldDir := mgr.dir + purgedSuffix 196 os.RemoveAll(oldDir) 197 if err := os.Rename(mgr.dir, oldDir); err != nil { 198 return err 199 } 200 delete(st.Managers, mgr.name) 201 purgedSomething = true 202 } 203 if !purgedSomething { 204 return nil 205 } 206 corpus := len(st.Corpus.Records) 207 st.purgeCorpus() 208 log.Logf(0, "reduced corpus from %v to %v programs", corpus, len(st.Corpus.Records)) 209 return nil 210 } 211 212 func (st *State) Connect(name, http, domain string, fresh bool, calls []string, corpus [][]byte) error { 213 mgr := st.Managers[name] 214 if mgr == nil { 215 var err error 216 mgr, err = st.createManager(name) 217 if err != nil { 218 return err 219 } 220 } 221 mgr.HTTP = http 222 mgr.Connected = time.Now() 223 mgr.Domain = domain 224 writeFile(mgr.domainFile, []byte(mgr.Domain)) 225 if fresh { 226 mgr.corpusSeq = 0 227 mgr.reproSeq = st.reproSeq 228 } 229 saveSeqFile(mgr.corpusSeqFile, mgr.corpusSeq) 230 saveSeqFile(mgr.reproSeqFile, mgr.reproSeq) 231 232 mgr.Calls = make(map[string]struct{}) 233 for _, c := range calls { 234 mgr.Calls[c] = struct{}{} 235 } 236 237 os.Remove(mgr.corpusFile) 238 var err error 239 mgr.Corpus, err = db.Open(mgr.corpusFile, true) 240 if err != nil { 241 log.Logf(0, "failed to open corpus database: %v", err) 242 return err 243 } 244 st.addInputs(mgr, corpus) 245 st.purgeCorpus() 246 return nil 247 } 248 249 func (st *State) Sync(name string, add [][]byte, del []string) (string, []rpctype.HubInput, int, error) { 250 mgr := st.Managers[name] 251 if mgr == nil || mgr.Connected.IsZero() { 252 return "", nil, 0, fmt.Errorf("unconnected manager %v", name) 253 } 254 if len(del) != 0 { 255 for _, sig := range del { 256 mgr.Corpus.Delete(sig) 257 } 258 if err := mgr.Corpus.Flush(); err != nil { 259 log.Logf(0, "failed to flush corpus database: %v", err) 260 } 261 st.purgeCorpus() 262 } 263 st.addInputs(mgr, add) 264 progs, more, err := st.pendingInputs(mgr) 265 mgr.Added += len(add) 266 mgr.Deleted += len(del) 267 mgr.New += len(progs) 268 // Update seq file b/c PurgeOldManagers looks at it to detect inactive managers. 269 saveSeqFile(mgr.corpusSeqFile, mgr.corpusSeq) 270 return mgr.Domain, progs, more, err 271 } 272 273 func (st *State) AddRepro(name string, repro []byte) error { 274 mgr := st.Managers[name] 275 if mgr == nil || mgr.Connected.IsZero() { 276 return fmt.Errorf("unconnected manager %v", name) 277 } 278 if _, _, err := prog.CallSet(repro); err != nil { 279 log.Logf(0, "manager %v: failed to extract call set: %v, program:\n%v", 280 mgr.name, err, string(repro)) 281 return nil 282 } 283 sig := hash.String(repro) 284 if _, ok := st.Repros.Records[sig]; ok { 285 return nil 286 } 287 mgr.ownRepros[sig] = true 288 mgr.SentRepros++ 289 if mgr.reproSeq == st.reproSeq { 290 mgr.reproSeq++ 291 saveSeqFile(mgr.reproSeqFile, mgr.reproSeq) 292 } 293 st.reproSeq++ 294 st.Repros.Save(sig, repro, st.reproSeq) 295 if err := st.Repros.Flush(); err != nil { 296 log.Logf(0, "failed to flush repro database: %v", err) 297 } 298 return nil 299 } 300 301 func (st *State) PendingRepro(name string) ([]byte, error) { 302 mgr := st.Managers[name] 303 if mgr == nil || mgr.Connected.IsZero() { 304 return nil, fmt.Errorf("unconnected manager %v", name) 305 } 306 if mgr.reproSeq == st.reproSeq { 307 return nil, nil 308 } 309 var repro []byte 310 minSeq := ^uint64(0) 311 for key, rec := range st.Repros.Records { 312 if mgr.reproSeq >= rec.Seq { 313 continue 314 } 315 if mgr.ownRepros[key] { 316 continue 317 } 318 calls, _, err := prog.CallSet(rec.Val) 319 if err != nil { 320 return nil, fmt.Errorf("failed to extract call set: %w\nprogram: %s", err, rec.Val) 321 } 322 if !managerSupportsAllCalls(mgr.Calls, calls) { 323 continue 324 } 325 if minSeq > rec.Seq { 326 minSeq = rec.Seq 327 repro = rec.Val 328 } 329 } 330 if repro == nil { 331 mgr.reproSeq = st.reproSeq 332 saveSeqFile(mgr.reproSeqFile, mgr.reproSeq) 333 return nil, nil 334 } 335 mgr.RecvRepros++ 336 mgr.reproSeq = minSeq 337 saveSeqFile(mgr.reproSeqFile, mgr.reproSeq) 338 return repro, nil 339 } 340 341 func (st *State) pendingInputs(mgr *Manager) ([]rpctype.HubInput, int, error) { 342 if mgr.corpusSeq == st.corpusSeq { 343 return nil, 0, nil 344 } 345 type Record struct { 346 Key string 347 Val []byte 348 Seq uint64 349 } 350 var records []Record 351 for key, rec := range st.Corpus.Records { 352 if mgr.corpusSeq >= rec.Seq { 353 continue 354 } 355 if _, ok := mgr.Corpus.Records[key]; ok { 356 continue 357 } 358 calls, _, err := prog.CallSet(rec.Val) 359 if err != nil { 360 return nil, 0, fmt.Errorf("failed to extract call set: %w\nprogram: %s", err, rec.Val) 361 } 362 if !managerSupportsAllCalls(mgr.Calls, calls) { 363 continue 364 } 365 records = append(records, Record{key, rec.Val, rec.Seq}) 366 } 367 maxSeq := st.corpusSeq 368 more := 0 369 const ( 370 // Send at most that many records (rounded up to next seq number). 371 maxRecords = 100 372 // If we have way too many records to send (more than capRecords), 373 // cap total number to capRecords and give up sending all. 374 // Otherwise new managers will never chew all this on a busy hub. 375 capRecords = 100000 376 ) 377 if len(records) > maxRecords { 378 sort.Slice(records, func(i, j int) bool { 379 return records[i].Seq < records[j].Seq 380 }) 381 if len(records) > capRecords { 382 records = records[len(records)-capRecords:] 383 } 384 pos := maxRecords 385 maxSeq = records[pos].Seq 386 for pos+1 < len(records) && records[pos+1].Seq == maxSeq { 387 pos++ 388 } 389 pos++ 390 more = len(records) - pos 391 records = records[:pos] 392 } 393 progs := make([]rpctype.HubInput, 0, len(records)) 394 for _, rec := range records { 395 progs = append(progs, rpctype.HubInput{ 396 Domain: st.inputDomain(rec.Key, mgr.Domain), 397 Prog: rec.Val, 398 }) 399 } 400 mgr.corpusSeq = maxSeq 401 saveSeqFile(mgr.corpusSeqFile, mgr.corpusSeq) 402 return progs, more, nil 403 } 404 405 func (st *State) inputDomain(key, self string) string { 406 domain := "" 407 for _, mgr := range st.Managers { 408 same := mgr.Domain == self 409 if !same && domain != "" { 410 continue 411 } 412 if _, ok := mgr.Corpus.Records[key]; !ok { 413 continue 414 } 415 domain = mgr.Domain 416 if same { 417 break 418 } 419 } 420 return domain 421 } 422 423 func (st *State) addInputs(mgr *Manager, inputs [][]byte) { 424 if len(inputs) == 0 { 425 return 426 } 427 st.corpusSeq++ 428 for _, input := range inputs { 429 st.addInput(mgr, input) 430 } 431 if err := mgr.Corpus.Flush(); err != nil { 432 log.Logf(0, "failed to flush corpus database: %v", err) 433 } 434 if err := st.Corpus.Flush(); err != nil { 435 log.Logf(0, "failed to flush corpus database: %v", err) 436 } 437 } 438 439 func (st *State) addInput(mgr *Manager, input []byte) { 440 _, ncalls, err := prog.CallSet(input) 441 if err != nil { 442 log.Logf(0, "manager %v: failed to extract call set: %v, program:\n%v", mgr.name, err, string(input)) 443 return 444 } 445 if want := prog.MaxCalls; ncalls > want { 446 log.Logf(0, "manager %v: too long program, ignoring (%v/%v)", mgr.name, ncalls, want) 447 return 448 } 449 sig := hash.String(input) 450 mgr.Corpus.Save(sig, nil, 0) 451 if _, ok := st.Corpus.Records[sig]; !ok { 452 st.Corpus.Save(sig, input, st.corpusSeq) 453 } 454 } 455 456 func (st *State) purgeCorpus() { 457 used := make(map[string]bool) 458 for _, mgr := range st.Managers { 459 for sig := range mgr.Corpus.Records { 460 used[sig] = true 461 } 462 } 463 for key := range st.Corpus.Records { 464 if used[key] { 465 continue 466 } 467 st.Corpus.Delete(key) 468 } 469 if err := st.Corpus.Flush(); err != nil { 470 log.Logf(0, "failed to flush corpus database: %v", err) 471 } 472 } 473 474 func managerSupportsAllCalls(mgr, prog map[string]struct{}) bool { 475 for c := range prog { 476 if _, ok := mgr[c]; !ok { 477 return false 478 } 479 } 480 return true 481 } 482 483 func writeFile(name string, data []byte) { 484 if err := osutil.WriteFile(name, data); err != nil { 485 log.Logf(0, "failed to write file %v: %v", name, err) 486 } 487 } 488 489 func saveSeqFile(filename string, seq uint64) { 490 writeFile(filename, []byte(fmt.Sprint(seq))) 491 } 492 493 func loadSeqFile(filename string) uint64 { 494 str, _ := os.ReadFile(filename) 495 seq, _ := strconv.ParseUint(string(str), 10, 64) 496 return seq 497 }