github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/syz-hub/state/state.go (about)

     1  // Copyright 2016 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package state
     5  
     6  import (
     7  	"fmt"
     8  	"os"
     9  	"path/filepath"
    10  	"sort"
    11  	"strconv"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/google/syzkaller/pkg/db"
    16  	"github.com/google/syzkaller/pkg/hash"
    17  	"github.com/google/syzkaller/pkg/log"
    18  	"github.com/google/syzkaller/pkg/osutil"
    19  	"github.com/google/syzkaller/pkg/rpctype"
    20  	"github.com/google/syzkaller/prog"
    21  )
    22  
// State holds all internal syz-hub state including corpus,
// reproducers and information about managers.
// It is persisted to and can be restored from a directory.
type State struct {
	corpusSeq uint64              // highest sequence number used in the global corpus so far
	reproSeq  uint64              // highest sequence number used for repros so far
	dir       string              // root directory the state is persisted in
	Corpus    *db.DB              // global corpus keyed by program hash
	Repros    *db.DB              // global repro database keyed by program hash
	Managers  map[string]*Manager // known managers keyed by name
}
    34  
// Manager represents one syz-manager instance.
type Manager struct {
	name          string              // manager name; also its key in State.Managers
	dir           string              // directory holding this manager's persisted files
	HTTP          string              // http argument of the last Connect call
	Domain        string              // domain of the last Connect, or restored from domainFile
	corpusSeq     uint64              // corpus records with Seq <= corpusSeq are not sent again
	reproSeq      uint64              // repros with Seq <= reproSeq are not sent again
	corpusFile    string              // path of the manager's corpus database
	corpusSeqFile string              // path of the file persisting corpusSeq
	reproSeqFile  string              // path of the file persisting reproSeq
	domainFile    string              // path of the file persisting Domain
	ownRepros     map[string]bool     // hashes of repros added by this manager (not persisted)
	Connected     time.Time           // time of the last Connect; zero until then
	Added         int                 // total number of inputs passed as add to Sync
	Deleted       int                 // total number of hashes passed as del to Sync
	New           int                 // total number of inputs returned by Sync
	SentRepros    int                 // number of new repros accepted from this manager
	RecvRepros    int                 // number of repros handed out to this manager
	Calls         map[string]struct{} // calls passed to the last Connect
	Corpus        *db.DB              // hashes of the programs in the manager's own corpus
}
    57  
    58  // Make creates State and initializes it from dir.
    59  func Make(dir string) (*State, error) {
    60  	st := &State{
    61  		dir:      dir,
    62  		Managers: make(map[string]*Manager),
    63  	}
    64  
    65  	osutil.MkdirAll(st.dir)
    66  	var err error
    67  	st.Corpus, st.corpusSeq, err = loadDB(filepath.Join(st.dir, "corpus.db"), "corpus", true)
    68  	if err != nil {
    69  		log.Fatal(err)
    70  	}
    71  	st.Repros, st.reproSeq, err = loadDB(filepath.Join(st.dir, "repro.db"), "repro", true)
    72  	if err != nil {
    73  		log.Fatal(err)
    74  	}
    75  
    76  	managersDir := filepath.Join(st.dir, "manager")
    77  	osutil.MkdirAll(managersDir)
    78  	managers, err := os.ReadDir(managersDir)
    79  	if err != nil {
    80  		return nil, fmt.Errorf("failed to read %v dir: %w", managersDir, err)
    81  	}
    82  	for _, manager := range managers {
    83  		if strings.HasSuffix(manager.Name(), purgedSuffix) {
    84  			continue
    85  		}
    86  		_, err := st.createManager(manager.Name())
    87  		if err != nil {
    88  			return nil, err
    89  		}
    90  	}
    91  	if err := st.PurgeOldManagers(); err != nil {
    92  		return nil, err
    93  	}
    94  	log.Logf(0, "purging corpus...")
    95  	st.purgeCorpus()
    96  	log.Logf(0, "done, %v programs", len(st.Corpus.Records))
    97  	return st, err
    98  }
    99  
   100  func (st *State) Flush() {
   101  	if err := st.Corpus.Flush(); err != nil {
   102  		log.Logf(0, "failed to flush corpus database: %v", err)
   103  	}
   104  	for _, mgr := range st.Managers {
   105  		if err := mgr.Corpus.Flush(); err != nil {
   106  			log.Logf(0, "failed to flush corpus database: %v", err)
   107  		}
   108  	}
   109  }
   110  
   111  func loadDB(file, name string, progs bool) (*db.DB, uint64, error) {
   112  	log.Logf(0, "reading %v...", name)
   113  	db, err := db.Open(file, true)
   114  	if err != nil {
   115  		return nil, 0, fmt.Errorf("failed to open %v database: %w", name, err)
   116  	}
   117  	log.Logf(0, "read %v programs", len(db.Records))
   118  	var maxSeq uint64
   119  	for key, rec := range db.Records {
   120  		if progs {
   121  			_, ncalls, err := prog.CallSet(rec.Val)
   122  			if err != nil {
   123  				log.Logf(0, "bad file: can't parse call set: %v\n%q", err, rec.Val)
   124  				db.Delete(key)
   125  				continue
   126  			}
   127  			if ncalls > prog.MaxCalls {
   128  				log.Logf(0, "bad file: too many calls: %v", ncalls)
   129  				db.Delete(key)
   130  				continue
   131  			}
   132  			if sig := hash.Hash(rec.Val); sig.String() != key {
   133  				log.Logf(0, "bad file: hash %v, want hash %v", key, sig.String())
   134  				db.Delete(key)
   135  				continue
   136  			}
   137  		}
   138  		maxSeq = max(maxSeq, rec.Seq)
   139  	}
   140  	if err := db.Flush(); err != nil {
   141  		return nil, 0, fmt.Errorf("failed to flush corpus database: %w", err)
   142  	}
   143  	return db, maxSeq, nil
   144  }
   145  
   146  func (st *State) createManager(name string) (*Manager, error) {
   147  	dir := filepath.Join(st.dir, "manager", name)
   148  	osutil.MkdirAll(dir)
   149  	mgr := &Manager{
   150  		name:          name,
   151  		dir:           dir,
   152  		corpusFile:    filepath.Join(dir, "corpus.db"),
   153  		corpusSeqFile: filepath.Join(dir, "seq"),
   154  		reproSeqFile:  filepath.Join(dir, "repro.seq"),
   155  		domainFile:    filepath.Join(dir, "domain"),
   156  		ownRepros:     make(map[string]bool),
   157  	}
   158  	mgr.corpusSeq = loadSeqFile(mgr.corpusSeqFile)
   159  	st.corpusSeq = max(st.corpusSeq, mgr.corpusSeq)
   160  	mgr.reproSeq = loadSeqFile(mgr.reproSeqFile)
   161  	if mgr.reproSeq == 0 {
   162  		mgr.reproSeq = st.reproSeq
   163  	}
   164  	st.reproSeq = max(st.reproSeq, mgr.reproSeq)
   165  	domainData, _ := os.ReadFile(mgr.domainFile)
   166  	mgr.Domain = string(domainData)
   167  	corpus, _, err := loadDB(mgr.corpusFile, name, false)
   168  	if err != nil {
   169  		return nil, fmt.Errorf("failed to open manager corpus %v: %w", mgr.corpusFile, err)
   170  	}
   171  	mgr.Corpus = corpus
   172  	log.Logf(0, "created manager %v: domain=%v corpus=%v, corpusSeq=%v, reproSeq=%v",
   173  		mgr.name, mgr.Domain, len(mgr.Corpus.Records), mgr.corpusSeq, mgr.reproSeq)
   174  	st.Managers[name] = mgr
   175  	return mgr, nil
   176  }
   177  
// purgedSuffix is appended to the directory name of a manager when it is
// purged by PurgeOldManagers. Make skips directories with this suffix when
// restoring managers.
const purgedSuffix = ".purged"
   179  
   180  func (st *State) PurgeOldManagers() error {
   181  	const (
   182  		timeDay     = 24 * time.Hour
   183  		purgePeriod = 30 * timeDay
   184  	)
   185  	purgedSomething := false
   186  	for _, mgr := range st.Managers {
   187  		info, err := os.Stat(mgr.corpusSeqFile)
   188  		if err != nil {
   189  			return err
   190  		}
   191  		if time.Since(info.ModTime()) < purgePeriod {
   192  			continue
   193  		}
   194  		log.Logf(0, "purging manager %v as it was inactive for %v days", mgr.name, int(purgePeriod/timeDay))
   195  		oldDir := mgr.dir + purgedSuffix
   196  		os.RemoveAll(oldDir)
   197  		if err := os.Rename(mgr.dir, oldDir); err != nil {
   198  			return err
   199  		}
   200  		delete(st.Managers, mgr.name)
   201  		purgedSomething = true
   202  	}
   203  	if !purgedSomething {
   204  		return nil
   205  	}
   206  	corpus := len(st.Corpus.Records)
   207  	st.purgeCorpus()
   208  	log.Logf(0, "reduced corpus from %v to %v programs", corpus, len(st.Corpus.Records))
   209  	return nil
   210  }
   211  
   212  func (st *State) Connect(name, http, domain string, fresh bool, calls []string, corpus [][]byte) error {
   213  	mgr := st.Managers[name]
   214  	if mgr == nil {
   215  		var err error
   216  		mgr, err = st.createManager(name)
   217  		if err != nil {
   218  			return err
   219  		}
   220  	}
   221  	mgr.HTTP = http
   222  	mgr.Connected = time.Now()
   223  	mgr.Domain = domain
   224  	writeFile(mgr.domainFile, []byte(mgr.Domain))
   225  	if fresh {
   226  		mgr.corpusSeq = 0
   227  		mgr.reproSeq = st.reproSeq
   228  	}
   229  	saveSeqFile(mgr.corpusSeqFile, mgr.corpusSeq)
   230  	saveSeqFile(mgr.reproSeqFile, mgr.reproSeq)
   231  
   232  	mgr.Calls = make(map[string]struct{})
   233  	for _, c := range calls {
   234  		mgr.Calls[c] = struct{}{}
   235  	}
   236  
   237  	os.Remove(mgr.corpusFile)
   238  	var err error
   239  	mgr.Corpus, err = db.Open(mgr.corpusFile, true)
   240  	if err != nil {
   241  		log.Logf(0, "failed to open corpus database: %v", err)
   242  		return err
   243  	}
   244  	st.addInputs(mgr, corpus)
   245  	st.purgeCorpus()
   246  	return nil
   247  }
   248  
   249  func (st *State) Sync(name string, add [][]byte, del []string) (string, []rpctype.HubInput, int, error) {
   250  	mgr := st.Managers[name]
   251  	if mgr == nil || mgr.Connected.IsZero() {
   252  		return "", nil, 0, fmt.Errorf("unconnected manager %v", name)
   253  	}
   254  	if len(del) != 0 {
   255  		for _, sig := range del {
   256  			mgr.Corpus.Delete(sig)
   257  		}
   258  		if err := mgr.Corpus.Flush(); err != nil {
   259  			log.Logf(0, "failed to flush corpus database: %v", err)
   260  		}
   261  		st.purgeCorpus()
   262  	}
   263  	st.addInputs(mgr, add)
   264  	progs, more, err := st.pendingInputs(mgr)
   265  	mgr.Added += len(add)
   266  	mgr.Deleted += len(del)
   267  	mgr.New += len(progs)
   268  	// Update seq file b/c PurgeOldManagers looks at it to detect inactive managers.
   269  	saveSeqFile(mgr.corpusSeqFile, mgr.corpusSeq)
   270  	return mgr.Domain, progs, more, err
   271  }
   272  
   273  func (st *State) AddRepro(name string, repro []byte) error {
   274  	mgr := st.Managers[name]
   275  	if mgr == nil || mgr.Connected.IsZero() {
   276  		return fmt.Errorf("unconnected manager %v", name)
   277  	}
   278  	if _, _, err := prog.CallSet(repro); err != nil {
   279  		log.Logf(0, "manager %v: failed to extract call set: %v, program:\n%v",
   280  			mgr.name, err, string(repro))
   281  		return nil
   282  	}
   283  	sig := hash.String(repro)
   284  	if _, ok := st.Repros.Records[sig]; ok {
   285  		return nil
   286  	}
   287  	mgr.ownRepros[sig] = true
   288  	mgr.SentRepros++
   289  	if mgr.reproSeq == st.reproSeq {
   290  		mgr.reproSeq++
   291  		saveSeqFile(mgr.reproSeqFile, mgr.reproSeq)
   292  	}
   293  	st.reproSeq++
   294  	st.Repros.Save(sig, repro, st.reproSeq)
   295  	if err := st.Repros.Flush(); err != nil {
   296  		log.Logf(0, "failed to flush repro database: %v", err)
   297  	}
   298  	return nil
   299  }
   300  
   301  func (st *State) PendingRepro(name string) ([]byte, error) {
   302  	mgr := st.Managers[name]
   303  	if mgr == nil || mgr.Connected.IsZero() {
   304  		return nil, fmt.Errorf("unconnected manager %v", name)
   305  	}
   306  	if mgr.reproSeq == st.reproSeq {
   307  		return nil, nil
   308  	}
   309  	var repro []byte
   310  	minSeq := ^uint64(0)
   311  	for key, rec := range st.Repros.Records {
   312  		if mgr.reproSeq >= rec.Seq {
   313  			continue
   314  		}
   315  		if mgr.ownRepros[key] {
   316  			continue
   317  		}
   318  		calls, _, err := prog.CallSet(rec.Val)
   319  		if err != nil {
   320  			return nil, fmt.Errorf("failed to extract call set: %w\nprogram: %s", err, rec.Val)
   321  		}
   322  		if !managerSupportsAllCalls(mgr.Calls, calls) {
   323  			continue
   324  		}
   325  		if minSeq > rec.Seq {
   326  			minSeq = rec.Seq
   327  			repro = rec.Val
   328  		}
   329  	}
   330  	if repro == nil {
   331  		mgr.reproSeq = st.reproSeq
   332  		saveSeqFile(mgr.reproSeqFile, mgr.reproSeq)
   333  		return nil, nil
   334  	}
   335  	mgr.RecvRepros++
   336  	mgr.reproSeq = minSeq
   337  	saveSeqFile(mgr.reproSeqFile, mgr.reproSeq)
   338  	return repro, nil
   339  }
   340  
// pendingInputs returns the next batch of global corpus programs that should
// be sent to mgr, together with the number of selected programs that did not
// fit into this batch. It advances mgr.corpusSeq to the sequence number the
// batch covers and persists it to the manager's corpus seq file.
//
// A program is selected if its sequence number is above mgr.corpusSeq, it is
// not present in the manager's own corpus and the manager supports all of
// its calls. An error is returned if the call set of a stored program cannot
// be extracted; mgr.corpusSeq is not changed in that case.
func (st *State) pendingInputs(mgr *Manager) ([]rpctype.HubInput, int, error) {
	// Nothing was added to the global corpus since the manager last caught up.
	if mgr.corpusSeq == st.corpusSeq {
		return nil, 0, nil
	}
	type Record struct {
		Key string
		Val []byte
		Seq uint64
	}
	var records []Record
	for key, rec := range st.Corpus.Records {
		if mgr.corpusSeq >= rec.Seq {
			continue
		}
		if _, ok := mgr.Corpus.Records[key]; ok {
			continue
		}
		calls, _, err := prog.CallSet(rec.Val)
		if err != nil {
			return nil, 0, fmt.Errorf("failed to extract call set: %w\nprogram: %s", err, rec.Val)
		}
		if !managerSupportsAllCalls(mgr.Calls, calls) {
			continue
		}
		records = append(records, Record{key, rec.Val, rec.Seq})
	}
	// If everything is sent in one batch, the manager is caught up with the hub.
	maxSeq := st.corpusSeq
	more := 0
	const (
		// Send at most that many records (rounded up to next seq number).
		maxRecords = 100
		// If we have way too many records to send (more than capRecords),
		// cap total number to capRecords and give up sending all.
		// Otherwise new managers will never chew all this on a busy hub.
		capRecords = 100000
	)
	if len(records) > maxRecords {
		// Order by sequence number so that the batch is a prefix in seq order.
		sort.Slice(records, func(i, j int) bool {
			return records[i].Seq < records[j].Seq
		})
		if len(records) > capRecords {
			// Keep only the capRecords records with the largest sequence numbers.
			records = records[len(records)-capRecords:]
		}
		// The batch ends at the record with index maxRecords and is extended to
		// include all following records sharing its sequence number, so that
		// advancing mgr.corpusSeq to maxSeq does not skip unsent records.
		pos := maxRecords
		maxSeq = records[pos].Seq
		for pos+1 < len(records) && records[pos+1].Seq == maxSeq {
			pos++
		}
		pos++
		more = len(records) - pos
		records = records[:pos]
	}
	progs := make([]rpctype.HubInput, 0, len(records))
	for _, rec := range records {
		progs = append(progs, rpctype.HubInput{
			Domain: st.inputDomain(rec.Key, mgr.Domain),
			Prog:   rec.Val,
		})
	}
	mgr.corpusSeq = maxSeq
	saveSeqFile(mgr.corpusSeqFile, mgr.corpusSeq)
	return progs, more, nil
}
   404  
   405  func (st *State) inputDomain(key, self string) string {
   406  	domain := ""
   407  	for _, mgr := range st.Managers {
   408  		same := mgr.Domain == self
   409  		if !same && domain != "" {
   410  			continue
   411  		}
   412  		if _, ok := mgr.Corpus.Records[key]; !ok {
   413  			continue
   414  		}
   415  		domain = mgr.Domain
   416  		if same {
   417  			break
   418  		}
   419  	}
   420  	return domain
   421  }
   422  
   423  func (st *State) addInputs(mgr *Manager, inputs [][]byte) {
   424  	if len(inputs) == 0 {
   425  		return
   426  	}
   427  	st.corpusSeq++
   428  	for _, input := range inputs {
   429  		st.addInput(mgr, input)
   430  	}
   431  	if err := mgr.Corpus.Flush(); err != nil {
   432  		log.Logf(0, "failed to flush corpus database: %v", err)
   433  	}
   434  	if err := st.Corpus.Flush(); err != nil {
   435  		log.Logf(0, "failed to flush corpus database: %v", err)
   436  	}
   437  }
   438  
   439  func (st *State) addInput(mgr *Manager, input []byte) {
   440  	_, ncalls, err := prog.CallSet(input)
   441  	if err != nil {
   442  		log.Logf(0, "manager %v: failed to extract call set: %v, program:\n%v", mgr.name, err, string(input))
   443  		return
   444  	}
   445  	if want := prog.MaxCalls; ncalls > want {
   446  		log.Logf(0, "manager %v: too long program, ignoring (%v/%v)", mgr.name, ncalls, want)
   447  		return
   448  	}
   449  	sig := hash.String(input)
   450  	mgr.Corpus.Save(sig, nil, 0)
   451  	if _, ok := st.Corpus.Records[sig]; !ok {
   452  		st.Corpus.Save(sig, input, st.corpusSeq)
   453  	}
   454  }
   455  
   456  func (st *State) purgeCorpus() {
   457  	used := make(map[string]bool)
   458  	for _, mgr := range st.Managers {
   459  		for sig := range mgr.Corpus.Records {
   460  			used[sig] = true
   461  		}
   462  	}
   463  	for key := range st.Corpus.Records {
   464  		if used[key] {
   465  			continue
   466  		}
   467  		st.Corpus.Delete(key)
   468  	}
   469  	if err := st.Corpus.Flush(); err != nil {
   470  		log.Logf(0, "failed to flush corpus database: %v", err)
   471  	}
   472  }
   473  
   474  func managerSupportsAllCalls(mgr, prog map[string]struct{}) bool {
   475  	for c := range prog {
   476  		if _, ok := mgr[c]; !ok {
   477  			return false
   478  		}
   479  	}
   480  	return true
   481  }
   482  
   483  func writeFile(name string, data []byte) {
   484  	if err := osutil.WriteFile(name, data); err != nil {
   485  		log.Logf(0, "failed to write file %v: %v", name, err)
   486  	}
   487  }
   488  
   489  func saveSeqFile(filename string, seq uint64) {
   490  	writeFile(filename, []byte(fmt.Sprint(seq)))
   491  }
   492  
   493  func loadSeqFile(filename string) uint64 {
   494  	str, _ := os.ReadFile(filename)
   495  	seq, _ := strconv.ParseUint(string(str), 10, 64)
   496  	return seq
   497  }