github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/syz-hub/state/state.go (about)

     1  // Copyright 2016 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package state
     5  
     6  import (
     7  	"fmt"
     8  	"os"
     9  	"path/filepath"
    10  	"sort"
    11  	"strconv"
    12  	"time"
    13  
    14  	"github.com/google/syzkaller/pkg/db"
    15  	"github.com/google/syzkaller/pkg/hash"
    16  	"github.com/google/syzkaller/pkg/log"
    17  	"github.com/google/syzkaller/pkg/osutil"
    18  	"github.com/google/syzkaller/pkg/rpctype"
    19  	"github.com/google/syzkaller/prog"
    20  )
    21  
    22  // State holds all internal syz-hub state including corpus,
    23  // reproducers and information about managers.
    24  // It is persisted to and can be restored from a directory.
    25  type State struct {
    26  	corpusSeq uint64
    27  	reproSeq  uint64
    28  	dir       string
    29  	Corpus    *db.DB
    30  	Repros    *db.DB
    31  	Managers  map[string]*Manager
    32  }
    33  
    34  // Manager represents one syz-manager instance.
    35  type Manager struct {
    36  	name          string
    37  	Domain        string
    38  	corpusSeq     uint64
    39  	reproSeq      uint64
    40  	corpusFile    string
    41  	corpusSeqFile string
    42  	reproSeqFile  string
    43  	domainFile    string
    44  	ownRepros     map[string]bool
    45  	Connected     time.Time
    46  	Added         int
    47  	Deleted       int
    48  	New           int
    49  	SentRepros    int
    50  	RecvRepros    int
    51  	Calls         map[string]struct{}
    52  	Corpus        *db.DB
    53  }
    54  
    55  // Make creates State and initializes it from dir.
    56  func Make(dir string) (*State, error) {
    57  	st := &State{
    58  		dir:      dir,
    59  		Managers: make(map[string]*Manager),
    60  	}
    61  
    62  	osutil.MkdirAll(st.dir)
    63  	var err error
    64  	st.Corpus, st.corpusSeq, err = loadDB(filepath.Join(st.dir, "corpus.db"), "corpus", true)
    65  	if err != nil {
    66  		log.Fatal(err)
    67  	}
    68  	st.Repros, st.reproSeq, err = loadDB(filepath.Join(st.dir, "repro.db"), "repro", true)
    69  	if err != nil {
    70  		log.Fatal(err)
    71  	}
    72  
    73  	managersDir := filepath.Join(st.dir, "manager")
    74  	osutil.MkdirAll(managersDir)
    75  	managers, err := os.ReadDir(managersDir)
    76  	if err != nil {
    77  		return nil, fmt.Errorf("failed to read %v dir: %w", managersDir, err)
    78  	}
    79  	for _, manager := range managers {
    80  		_, err := st.createManager(manager.Name())
    81  		if err != nil {
    82  			return nil, err
    83  		}
    84  	}
    85  	log.Logf(0, "purging corpus...")
    86  	st.purgeCorpus()
    87  	log.Logf(0, "done, %v programs", len(st.Corpus.Records))
    88  	return st, err
    89  }
    90  
    91  func (st *State) Flush() {
    92  	if err := st.Corpus.Flush(); err != nil {
    93  		log.Logf(0, "failed to flush corpus database: %v", err)
    94  	}
    95  	for _, mgr := range st.Managers {
    96  		if err := mgr.Corpus.Flush(); err != nil {
    97  			log.Logf(0, "failed to flush corpus database: %v", err)
    98  		}
    99  	}
   100  }
   101  
   102  func loadDB(file, name string, progs bool) (*db.DB, uint64, error) {
   103  	log.Logf(0, "reading %v...", name)
   104  	db, err := db.Open(file, true)
   105  	if err != nil {
   106  		return nil, 0, fmt.Errorf("failed to open %v database: %w", name, err)
   107  	}
   108  	log.Logf(0, "read %v programs", len(db.Records))
   109  	var maxSeq uint64
   110  	for key, rec := range db.Records {
   111  		if progs {
   112  			_, ncalls, err := prog.CallSet(rec.Val)
   113  			if err != nil {
   114  				log.Logf(0, "bad file: can't parse call set: %v\n%q", err, rec.Val)
   115  				db.Delete(key)
   116  				continue
   117  			}
   118  			if ncalls > prog.MaxCalls {
   119  				log.Logf(0, "bad file: too many calls: %v", ncalls)
   120  				db.Delete(key)
   121  				continue
   122  			}
   123  			if sig := hash.Hash(rec.Val); sig.String() != key {
   124  				log.Logf(0, "bad file: hash %v, want hash %v", key, sig.String())
   125  				db.Delete(key)
   126  				continue
   127  			}
   128  		}
   129  		if maxSeq < rec.Seq {
   130  			maxSeq = rec.Seq
   131  		}
   132  	}
   133  	if err := db.Flush(); err != nil {
   134  		return nil, 0, fmt.Errorf("failed to flush corpus database: %w", err)
   135  	}
   136  	return db, maxSeq, nil
   137  }
   138  
   139  func (st *State) createManager(name string) (*Manager, error) {
   140  	dir := filepath.Join(st.dir, "manager", name)
   141  	osutil.MkdirAll(dir)
   142  	mgr := &Manager{
   143  		name:          name,
   144  		corpusFile:    filepath.Join(dir, "corpus.db"),
   145  		corpusSeqFile: filepath.Join(dir, "seq"),
   146  		reproSeqFile:  filepath.Join(dir, "repro.seq"),
   147  		domainFile:    filepath.Join(dir, "domain"),
   148  		ownRepros:     make(map[string]bool),
   149  	}
   150  	mgr.corpusSeq = loadSeqFile(mgr.corpusSeqFile)
   151  	if st.corpusSeq < mgr.corpusSeq {
   152  		st.corpusSeq = mgr.corpusSeq
   153  	}
   154  	mgr.reproSeq = loadSeqFile(mgr.reproSeqFile)
   155  	if mgr.reproSeq == 0 {
   156  		mgr.reproSeq = st.reproSeq
   157  	}
   158  	if st.reproSeq < mgr.reproSeq {
   159  		st.reproSeq = mgr.reproSeq
   160  	}
   161  	domainData, _ := os.ReadFile(mgr.domainFile)
   162  	mgr.Domain = string(domainData)
   163  	corpus, _, err := loadDB(mgr.corpusFile, name, false)
   164  	if err != nil {
   165  		return nil, fmt.Errorf("failed to open manager corpus %v: %w", mgr.corpusFile, err)
   166  	}
   167  	mgr.Corpus = corpus
   168  	log.Logf(0, "created manager %v: domain=%v corpus=%v, corpusSeq=%v, reproSeq=%v",
   169  		mgr.name, mgr.Domain, len(mgr.Corpus.Records), mgr.corpusSeq, mgr.reproSeq)
   170  	st.Managers[name] = mgr
   171  	return mgr, nil
   172  }
   173  
   174  func (st *State) Connect(name, domain string, fresh bool, calls []string, corpus [][]byte) error {
   175  	mgr := st.Managers[name]
   176  	if mgr == nil {
   177  		var err error
   178  		mgr, err = st.createManager(name)
   179  		if err != nil {
   180  			return err
   181  		}
   182  	}
   183  	mgr.Connected = time.Now()
   184  	mgr.Domain = domain
   185  	writeFile(mgr.domainFile, []byte(mgr.Domain))
   186  	if fresh {
   187  		mgr.corpusSeq = 0
   188  		mgr.reproSeq = st.reproSeq
   189  	}
   190  	saveSeqFile(mgr.corpusSeqFile, mgr.corpusSeq)
   191  	saveSeqFile(mgr.reproSeqFile, mgr.reproSeq)
   192  
   193  	mgr.Calls = make(map[string]struct{})
   194  	for _, c := range calls {
   195  		mgr.Calls[c] = struct{}{}
   196  	}
   197  
   198  	os.Remove(mgr.corpusFile)
   199  	var err error
   200  	mgr.Corpus, err = db.Open(mgr.corpusFile, true)
   201  	if err != nil {
   202  		log.Logf(0, "failed to open corpus database: %v", err)
   203  		return err
   204  	}
   205  	st.addInputs(mgr, corpus)
   206  	st.purgeCorpus()
   207  	return nil
   208  }
   209  
   210  func (st *State) Sync(name string, add [][]byte, del []string) (string, []rpctype.HubInput, int, error) {
   211  	mgr := st.Managers[name]
   212  	if mgr == nil || mgr.Connected.IsZero() {
   213  		return "", nil, 0, fmt.Errorf("unconnected manager %v", name)
   214  	}
   215  	if len(del) != 0 {
   216  		for _, sig := range del {
   217  			mgr.Corpus.Delete(sig)
   218  		}
   219  		if err := mgr.Corpus.Flush(); err != nil {
   220  			log.Logf(0, "failed to flush corpus database: %v", err)
   221  		}
   222  		st.purgeCorpus()
   223  	}
   224  	st.addInputs(mgr, add)
   225  	progs, more, err := st.pendingInputs(mgr)
   226  	mgr.Added += len(add)
   227  	mgr.Deleted += len(del)
   228  	mgr.New += len(progs)
   229  	return mgr.Domain, progs, more, err
   230  }
   231  
   232  func (st *State) AddRepro(name string, repro []byte) error {
   233  	mgr := st.Managers[name]
   234  	if mgr == nil || mgr.Connected.IsZero() {
   235  		return fmt.Errorf("unconnected manager %v", name)
   236  	}
   237  	if _, _, err := prog.CallSet(repro); err != nil {
   238  		log.Logf(0, "manager %v: failed to extract call set: %v, program:\n%v",
   239  			mgr.name, err, string(repro))
   240  		return nil
   241  	}
   242  	sig := hash.String(repro)
   243  	if _, ok := st.Repros.Records[sig]; ok {
   244  		return nil
   245  	}
   246  	mgr.ownRepros[sig] = true
   247  	mgr.SentRepros++
   248  	if mgr.reproSeq == st.reproSeq {
   249  		mgr.reproSeq++
   250  		saveSeqFile(mgr.reproSeqFile, mgr.reproSeq)
   251  	}
   252  	st.reproSeq++
   253  	st.Repros.Save(sig, repro, st.reproSeq)
   254  	if err := st.Repros.Flush(); err != nil {
   255  		log.Logf(0, "failed to flush repro database: %v", err)
   256  	}
   257  	return nil
   258  }
   259  
   260  func (st *State) PendingRepro(name string) ([]byte, error) {
   261  	mgr := st.Managers[name]
   262  	if mgr == nil || mgr.Connected.IsZero() {
   263  		return nil, fmt.Errorf("unconnected manager %v", name)
   264  	}
   265  	if mgr.reproSeq == st.reproSeq {
   266  		return nil, nil
   267  	}
   268  	var repro []byte
   269  	minSeq := ^uint64(0)
   270  	for key, rec := range st.Repros.Records {
   271  		if mgr.reproSeq >= rec.Seq {
   272  			continue
   273  		}
   274  		if mgr.ownRepros[key] {
   275  			continue
   276  		}
   277  		calls, _, err := prog.CallSet(rec.Val)
   278  		if err != nil {
   279  			return nil, fmt.Errorf("failed to extract call set: %w\nprogram: %s", err, rec.Val)
   280  		}
   281  		if !managerSupportsAllCalls(mgr.Calls, calls) {
   282  			continue
   283  		}
   284  		if minSeq > rec.Seq {
   285  			minSeq = rec.Seq
   286  			repro = rec.Val
   287  		}
   288  	}
   289  	if repro == nil {
   290  		mgr.reproSeq = st.reproSeq
   291  		saveSeqFile(mgr.reproSeqFile, mgr.reproSeq)
   292  		return nil, nil
   293  	}
   294  	mgr.RecvRepros++
   295  	mgr.reproSeq = minSeq
   296  	saveSeqFile(mgr.reproSeqFile, mgr.reproSeq)
   297  	return repro, nil
   298  }
   299  
   300  func (st *State) pendingInputs(mgr *Manager) ([]rpctype.HubInput, int, error) {
   301  	if mgr.corpusSeq == st.corpusSeq {
   302  		return nil, 0, nil
   303  	}
   304  	type Record struct {
   305  		Key string
   306  		Val []byte
   307  		Seq uint64
   308  	}
   309  	var records []Record
   310  	for key, rec := range st.Corpus.Records {
   311  		if mgr.corpusSeq >= rec.Seq {
   312  			continue
   313  		}
   314  		if _, ok := mgr.Corpus.Records[key]; ok {
   315  			continue
   316  		}
   317  		calls, _, err := prog.CallSet(rec.Val)
   318  		if err != nil {
   319  			return nil, 0, fmt.Errorf("failed to extract call set: %w\nprogram: %s", err, rec.Val)
   320  		}
   321  		if !managerSupportsAllCalls(mgr.Calls, calls) {
   322  			continue
   323  		}
   324  		records = append(records, Record{key, rec.Val, rec.Seq})
   325  	}
   326  	maxSeq := st.corpusSeq
   327  	more := 0
   328  	const (
   329  		// Send at most that many records (rounded up to next seq number).
   330  		maxRecords = 100
   331  		// If we have way too many records to send (more than capRecords),
   332  		// cap total number to capRecords and give up sending all.
   333  		// Otherwise new managers will never chew all this on a busy hub.
   334  		capRecords = 100000
   335  	)
   336  	if len(records) > maxRecords {
   337  		sort.Slice(records, func(i, j int) bool {
   338  			return records[i].Seq < records[j].Seq
   339  		})
   340  		if len(records) > capRecords {
   341  			records = records[len(records)-capRecords:]
   342  		}
   343  		pos := maxRecords
   344  		maxSeq = records[pos].Seq
   345  		for pos+1 < len(records) && records[pos+1].Seq == maxSeq {
   346  			pos++
   347  		}
   348  		pos++
   349  		more = len(records) - pos
   350  		records = records[:pos]
   351  	}
   352  	progs := make([]rpctype.HubInput, 0, len(records))
   353  	for _, rec := range records {
   354  		progs = append(progs, rpctype.HubInput{
   355  			Domain: st.inputDomain(rec.Key, mgr.Domain),
   356  			Prog:   rec.Val,
   357  		})
   358  	}
   359  	mgr.corpusSeq = maxSeq
   360  	saveSeqFile(mgr.corpusSeqFile, mgr.corpusSeq)
   361  	return progs, more, nil
   362  }
   363  
   364  func (st *State) inputDomain(key, self string) string {
   365  	domain := ""
   366  	for _, mgr := range st.Managers {
   367  		same := mgr.Domain == self
   368  		if !same && domain != "" {
   369  			continue
   370  		}
   371  		if _, ok := mgr.Corpus.Records[key]; !ok {
   372  			continue
   373  		}
   374  		domain = mgr.Domain
   375  		if same {
   376  			break
   377  		}
   378  	}
   379  	return domain
   380  }
   381  
   382  func (st *State) addInputs(mgr *Manager, inputs [][]byte) {
   383  	if len(inputs) == 0 {
   384  		return
   385  	}
   386  	st.corpusSeq++
   387  	for _, input := range inputs {
   388  		st.addInput(mgr, input)
   389  	}
   390  	if err := mgr.Corpus.Flush(); err != nil {
   391  		log.Logf(0, "failed to flush corpus database: %v", err)
   392  	}
   393  	if err := st.Corpus.Flush(); err != nil {
   394  		log.Logf(0, "failed to flush corpus database: %v", err)
   395  	}
   396  }
   397  
   398  func (st *State) addInput(mgr *Manager, input []byte) {
   399  	_, ncalls, err := prog.CallSet(input)
   400  	if err != nil {
   401  		log.Logf(0, "manager %v: failed to extract call set: %v, program:\n%v", mgr.name, err, string(input))
   402  		return
   403  	}
   404  	if want := prog.MaxCalls; ncalls > want {
   405  		log.Logf(0, "manager %v: too long program, ignoring (%v/%v)", mgr.name, ncalls, want)
   406  		return
   407  	}
   408  	sig := hash.String(input)
   409  	mgr.Corpus.Save(sig, nil, 0)
   410  	if _, ok := st.Corpus.Records[sig]; !ok {
   411  		st.Corpus.Save(sig, input, st.corpusSeq)
   412  	}
   413  }
   414  
   415  func (st *State) purgeCorpus() {
   416  	used := make(map[string]bool)
   417  	for _, mgr := range st.Managers {
   418  		for sig := range mgr.Corpus.Records {
   419  			used[sig] = true
   420  		}
   421  	}
   422  	for key := range st.Corpus.Records {
   423  		if used[key] {
   424  			continue
   425  		}
   426  		st.Corpus.Delete(key)
   427  	}
   428  	if err := st.Corpus.Flush(); err != nil {
   429  		log.Logf(0, "failed to flush corpus database: %v", err)
   430  	}
   431  }
   432  
   433  func managerSupportsAllCalls(mgr, prog map[string]struct{}) bool {
   434  	for c := range prog {
   435  		if _, ok := mgr[c]; !ok {
   436  			return false
   437  		}
   438  	}
   439  	return true
   440  }
   441  
   442  func writeFile(name string, data []byte) {
   443  	if err := osutil.WriteFile(name, data); err != nil {
   444  		log.Logf(0, "failed to write file %v: %v", name, err)
   445  	}
   446  }
   447  
   448  func saveSeqFile(filename string, seq uint64) {
   449  	writeFile(filename, []byte(fmt.Sprint(seq)))
   450  }
   451  
   452  func loadSeqFile(filename string) uint64 {
   453  	str, _ := os.ReadFile(filename)
   454  	seq, _ := strconv.ParseUint(string(str), 10, 64)
   455  	return seq
   456  }