github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/syz-manager/hub.go (about)

     1  // Copyright 2018 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package main
     5  
     6  import (
     7  	"fmt"
     8  	"net/http"
     9  	"strings"
    10  	"time"
    11  
    12  	"github.com/google/syzkaller/pkg/auth"
    13  	"github.com/google/syzkaller/pkg/corpus"
    14  	"github.com/google/syzkaller/pkg/flatrpc"
    15  	"github.com/google/syzkaller/pkg/fuzzer"
    16  	"github.com/google/syzkaller/pkg/log"
    17  	"github.com/google/syzkaller/pkg/manager"
    18  	"github.com/google/syzkaller/pkg/mgrconfig"
    19  	"github.com/google/syzkaller/pkg/report"
    20  	"github.com/google/syzkaller/pkg/report/crash"
    21  	"github.com/google/syzkaller/pkg/rpctype"
    22  	"github.com/google/syzkaller/pkg/stat"
    23  	"github.com/google/syzkaller/prog"
    24  )
    25  
    26  type keyGetter func() (string, error)
    27  
    28  func pickGetter(key string) keyGetter {
    29  	if key != "" {
    30  		return func() (string, error) { return key, nil }
    31  	}
    32  	// Attempts oauth when the configured hub_key is empty.
    33  	tokenCache, err := auth.MakeCache(http.NewRequest, http.DefaultClient.Do)
    34  	if err != nil {
    35  		log.Fatalf("failed to make auth cache %v", err)
    36  	}
    37  	return func() (string, error) {
    38  		return tokenCache.Get(time.Now())
    39  	}
    40  }
    41  
    42  func (mgr *Manager) hubSyncLoop(keyGet keyGetter, enabledSyscalls map[*prog.Syscall]bool) {
    43  	hc := &HubConnector{
    44  		mgr:           mgr,
    45  		cfg:           mgr.cfg,
    46  		target:        mgr.target,
    47  		domain:        mgr.cfg.TargetOS + "/" + mgr.cfg.HubDomain,
    48  		enabledCalls:  enabledSyscalls,
    49  		leak:          mgr.enabledFeatures&flatrpc.FeatureLeak != 0,
    50  		fresh:         mgr.fresh,
    51  		hubReproQueue: mgr.externalReproQueue,
    52  		keyGet:        keyGet,
    53  
    54  		statRecvProg:      stat.New("hub recv prog", "", stat.Graph("hub progs")),
    55  		statRecvProgDrop:  stat.New("hub recv prog drop", "", stat.NoGraph),
    56  		statSendRepro:     stat.New("hub send repro", "", stat.Graph("hub repros")),
    57  		statRecvRepro:     stat.New("hub recv repro", "", stat.Graph("hub repros")),
    58  		statRecvReproDrop: stat.New("hub recv repro drop", "", stat.NoGraph),
    59  	}
    60  	if mgr.cfg.Reproduce && mgr.dash != nil {
    61  		// Request reproducers from hub only if there is nothing else to reproduce.
    62  		hc.needMoreRepros = mgr.reproLoop.Empty
    63  	}
    64  	hc.loop()
    65  }
    66  
// HubConnector exchanges corpus programs and crash reproducers with the hub
// on behalf of a single manager instance.
type HubConnector struct {
	mgr            HubManagerView   // restricted view of the owning Manager
	cfg            *mgrconfig.Config
	target         *prog.Target
	domain         string // "<target OS>/<hub domain>", used to decide re-minimization/re-smashing of hub inputs
	enabledCalls   map[*prog.Syscall]bool
	leak           bool // leak checking is enabled (overrides repro type to MemoryLeak)
	fresh          bool // true until the first successful Hub.Connect
	newRepros      [][]byte // repros collected but not yet delivered to the hub
	hubReproQueue  chan *manager.Crash // queue of repros received from the hub
	needMoreRepros func() bool // nil unless hub repro requests are enabled
	keyGet         keyGetter

	statRecvProg      *stat.Val
	statRecvProgDrop  *stat.Val
	statSendRepro     *stat.Val
	statRecvRepro     *stat.Val
	statRecvReproDrop *stat.Val
}
    86  
// HubManagerView restricts interface between HubConnector and Manager.
type HubManagerView interface {
	// getMinimizedCorpus returns corpus items to upload to the hub.
	getMinimizedCorpus() []*corpus.Item
	// getNewRepros returns serialized repros not yet sent to the hub.
	getNewRepros() [][]byte
	// addNewCandidates hands programs received from the hub to the manager.
	addNewCandidates(candidates []fuzzer.Candidate)
	// needMoreCandidates reports whether the manager wants hub inputs now
	// (when false, loop() skips the sync round entirely).
	needMoreCandidates() bool
	// hubIsUnreachable notifies the manager that several consecutive
	// connection attempts failed without a single successful sync.
	hubIsUnreachable()
}
    95  
    96  func (hc *HubConnector) loop() {
    97  	var hub *rpctype.RPCClient
    98  	var doneOnce bool
    99  	var connectTime time.Time
   100  	for query := 0; ; time.Sleep(10 * time.Minute) {
   101  		if hub == nil {
   102  			var corpus []*corpus.Item
   103  			// If we are using fake coverage, don't send our corpus to the hub.
   104  			// It should be lower quality than coverage-guided corpus.
   105  			// However still send repros and accept new inputs.
   106  			if hc.cfg.Cover {
   107  				corpus = hc.mgr.getMinimizedCorpus()
   108  			}
   109  			var err error
   110  			if hub, err = hc.connect(corpus); err != nil {
   111  				log.Logf(0, "failed to connect to hub at %v: %v", hc.cfg.HubAddr, err)
   112  			} else {
   113  				log.Logf(0, "connected to hub at %v, corpus %v", hc.cfg.HubAddr, len(corpus))
   114  				connectTime = time.Now()
   115  			}
   116  		}
   117  		if hub != nil && hc.mgr.needMoreCandidates() {
   118  			repros := hc.mgr.getNewRepros()
   119  			hc.newRepros = append(hc.newRepros, repros...)
   120  			if err := hc.sync(hub); err != nil {
   121  				log.Logf(0, "hub sync failed: %v", err)
   122  				hub.Close()
   123  				hub = nil
   124  			} else {
   125  				doneOnce = true
   126  			}
   127  		}
   128  		query++
   129  		const maxAttempts = 3
   130  		if hub == nil && query >= maxAttempts && !doneOnce {
   131  			hc.mgr.hubIsUnreachable()
   132  		}
   133  		// We used to send corpus updates (added/removed elements) to the hub in each sync.
   134  		// But that produced too much churn since hub algorithm is O(N^2) (distributing everything
   135  		// to everybody), and lots of new inputs are later removed (either we can't reproduce coverage
   136  		// after restart, or inputs removed during corpus minimization). So now we don't send new inputs
   137  		// in each sync, instead we aim at sending corpus once after initial triage. This solves
   138  		// the problem with non-reproducible/removed inputs. Typical instance life-time on syzbot is <24h,
   139  		// for such instances we send the corpus once. If an instance somehow lives for longer, then we
   140  		// re-connect and re-send once in a while (e.g. a local long-running instance).
   141  		if hub != nil && time.Since(connectTime) > 30*time.Hour {
   142  			log.Logf(0, "re-syncing with hub")
   143  			hub.Close()
   144  			hub = nil
   145  		}
   146  	}
   147  }
   148  
   149  func (hc *HubConnector) connect(corpus []*corpus.Item) (*rpctype.RPCClient, error) {
   150  	key, err := hc.keyGet()
   151  	if err != nil {
   152  		return nil, err
   153  	}
   154  	hub, err := rpctype.NewRPCClient(hc.cfg.HubAddr)
   155  	if err != nil {
   156  		return nil, err
   157  	}
   158  	http := publicWebAddr(hc.cfg.HTTP)
   159  	a := &rpctype.HubConnectArgs{
   160  		Client:  hc.cfg.HubClient,
   161  		Key:     key,
   162  		Manager: hc.cfg.Name,
   163  		HTTP:    http,
   164  		Domain:  hc.domain,
   165  		Fresh:   hc.fresh,
   166  	}
   167  	for call := range hc.enabledCalls {
   168  		a.Calls = append(a.Calls, call.Name)
   169  	}
   170  	for _, inp := range corpus {
   171  		a.Corpus = append(a.Corpus, inp.Prog.Serialize())
   172  	}
   173  	// Never send more than this, this is never healthy but happens episodically
   174  	// due to various reasons: problems with fallback coverage, bugs in kcov,
   175  	// fuzzer exploiting our infrastructure, etc.
   176  	const max = 100 * 1000
   177  	if len(a.Corpus) > max {
   178  		a.Corpus = a.Corpus[:max]
   179  	}
   180  	err = hub.Call("Hub.Connect", a, nil)
   181  	// Hub.Connect request can be very large, so do it on a transient connection
   182  	// (rpc connection buffers never shrink).
   183  	hub.Close()
   184  	if err != nil {
   185  		return nil, err
   186  	}
   187  	hub, err = rpctype.NewRPCClient(hc.cfg.HubAddr)
   188  	if err != nil {
   189  		return nil, err
   190  	}
   191  	hc.fresh = false
   192  	return hub, nil
   193  }
   194  
   195  func (hc *HubConnector) sync(hub *rpctype.RPCClient) error {
   196  	key, err := hc.keyGet()
   197  	if err != nil {
   198  		return err
   199  	}
   200  	a := &rpctype.HubSyncArgs{
   201  		Client:  hc.cfg.HubClient,
   202  		Key:     key,
   203  		Manager: hc.cfg.Name,
   204  	}
   205  	if hc.needMoreRepros != nil {
   206  		a.NeedRepros = hc.needMoreRepros()
   207  	}
   208  	a.Repros = hc.newRepros
   209  	for {
   210  		r := new(rpctype.HubSyncRes)
   211  		if err := hub.Call("Hub.Sync", a, r); err != nil {
   212  			return err
   213  		}
   214  		minimized, smashed, progDropped := hc.processProgs(r.Inputs)
   215  		reproDropped := hc.processRepros(r.Repros)
   216  		hc.statSendRepro.Add(len(a.Repros))
   217  		hc.statRecvProg.Add(len(r.Inputs) - progDropped)
   218  		hc.statRecvProgDrop.Add(progDropped)
   219  		hc.statRecvRepro.Add(len(r.Repros) - reproDropped)
   220  		hc.statRecvReproDrop.Add(reproDropped)
   221  		log.Logf(0, "hub sync: repros %v;"+
   222  			" recv: progs %v (min %v, smash %v), repros %v; more %v",
   223  			len(a.Repros), len(r.Inputs)-progDropped, minimized, smashed,
   224  			len(r.Repros)-reproDropped, r.More)
   225  		a.Add = nil
   226  		a.Del = nil
   227  		a.Repros = nil
   228  		a.NeedRepros = false
   229  		hc.newRepros = nil
   230  		if len(r.Inputs)+r.More == 0 {
   231  			return nil
   232  		}
   233  	}
   234  }
   235  
   236  func (hc *HubConnector) processProgs(inputs []rpctype.HubInput) (minimized, smashed, dropped int) {
   237  	candidates := make([]fuzzer.Candidate, 0, len(inputs))
   238  	for _, inp := range inputs {
   239  		p, err := hc.parseProgram(inp.Prog)
   240  		if err != nil {
   241  			log.Logf(0, "rejecting program from hub: %v\n%s", err, inp.Prog)
   242  			dropped++
   243  			continue
   244  		}
   245  		min, smash := matchDomains(hc.domain, inp.Domain)
   246  		var flags fuzzer.ProgFlags
   247  		if min && len(p.Calls) < manager.ReminimizeThreshold {
   248  			minimized++
   249  			flags |= fuzzer.ProgMinimized
   250  		}
   251  		if smash {
   252  			smashed++
   253  			flags |= fuzzer.ProgSmashed
   254  		}
   255  		candidates = append(candidates, fuzzer.Candidate{
   256  			Prog:  p,
   257  			Flags: flags,
   258  		})
   259  	}
   260  	hc.mgr.addNewCandidates(candidates)
   261  	return
   262  }
   263  
   264  func matchDomains(self, input string) (bool, bool) {
   265  	if self == "" || input == "" {
   266  		return true, true
   267  	}
   268  	min0, smash0 := splitDomains(self)
   269  	min1, smash1 := splitDomains(input)
   270  	min := min0 != min1
   271  	smash := min || smash0 != smash1
   272  	return min, smash
   273  }
   274  
   275  func splitDomains(domain string) (string, string) {
   276  	delim0 := strings.IndexByte(domain, '/')
   277  	if delim0 == -1 {
   278  		return domain, ""
   279  	}
   280  	if delim0 == len(domain)-1 {
   281  		return domain[:delim0], ""
   282  	}
   283  	delim1 := strings.IndexByte(domain[delim0+1:], '/')
   284  	if delim1 == -1 {
   285  		return domain, ""
   286  	}
   287  	return domain[:delim0+delim1+1], domain[delim0+delim1+2:]
   288  }
   289  
   290  func (hc *HubConnector) processRepros(repros [][]byte) int {
   291  	dropped := 0
   292  	for _, repro := range repros {
   293  		_, err := hc.parseProgram(repro)
   294  		if err != nil {
   295  			log.Logf(0, "rejecting repro from hub: %v\n%s", err, repro)
   296  			dropped++
   297  			continue
   298  		}
   299  		// On a leak instance we override repro type to leak,
   300  		// because otherwise repro package won't even enable leak detection
   301  		// and we won't reproduce leaks from other instances.
   302  		typ := crash.UnknownType
   303  		if hc.leak {
   304  			typ = crash.MemoryLeak
   305  		}
   306  		hc.hubReproQueue <- &manager.Crash{
   307  			FromHub: true,
   308  			Report: &report.Report{
   309  				Type:   typ,
   310  				Output: repro,
   311  			},
   312  		}
   313  	}
   314  	return dropped
   315  }
   316  
   317  func (hc *HubConnector) parseProgram(data []byte) (*prog.Prog, error) {
   318  	p, err := manager.ParseSeed(hc.target, data)
   319  	if err != nil {
   320  		return nil, err
   321  	}
   322  	if !p.OnlyContains(hc.enabledCalls) {
   323  		return nil, fmt.Errorf("contains disabled calls")
   324  	}
   325  	return p, nil
   326  }