github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/syz-manager/hub.go (about) 1 // Copyright 2018 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package main 5 6 import ( 7 "fmt" 8 "net/http" 9 "strings" 10 "time" 11 12 "github.com/google/syzkaller/pkg/auth" 13 "github.com/google/syzkaller/pkg/corpus" 14 "github.com/google/syzkaller/pkg/flatrpc" 15 "github.com/google/syzkaller/pkg/fuzzer" 16 "github.com/google/syzkaller/pkg/log" 17 "github.com/google/syzkaller/pkg/manager" 18 "github.com/google/syzkaller/pkg/mgrconfig" 19 "github.com/google/syzkaller/pkg/report" 20 "github.com/google/syzkaller/pkg/report/crash" 21 "github.com/google/syzkaller/pkg/rpctype" 22 "github.com/google/syzkaller/pkg/stat" 23 "github.com/google/syzkaller/prog" 24 ) 25 26 type keyGetter func() (string, error) 27 28 func pickGetter(key string) keyGetter { 29 if key != "" { 30 return func() (string, error) { return key, nil } 31 } 32 // Attempts oauth when the configured hub_key is empty. 
33 tokenCache, err := auth.MakeCache(http.NewRequest, http.DefaultClient.Do) 34 if err != nil { 35 log.Fatalf("failed to make auth cache %v", err) 36 } 37 return func() (string, error) { 38 return tokenCache.Get(time.Now()) 39 } 40 } 41 42 func (mgr *Manager) hubSyncLoop(keyGet keyGetter, enabledSyscalls map[*prog.Syscall]bool) { 43 hc := &HubConnector{ 44 mgr: mgr, 45 cfg: mgr.cfg, 46 target: mgr.target, 47 domain: mgr.cfg.TargetOS + "/" + mgr.cfg.HubDomain, 48 enabledCalls: enabledSyscalls, 49 leak: mgr.enabledFeatures&flatrpc.FeatureLeak != 0, 50 fresh: mgr.fresh, 51 hubReproQueue: mgr.externalReproQueue, 52 keyGet: keyGet, 53 54 statRecvProg: stat.New("hub recv prog", "", stat.Graph("hub progs")), 55 statRecvProgDrop: stat.New("hub recv prog drop", "", stat.NoGraph), 56 statSendRepro: stat.New("hub send repro", "", stat.Graph("hub repros")), 57 statRecvRepro: stat.New("hub recv repro", "", stat.Graph("hub repros")), 58 statRecvReproDrop: stat.New("hub recv repro drop", "", stat.NoGraph), 59 } 60 if mgr.cfg.Reproduce && mgr.dash != nil { 61 // Request reproducers from hub only if there is nothing else to reproduce. 62 hc.needMoreRepros = mgr.reproLoop.Empty 63 } 64 hc.loop() 65 } 66 67 type HubConnector struct { 68 mgr HubManagerView 69 cfg *mgrconfig.Config 70 target *prog.Target 71 domain string 72 enabledCalls map[*prog.Syscall]bool 73 leak bool 74 fresh bool 75 newRepros [][]byte 76 hubReproQueue chan *manager.Crash 77 needMoreRepros func() bool 78 keyGet keyGetter 79 80 statRecvProg *stat.Val 81 statRecvProgDrop *stat.Val 82 statSendRepro *stat.Val 83 statRecvRepro *stat.Val 84 statRecvReproDrop *stat.Val 85 } 86 87 // HubManagerView restricts interface between HubConnector and Manager. 
type HubManagerView interface {
	// getMinimizedCorpus returns the current (minimized) corpus to send to the hub.
	getMinimizedCorpus() []*corpus.Item
	// getNewRepros returns serialized reproducers accumulated since the last call.
	getNewRepros() [][]byte
	// addNewCandidates queues programs received from the hub for triage.
	addNewCandidates(candidates []fuzzer.Candidate)
	// needMoreCandidates reports whether the manager wants more inputs from the hub.
	needMoreCandidates() bool
	// hubIsUnreachable notifies the manager that the hub could not be reached.
	hubIsUnreachable()
}

// loop is the main hub exchange loop: it (re)connects to the hub and syncs
// every 10 minutes, tearing down the connection on errors or after 30h
// to force a full corpus re-send. Never returns.
func (hc *HubConnector) loop() {
	var hub *rpctype.RPCClient
	var doneOnce bool
	var connectTime time.Time
	for query := 0; ; time.Sleep(10 * time.Minute) {
		if hub == nil {
			var corpus []*corpus.Item
			// If we are using fake coverage, don't send our corpus to the hub.
			// It should be lower quality than coverage-guided corpus.
			// However still send repros and accept new inputs.
			if hc.cfg.Cover {
				corpus = hc.mgr.getMinimizedCorpus()
			}
			var err error
			if hub, err = hc.connect(corpus); err != nil {
				log.Logf(0, "failed to connect to hub at %v: %v", hc.cfg.HubAddr, err)
			} else {
				log.Logf(0, "connected to hub at %v, corpus %v", hc.cfg.HubAddr, len(corpus))
				connectTime = time.Now()
			}
		}
		if hub != nil && hc.mgr.needMoreCandidates() {
			repros := hc.mgr.getNewRepros()
			hc.newRepros = append(hc.newRepros, repros...)
			if err := hc.sync(hub); err != nil {
				log.Logf(0, "hub sync failed: %v", err)
				hub.Close()
				hub = nil
			} else {
				doneOnce = true
			}
		}
		query++
		const maxAttempts = 3
		// If we never managed a successful sync after several attempts,
		// tell the manager the hub is unreachable.
		if hub == nil && query >= maxAttempts && !doneOnce {
			hc.mgr.hubIsUnreachable()
		}
		// We used to send corpus updates (added/removed elements) to the hub in each sync.
		// But that produced too much churn since hub algorithm is O(N^2) (distributing everything
		// to everybody), and lots of new inputs are later removed (either we can't reproduce coverage
		// after restart, or inputs removed during corpus minimization). So now we don't send new inputs
		// in each sync, instead we aim at sending corpus once after initial triage. This solves
		// the problem with non-reproducible/removed inputs. Typical instance life-time on syzbot is <24h,
		// for such instances we send the corpus once. If an instance somehow lives for longer, then we
		// re-connect and re-send once in a while (e.g. a local long-running instance).
		if hub != nil && time.Since(connectTime) > 30*time.Hour {
			log.Logf(0, "re-syncing with hub")
			hub.Close()
			hub = nil
		}
	}
}

// connect performs the initial Hub.Connect handshake (sending enabled calls
// and the corpus) on a transient RPC connection, then returns a fresh
// connection for subsequent syncs.
func (hc *HubConnector) connect(corpus []*corpus.Item) (*rpctype.RPCClient, error) {
	key, err := hc.keyGet()
	if err != nil {
		return nil, err
	}
	hub, err := rpctype.NewRPCClient(hc.cfg.HubAddr)
	if err != nil {
		return nil, err
	}
	http := publicWebAddr(hc.cfg.HTTP)
	a := &rpctype.HubConnectArgs{
		Client:  hc.cfg.HubClient,
		Key:     key,
		Manager: hc.cfg.Name,
		HTTP:    http,
		Domain:  hc.domain,
		Fresh:   hc.fresh,
	}
	for call := range hc.enabledCalls {
		a.Calls = append(a.Calls, call.Name)
	}
	for _, inp := range corpus {
		a.Corpus = append(a.Corpus, inp.Prog.Serialize())
	}
	// Never send more than this, this is never healthy but happens episodically
	// due to various reasons: problems with fallback coverage, bugs in kcov,
	// fuzzer exploiting our infrastructure, etc.
	const max = 100 * 1000
	if len(a.Corpus) > max {
		a.Corpus = a.Corpus[:max]
	}
	err = hub.Call("Hub.Connect", a, nil)
	// Hub.Connect request can be very large, so do it on a transient connection
	// (rpc connection buffers never shrink).
	hub.Close()
	if err != nil {
		return nil, err
	}
	hub, err = rpctype.NewRPCClient(hc.cfg.HubAddr)
	if err != nil {
		return nil, err
	}
	hc.fresh = false
	return hub, nil
}

// sync performs one full Hub.Sync exchange: sends accumulated repros,
// receives new programs/repros (looping while the hub reports more),
// and updates the connector's stats.
func (hc *HubConnector) sync(hub *rpctype.RPCClient) error {
	key, err := hc.keyGet()
	if err != nil {
		return err
	}
	a := &rpctype.HubSyncArgs{
		Client:  hc.cfg.HubClient,
		Key:     key,
		Manager: hc.cfg.Name,
	}
	if hc.needMoreRepros != nil {
		a.NeedRepros = hc.needMoreRepros()
	}
	a.Repros = hc.newRepros
	for {
		r := new(rpctype.HubSyncRes)
		if err := hub.Call("Hub.Sync", a, r); err != nil {
			return err
		}
		minimized, smashed, progDropped := hc.processProgs(r.Inputs)
		reproDropped := hc.processRepros(r.Repros)
		hc.statSendRepro.Add(len(a.Repros))
		hc.statRecvProg.Add(len(r.Inputs) - progDropped)
		hc.statRecvProgDrop.Add(progDropped)
		hc.statRecvRepro.Add(len(r.Repros) - reproDropped)
		hc.statRecvReproDrop.Add(reproDropped)
		log.Logf(0, "hub sync: repros %v;"+
			" recv: progs %v (min %v, smash %v), repros %v; more %v",
			len(a.Repros), len(r.Inputs)-progDropped, minimized, smashed,
			len(r.Repros)-reproDropped, r.More)
		// Payload was delivered on the first iteration; subsequent iterations
		// only drain the remaining inputs the hub has for us.
		a.Add = nil
		a.Del = nil
		a.Repros = nil
		a.NeedRepros = false
		hc.newRepros = nil
		if len(r.Inputs)+r.More == 0 {
			return nil
		}
	}
}

// processProgs parses programs received from the hub and queues the valid
// ones as fuzzing candidates. Returns how many were considered already
// minimized, already smashed, and how many were dropped as unparseable
// or containing disabled calls.
func (hc *HubConnector) processProgs(inputs []rpctype.HubInput) (minimized, smashed, dropped int) {
	candidates := make([]fuzzer.Candidate, 0, len(inputs))
	for _, inp := range inputs {
		p, err := hc.parseProgram(inp.Prog)
		if err != nil {
			log.Logf(0, "rejecting program from hub: %v\n%s", err, inp.Prog)
			dropped++
			continue
		}
		min, smash := matchDomains(hc.domain, inp.Domain)
		var flags fuzzer.ProgFlags
		if min && len(p.Calls) < manager.ReminimizeThreshold {
			minimized++
			flags |= fuzzer.ProgMinimized
		}
		if smash {
			smashed++
			flags |= fuzzer.ProgSmashed
		}
		candidates = append(candidates, fuzzer.Candidate{
			Prog:  p,
			Flags: flags,
		})
	}
	hc.mgr.addNewCandidates(candidates)
	return
}

// matchDomains compares our domain with the input's origin domain and
// reports whether minimization (first result) and smashing (second result)
// can be skipped for the input. Empty domains skip both.
func matchDomains(self, input string) (bool, bool) {
	if self == "" || input == "" {
		return true, true
	}
	min0, smash0 := splitDomains(self)
	min1, smash1 := splitDomains(input)
	min := min0 != min1
	smash := min || smash0 != smash1
	return min, smash
}

// splitDomains splits "a/b/c" into the minimization domain "a/b" and the
// smashing domain "c"; with fewer than two '/' separators the whole string
// is the minimization domain and the smashing domain is empty.
func splitDomains(domain string) (string, string) {
	delim0 := strings.IndexByte(domain, '/')
	if delim0 == -1 {
		return domain, ""
	}
	if delim0 == len(domain)-1 {
		return domain[:delim0], ""
	}
	delim1 := strings.IndexByte(domain[delim0+1:], '/')
	if delim1 == -1 {
		return domain, ""
	}
	return domain[:delim0+delim1+1], domain[delim0+delim1+2:]
}

// processRepros validates reproducers received from the hub and queues the
// valid ones for reproduction. Returns the number of dropped (invalid) repros.
func (hc *HubConnector) processRepros(repros [][]byte) int {
	dropped := 0
	for _, repro := range repros {
		_, err := hc.parseProgram(repro)
		if err != nil {
			log.Logf(0, "rejecting repro from hub: %v\n%s", err, repro)
			dropped++
			continue
		}
		// On a leak instance we override repro type to leak,
		// because otherwise repro package won't even enable leak detection
		// and we won't reproduce leaks from other instances.
		typ := crash.UnknownType
		if hc.leak {
			typ = crash.MemoryLeak
		}
		hc.hubReproQueue <- &manager.Crash{
			FromHub: true,
			Report: &report.Report{
				Type:   typ,
				Output: repro,
			},
		}
	}
	return dropped
}

// parseProgram deserializes a program received from the hub and rejects it
// if it uses any syscall that is not enabled on this manager.
func (hc *HubConnector) parseProgram(data []byte) (*prog.Prog, error) {
	p, err := manager.ParseSeed(hc.target, data)
	if err != nil {
		return nil, err
	}
	if !p.OnlyContains(hc.enabledCalls) {
		return nil, fmt.Errorf("contains disabled calls")
	}
	return p, nil
}