// Copyright 2024 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package rpcserver

import (
	"bytes"
	"errors"
	"fmt"
	"os"
	"slices"
	"sync"
	"time"

	"github.com/google/syzkaller/pkg/cover"
	"github.com/google/syzkaller/pkg/flatrpc"
	"github.com/google/syzkaller/pkg/fuzzer/queue"
	"github.com/google/syzkaller/pkg/log"
	"github.com/google/syzkaller/pkg/osutil"
	"github.com/google/syzkaller/pkg/report"
	"github.com/google/syzkaller/pkg/stat"
	"github.com/google/syzkaller/prog"
	"github.com/google/syzkaller/sys/targets"
	"github.com/google/syzkaller/vm/dispatcher"
)

// Runner handles the host side of one executor (VM) connection:
// it performs the initial handshake, streams execution requests to the VM,
// processes replies, and tracks the set of in-flight requests so they can be
// failed/recorded if the VM crashes or is shut down.
type Runner struct {
	id            int                 // VM/runner index, also used to pull requests from source
	source        *queue.Distributor  // supplier of execution requests for this runner
	procs         int                 // number of executor processes on the VM
	cover         bool                // whether coverage collection is enabled
	coverEdges    bool
	filterSignal  bool                // whether to drop signal outside kernel text addresses
	debug         bool
	debugTimeouts bool                // enables detectTimeout() diagnostics around each Recv
	sysTarget     *targets.Target
	stats         *runnerStats
	finished      chan bool           // closed when ConnectionLoop exits; nil until the loop starts
	injectExec    chan<- bool         // non-blocking notification that a program started executing
	infoc         chan chan []byte    // QueryStatus hands a reply channel to ConnectionLoop via this
	canonicalizer *cover.CanonicalizerInstance
	nextRequestID int64               // monotonically increasing id for ExecRequest messages
	requests      map[int64]*queue.Request // all requests sent to the VM and not yet completed
	executing     map[int64]bool      // subset of requests for which ExecutingMessage was received
	hanged        map[int64]bool      // requests reported as hanged (result may still arrive later)
	lastExec      *LastExecuting      // ring of recently executed programs, for crash reports
	updInfo       dispatcher.UpdateInfo // optional UI/status callback; may be nil
	resultCh      chan error

	// The mutex protects all the fields below.
	mu          sync.Mutex
	conn        *flatrpc.Conn
	stopped     bool
	machineInfo []byte
}

// runnerStats groups the stat counters updated by the Runner.
type runnerStats struct {
	statExecs              *stat.Val
	statExecRetries        *stat.Val
	statExecutorRestarts   *stat.Val
	statExecBufferTooSmall *stat.Val
	statNoExecRequests     *stat.Val
	statNoExecDuration     *stat.Val
}

// handshakeConfig parameterizes the initial handshake with the executor.
type handshakeConfig struct {
	VMLess     bool
	Timeouts   targets.Timeouts
	LeakFrames []string
	RaceFrames []string
	Files      []string
	Features   flatrpc.Feature

	// Callback() is called in the middle of the handshake process,
	// after the executor's info request has been received.
	// It returns the handshake result (files, features, coverage filter,
	// machine info, canonicalizer) or an error that aborts the handshake.
	Callback func(*flatrpc.InfoRequestRawT) (handshakeResult, error)
}

// handshakeResult is what handshakeConfig.Callback produces and what
// Handshake returns to the caller.
type handshakeResult struct {
	Files         []*flatrpc.FileInfo
	Features      []*flatrpc.FeatureInfo
	CovFilter     []uint64
	MachineInfo   []byte
	Canonicalizer *cover.CanonicalizerInstance
}

// Handshake performs the connect/info message exchange with the executor on conn:
// it sends ConnectReply, receives the executor's InfoRequest, lets cfg.Callback
// process it, sends InfoReply, and finally records the connection and machine
// info on the runner. On success the runner is ready for ConnectionLoop.
func (runner *Runner) Handshake(conn *flatrpc.Conn, cfg *handshakeConfig) (handshakeResult, error) {
	if runner.updInfo != nil {
		runner.updInfo(func(info *dispatcher.Info) {
			info.Status = "handshake"
		})
	}

	connectReply := &flatrpc.ConnectReply{
		Debug:            runner.debug,
		Cover:            runner.cover,
		CoverEdges:       runner.coverEdges,
		Kernel64Bit:      runner.sysTarget.PtrSize == 8,
		Procs:            int32(runner.procs),
		Slowdown:         int32(cfg.Timeouts.Slowdown),
		SyscallTimeoutMs: int32(cfg.Timeouts.Syscall / time.Millisecond),
		ProgramTimeoutMs: int32(cfg.Timeouts.Program / time.Millisecond),
		LeakFrames:       cfg.LeakFrames,
		RaceFrames:       cfg.RaceFrames,
		Files:            cfg.Files,
		Features:         cfg.Features,
	}
	if err := flatrpc.Send(conn, connectReply); err != nil {
		return handshakeResult{}, err
	}
	infoReq, err := flatrpc.Recv[*flatrpc.InfoRequestRaw](conn)
	if err != nil {
		return handshakeResult{}, err
	}
	ret, err := cfg.Callback(infoReq)
	if err != nil {
		return handshakeResult{}, err
	}
	infoReply := &flatrpc.InfoReply{
		CoverFilter: ret.CovFilter,
	}
	if err := flatrpc.Send(conn, infoReply); err != nil {
		return handshakeResult{}, err
	}
	runner.mu.Lock()
	runner.conn = conn
	runner.machineInfo = ret.MachineInfo
	runner.canonicalizer = ret.Canonicalizer
	runner.mu.Unlock()

	// Expose machine info and detailed status queries to the dispatcher UI.
	if runner.updInfo != nil {
		runner.updInfo(func(info *dispatcher.Info) {
			info.MachineInfo = runner.MachineInfo
			info.DetailedStatus = runner.QueryStatus
		})
	}
	return ret, nil
}

// ConnectionLoop is the main request/reply loop for an already-handshaked
// connection. It keeps up to 2*procs requests in flight, dispatches executor
// messages to the appropriate handlers, and services QueryStatus requests
// arriving via runner.infoc. It returns nil on clean shutdown and an error
// if the connection breaks or the executor misbehaves.
func (runner *Runner) ConnectionLoop() error {
	if runner.updInfo != nil {
		runner.updInfo(func(info *dispatcher.Info) {
			info.Status = "executing"
		})
	}

	runner.mu.Lock()
	stopped := runner.stopped
	if !stopped {
		runner.finished = make(chan bool)
	}
	runner.mu.Unlock()

	if stopped {
		// The instance was shut down in between, see the shutdown code.
		return nil
	}
	defer close(runner.finished)

	// infoc, when non-nil, is a pending QueryStatus reply channel that must
	// receive exactly one message; if the loop exits first, report a crash.
	var infoc chan []byte
	defer func() {
		if infoc != nil {
			infoc <- []byte("VM has crashed")
		}
	}()
	for {
		if infoc == nil {
			// Non-blocking check for a status query; forward it to the VM.
			select {
			case infoc = <-runner.infoc:
				err := runner.sendStateRequest()
				if err != nil {
					return err
				}
			default:
			}
		}
		// Keep the VM saturated: maintain up to 2*procs requests that have
		// been sent but not yet started executing.
		for len(runner.requests)-len(runner.executing) < 2*runner.procs {
			req := runner.source.Next(runner.id)
			if req == nil {
				break
			}
			if err := runner.sendRequest(req); err != nil {
				return err
			}
		}
		if len(runner.requests) == 0 {
			if !runner.Alive() {
				return nil
			}
			// The runner has no new requests, so don't wait to receive anything from it.
			time.Sleep(10 * time.Millisecond)
			continue
		}
		raw, err := wrappedRecv[*flatrpc.ExecutorMessageRaw](runner)
		if err != nil {
			return err
		}
		if raw.Msg == nil || raw.Msg.Value == nil {
			return errors.New("received no message")
		}
		switch msg := raw.Msg.Value.(type) {
		case *flatrpc.ExecutingMessage:
			err = runner.handleExecutingMessage(msg)
		case *flatrpc.ExecResult:
			err = runner.handleExecResult(msg)
		case *flatrpc.StateResult:
			// Prepend the list of in-flight request ids to the VM's own state dump.
			buf := new(bytes.Buffer)
			fmt.Fprintf(buf, "pending requests on the VM:")
			for id := range runner.requests {
				fmt.Fprintf(buf, " %v", id)
			}
			fmt.Fprintf(buf, "\n\n")
			result := append(buf.Bytes(), msg.Data...)
			if infoc != nil {
				infoc <- result
				infoc = nil
			} else {
				// The request was solicited in detectTimeout().
				log.Logf(0, "status result: %s", result)
			}
		default:
			return fmt.Errorf("received unknown message type %T", msg)
		}
		if err != nil {
			return err
		}
	}
}

// wrappedRecv receives one message from the executor connection; when
// debugTimeouts is enabled, it also arms a watchdog that dumps state and
// eventually aborts the connection if the receive stalls.
func wrappedRecv[Raw flatrpc.RecvType[T], T any](runner *Runner) (*T, error) {
	if runner.debugTimeouts {
		abort := runner.detectTimeout()
		defer close(abort)
	}
	return flatrpc.Recv[Raw](runner.conn)
}

// detectTimeout starts a watchdog goroutine. If neither the returned abort
// channel is closed nor runner.finished fires within a minute, it requests a
// state dump from the VM (logged by ConnectionLoop) and schedules the
// connection to be force-closed one more minute later.
func (runner *Runner) detectTimeout() chan struct{} {
	abort := make(chan struct{})
	go func() {
		select {
		case <-time.After(time.Minute):
			log.Logf(0, "timed out waiting for executor reply, aborting the connection in 1 minute")
			go func() {
				time.Sleep(time.Minute)
				runner.conn.Close()
			}()
			err := runner.sendStateRequest()
			if err != nil {
				log.Logf(0, "failed to send state request: %v", err)
				return
			}

		case <-abort:
			return
		case <-runner.finished:
			return
		}
	}()
	return abort
}

// sendStateRequest asks the executor to reply with a StateResult dump.
func (runner *Runner) sendStateRequest() error {
	msg := &flatrpc.HostMessage{
		Msg: &flatrpc.HostMessages{
			Type:  flatrpc.HostMessagesRawStateRequest,
			Value: &flatrpc.StateRequest{},
		},
	}
	return flatrpc.Send(runner.conn, msg)
}

// sendRequest serializes req into an ExecRequest message, assigns it a fresh
// id, registers it in runner.requests and sends it to the VM. Serialization
// failures complete the request locally with ExecFailure and return nil (they
// are not connection errors).
func (runner *Runner) sendRequest(req *queue.Request) error {
	if err := req.Validate(); err != nil {
		panic(err)
	}
	runner.nextRequestID++
	id := runner.nextRequestID
	var flags flatrpc.RequestFlag
	if req.ReturnOutput {
		flags |= flatrpc.RequestFlagReturnOutput
	}
	if req.ReturnError {
		flags |= flatrpc.RequestFlagReturnError
	}
	allSignal := make([]int32, len(req.ReturnAllSignal))
	for i, call := range req.ReturnAllSignal {
		allSignal[i] = int32(call)
	}
	opts := req.ExecOpts
	if runner.debug {
		opts.EnvFlags |= flatrpc.ExecEnvDebug
	}
	var data []byte
	switch req.Type {
	case flatrpc.RequestTypeProgram:
		progData, err := req.Prog.SerializeForExec()
		if err != nil {
			// It's bad if we systematically fail to serialize programs,
			// but so far we don't have a better handling than counting this.
			// This error is observed a lot on the seeded syz_mount_image calls.
			runner.stats.statExecBufferTooSmall.Add(1)
			req.Done(&queue.Result{
				Status: queue.ExecFailure,
				Err:    fmt.Errorf("program serialization failed: %w", err),
			})
			return nil
		}
		data = progData
	case flatrpc.RequestTypeBinary:
		fileData, err := os.ReadFile(req.BinaryFile)
		if err != nil {
			req.Done(&queue.Result{
				Status: queue.ExecFailure,
				Err:    err,
			})
			return nil
		}
		data = fileData
	case flatrpc.RequestTypeGlob:
		// Glob pattern is passed as a NUL-terminated string; the expansion
		// comes back as output, so always request it.
		data = append([]byte(req.GlobPattern), 0)
		flags |= flatrpc.RequestFlagReturnOutput
	default:
		panic("unhandled request type")
	}
	// Build the bitmask of procs on this VM that the request wants to avoid.
	var avoid uint64
	for _, id := range req.Avoid {
		if id.VM == runner.id {
			avoid |= uint64(1 << id.Proc)
		}
	}
	msg := &flatrpc.HostMessage{
		Msg: &flatrpc.HostMessages{
			Type: flatrpc.HostMessagesRawExecRequest,
			Value: &flatrpc.ExecRequest{
				Id:        id,
				Type:      req.Type,
				Avoid:     avoid,
				Data:      data,
				Flags:     flags,
				ExecOpts:  &opts,
				AllSignal: allSignal,
			},
		},
	}
	runner.requests[id] = req
	return flatrpc.Send(runner.conn, msg)
}

// handleExecutingMessage processes the executor's notification that a request
// has started executing: it updates stats, records the program in lastExec
// (used for crash reports), and marks the request as executing.
func (runner *Runner) handleExecutingMessage(msg *flatrpc.ExecutingMessage) error {
	req := runner.requests[msg.Id]
	if req == nil {
		if runner.hanged[msg.Id] {
			// The request was already completed as hanged; ignore.
			return nil
		}
		return fmt.Errorf("can't find executing request %v", msg.Id)
	}
	proc := int(msg.ProcId)
	if proc < 0 || proc >= prog.MaxPids {
		return fmt.Errorf("got bad proc id %v", proc)
	}
	runner.stats.statExecs.Add(1)
	if msg.Try == 0 {
		if msg.WaitDuration != 0 {
			runner.stats.statNoExecRequests.Add(1)
			// Cap wait duration to 1 second to avoid extreme peaks on the graph
			// which make it impossible to see real data (the rest becomes a flat line).
			runner.stats.statNoExecDuration.Add(int(min(msg.WaitDuration, 1e9)))
		}
	} else {
		runner.stats.statExecRetries.Add(1)
	}
	var data []byte
	switch req.Type {
	case flatrpc.RequestTypeProgram:
		data = req.Prog.Serialize()
	case flatrpc.RequestTypeBinary:
		data = []byte(fmt.Sprintf("executing binary %v\n", req.BinaryFile))
	case flatrpc.RequestTypeGlob:
		data = []byte(fmt.Sprintf("expanding glob: %v\n", req.GlobPattern))
	default:
		panic(fmt.Sprintf("unhandled request type %v", req.Type))
	}
	runner.lastExec.Note(int(msg.Id), proc, data, osutil.MonotonicNano())
	// Non-blocking poke of the leak-check/exec-injection listener, if any.
	select {
	case runner.injectExec <- true:
	default:
	}
	runner.executing[msg.Id] = true
	return nil
}

// handleExecResult processes the final result of a request: it normalizes the
// per-call info (padding/truncating to the program's call count, merging extra
// signal, canonicalizing coverage), classifies the outcome (success/failure/
// hanged) and completes the queue.Request.
func (runner *Runner) handleExecResult(msg *flatrpc.ExecResult) error {
	req := runner.requests[msg.Id]
	if req == nil {
		if runner.hanged[msg.Id] {
			// Got result for a program that was previously reported hanged
			// (probably execution was just extremely slow). Can't report result
			// to pkg/fuzzer since it already handled completion of the request,
			// but shouldn't report an error and crash the VM as well.
			delete(runner.hanged, msg.Id)
			return nil
		}
		return fmt.Errorf("can't find executed request %v", msg.Id)
	}
	delete(runner.requests, msg.Id)
	delete(runner.executing, msg.Id)
	if req.Type == flatrpc.RequestTypeProgram && msg.Info != nil {
		// Ensure Info.Calls has exactly one entry per program call;
		// missing entries get the synthetic errno 999.
		for len(msg.Info.Calls) < len(req.Prog.Calls) {
			msg.Info.Calls = append(msg.Info.Calls, &flatrpc.CallInfo{
				Error: 999,
			})
		}
		msg.Info.Calls = msg.Info.Calls[:len(req.Prog.Calls)]
		if msg.Info.Freshness == 0 {
			runner.stats.statExecutorRestarts.Add(1)
		}
		for _, call := range msg.Info.Calls {
			runner.convertCallInfo(call)
		}
		if len(msg.Info.ExtraRaw) != 0 {
			msg.Info.Extra = msg.Info.ExtraRaw[0]
			for _, info := range msg.Info.ExtraRaw[1:] {
				// All processing in the fuzzer later will convert signal/cover to maps and dedup,
				// so there is little point in deduping here.
				msg.Info.Extra.Cover = append(msg.Info.Extra.Cover, info.Cover...)
				msg.Info.Extra.Signal = append(msg.Info.Extra.Signal, info.Signal...)
			}
			msg.Info.ExtraRaw = nil
			runner.convertCallInfo(msg.Info.Extra)
		}
		if !runner.cover && req.ExecOpts.ExecFlags&flatrpc.ExecFlagCollectSignal != 0 {
			// Coverage collection is disabled, but signal was requested => use a substitute signal.
			// Note that we do it after all the processing above in order to prevent it from being
			// filtered out.
			addFallbackSignal(req.Prog, msg.Info)
		}
	}
	status := queue.Success
	var resErr error
	if msg.Error != "" {
		status = queue.ExecFailure
		resErr = errors.New(msg.Error)
	} else if msg.Hanged {
		status = queue.Hanged
		if req.Type == flatrpc.RequestTypeProgram {
			// We only track the latest executed programs.
			runner.lastExec.Hanged(int(msg.Id), int(msg.Proc), req.Prog.Serialize(), osutil.MonotonicNano())
		}
		// Remember the id: a late ExecResult/ExecutingMessage for it must not
		// be treated as a protocol error (see the lookups above).
		runner.hanged[msg.Id] = true
	}
	req.Done(&queue.Result{
		Executor: queue.ExecutorID{
			VM:   runner.id,
			Proc: int(msg.Proc),
		},
		Status: status,
		Info:   msg.Info,
		Output: slices.Clone(msg.Output),
		Err:    resErr,
	})
	return nil
}

// convertCallInfo canonicalizes coverage/signal PCs for this VM instance and
// filters out bogus signal and uninteresting kernel-pointer comparisons.
func (runner *Runner) convertCallInfo(call *flatrpc.CallInfo) {
	call.Cover = runner.canonicalizer.Canonicalize(call.Cover)
	call.Signal = runner.canonicalizer.Canonicalize(call.Signal)

	// Drop comparisons whose PC cannot be canonicalized.
	call.Comps = slices.DeleteFunc(call.Comps, func(cmp *flatrpc.Comparison) bool {
		converted := runner.canonicalizer.Canonicalize([]uint64{cmp.Pc})
		if len(converted) == 0 {
			return true
		}
		cmp.Pc = converted[0]
		return false
	})

	// Check signal belongs to kernel addresses.
	// Mismatching addresses can mean either corrupted VM memory, or that the fuzzer somehow
	// managed to inject output signal. If we see any bogus signal, drop whole signal
	// (we don't want programs that can inject bogus coverage to end up in the corpus).
	var kernelAddresses targets.KernelAddresses
	if runner.filterSignal {
		kernelAddresses = runner.sysTarget.KernelAddresses
	}
	textStart, textEnd := kernelAddresses.TextStart, kernelAddresses.TextEnd
	if textStart != 0 {
		for _, sig := range call.Signal {
			if sig < textStart || sig > textEnd {
				call.Signal = []uint64{}
				call.Cover = []uint64{}
				break
			}
		}
	}

	// Filter out kernel physical memory addresses.
	// These are internal kernel comparisons and should not be interesting.
	dataStart, dataEnd := kernelAddresses.DataStart, kernelAddresses.DataEnd
	if len(call.Comps) != 0 && (textStart != 0 || dataStart != 0) {
		if runner.sysTarget.PtrSize == 4 {
			// These will appear sign-extended in comparison operands.
			textStart = uint64(int64(int32(textStart)))
			textEnd = uint64(int64(int32(textEnd)))
			dataStart = uint64(int64(int32(dataStart)))
			dataEnd = uint64(int64(int32(dataEnd)))
		}
		isKptr := func(val uint64) bool {
			return val >= textStart && val <= textEnd || val >= dataStart && val <= dataEnd || val == 0
		}
		call.Comps = slices.DeleteFunc(call.Comps, func(cmp *flatrpc.Comparison) bool {
			return isKptr(cmp.Op1) && isKptr(cmp.Op2)
		})
	}
}

// SendSignalUpdate pushes new max signal to the executor, translated back
// into this VM's (non-canonical) PC space.
func (runner *Runner) SendSignalUpdate(plus []uint64) error {
	msg := &flatrpc.HostMessage{
		Msg: &flatrpc.HostMessages{
			Type: flatrpc.HostMessagesRawSignalUpdate,
			Value: &flatrpc.SignalUpdate{
				NewMax: runner.canonicalizer.Decanonicalize(plus),
			},
		},
	}
	return flatrpc.Send(runner.conn, msg)
}

// SendCorpusTriaged notifies the executor that corpus triage has completed.
func (runner *Runner) SendCorpusTriaged() error {
	msg := &flatrpc.HostMessage{
		Msg: &flatrpc.HostMessages{
			Type:  flatrpc.HostMessagesRawCorpusTriaged,
			Value: &flatrpc.CorpusTriaged{},
		},
	}
	return flatrpc.Send(runner.conn, msg)
}

// Stop marks the runner as stopped and closes the connection (if any),
// which makes ConnectionLoop's pending Recv fail and the loop exit.
func (runner *Runner) Stop() {
	runner.mu.Lock()
	runner.stopped = true
	conn := runner.conn
	runner.mu.Unlock()
	if conn != nil {
		conn.Close()
	}
}

// Shutdown finalizes the runner: it waits for ConnectionLoop to finish (so no
// goroutine touches the maps anymore), collects records of the last executed
// programs (plus extraExecs reported externally, e.g. from a crash report),
// and completes all still-pending requests as Restarted or, if crashed and
// the request was executing, as Crashed.
func (runner *Runner) Shutdown(crashed bool, extraExecs ...report.ExecutorInfo) []ExecRecord {
	runner.mu.Lock()
	runner.stopped = true
	finished := runner.finished
	runner.mu.Unlock()

	if finished != nil {
		// Wait for the connection goroutine to finish and stop touching data.
		<-finished
	}
	records := runner.lastExec.Collect()
	for _, info := range extraExecs {
		req := runner.requests[int64(info.ExecID)]
		// If the request is in executing, it's also already in the records slice.
		if req != nil && !runner.executing[int64(info.ExecID)] {
			records = append(records, ExecRecord{
				ID:   info.ExecID,
				Proc: info.ProcID,
				Prog: req.Prog.Serialize(),
			})
		}
	}
	for id, req := range runner.requests {
		status := queue.Restarted
		if crashed && runner.executing[id] {
			status = queue.Crashed
		}
		req.Done(&queue.Result{Status: status})
	}
	return records
}

// MachineInfo returns the machine info collected during the handshake.
func (runner *Runner) MachineInfo() []byte {
	runner.mu.Lock()
	defer runner.mu.Unlock()
	return runner.machineInfo
}

// QueryStatus requests a detailed status dump from the VM via ConnectionLoop.
// Both the handoff and the reply are bounded by a shared one-minute timeout
// so a wedged loop or VM cannot block the caller indefinitely.
func (runner *Runner) QueryStatus() []byte {
	resc := make(chan []byte, 1)
	timeout := time.After(time.Minute)
	select {
	case runner.infoc <- resc:
	case <-timeout:
		return []byte("VM loop is not responding")
	}
	select {
	case res := <-resc:
		return res
	case <-timeout:
		return []byte("VM is not responding")
	}
}

// Alive reports whether the runner has a live connection and was not stopped.
func (runner *Runner) Alive() bool {
	runner.mu.Lock()
	defer runner.mu.Unlock()
	return runner.conn != nil && !runner.stopped
}

// addFallbackSignal computes simple fallback signal in cases we don't have real coverage signal.
// We use syscall number or-ed with returned errno value as signal.
// At least this gives us all combinations of syscall+errno.
func addFallbackSignal(p *prog.Prog, info *flatrpc.ProgInfo) {
	callInfos := make([]prog.CallInfo, len(info.Calls))
	for i, inf := range info.Calls {
		if inf.Flags&flatrpc.CallFlagExecuted != 0 {
			callInfos[i].Flags |= prog.CallExecuted
		}
		if inf.Flags&flatrpc.CallFlagFinished != 0 {
			callInfos[i].Flags |= prog.CallFinished
		}
		if inf.Flags&flatrpc.CallFlagBlocked != 0 {
			callInfos[i].Flags |= prog.CallBlocked
		}
		callInfos[i].Errno = int(inf.Error)
	}
	p.FallbackSignal(callInfos)
	for i, inf := range callInfos {
		info.Calls[i].Signal = inf.Signal
	}
}