// Copyright 2024 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

#include <cstring>
#include <fcntl.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <unistd.h>

#include <algorithm>
#include <deque>
#include <iomanip>
#include <memory>
#include <optional>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "pkg/flatrpc/flatrpc.h"

// Pretty-prints an execution request for debug/state dumps (see Proc/Runner operator<< below).
// NOTE(review): dereferences req.exec_opts unconditionally — callers must only print requests
// that carry exec options.
inline std::ostream& operator<<(std::ostream& ss, const rpc::ExecRequestRawT& req)
{
	return ss << "id=" << req.id
		  << " flags=0x" << std::hex << static_cast<uint64>(req.flags)
		  << " env_flags=0x" << std::hex << static_cast<uint64>(req.exec_opts->env_flags())
		  << " exec_flags=0x" << std::hex << static_cast<uint64>(req.exec_opts->exec_flags())
		  << " data_size=" << std::dec << req.data.size()
		  << "\n";
}

// ProcIDPool allows to reuse a set of unique proc IDs across a set of subprocesses.
//
// When a subprocess hangs, it's a bit unclear what to do (we don't have means to kill
// the whole tree of its children, and waiting for all them will presumably hang as well).
// Later there may appear a "task hung" report from the kernel, so we don't want to terminate
// the VM immediately. But the "task hung" report may also not appear at all, so we can't
// just wait for a hanged subprocesses forever.
//
// So in that case we kill/wait just the top subprocesses, and give it a new proc ID
// (since some resources associated with the old proc ID may still be used by the old
// unterminated test processes). However, we don't have infinite number of proc IDs,
// so we recycle them in FIFO order. This is not ideal, but it looks like the best
// practical solution.
46 class ProcIDPool 47 { 48 public: 49 ProcIDPool(int num_procs) 50 { 51 // Theoretically we have 32 procs (prog.MaxPids), but there are some limitations in descriptions 52 // that make them work well only for up to 10 procs. For example, we form /dev/loopN 53 // device name using proc['0', 1, int8]. When these limitations are fixed, 54 // we can use all 32 here (prog.MaxPids) 55 constexpr int kNumGoodProcs = 10; 56 for (int i = 0; i < std::max(num_procs, kNumGoodProcs); i++) 57 ids_.push_back(i); 58 mask_ = 0; 59 } 60 61 int Alloc(int old = -1) 62 { 63 if (old >= 0) { 64 mask_ &= ~(1UL << old); 65 ids_.push_back(old); 66 } 67 if (ids_.empty()) 68 fail("out of proc ids"); 69 int id = ids_.front(); 70 ids_.pop_front(); 71 mask_ |= 1UL << id; 72 return id; 73 } 74 75 uint64 Mask() 76 { 77 return mask_; 78 } 79 80 private: 81 std::deque<int> ids_; 82 uint64 mask_; 83 84 ProcIDPool(const ProcIDPool&) = delete; 85 ProcIDPool& operator=(const ProcIDPool&) = delete; 86 }; 87 88 class ProcOpts 89 { 90 public: 91 bool use_cover_edges = false; 92 bool is_kernel_64_bit = false; 93 uint32 slowdown = 0; 94 uint32 syscall_timeout_ms = 0; 95 uint32 program_timeout_ms = 0; 96 97 private: 98 friend std::ostream& operator<<(std::ostream& ss, const ProcOpts& opts) 99 { 100 ss << "use_cover_edges=" << opts.use_cover_edges 101 << " is_kernel_64_bit=" << opts.is_kernel_64_bit 102 << " slowdown=" << opts.slowdown 103 << " syscall_timeout_ms=" << opts.syscall_timeout_ms 104 << " program_timeout_ms=" << opts.program_timeout_ms; 105 return ss; 106 } 107 }; 108 109 // Proc represents one subprocess that runs tests (re-execed syz-executor with 'exec' argument). 110 // The object is persistent and re-starts subprocess when it crashes. 
class Proc
{
public:
	Proc(Connection& conn, const char* bin, ProcIDPool& proc_id_pool, int& restarting, const bool& corpus_triaged, int max_signal_fd,
	     int cover_filter_fd, ProcOpts opts)
	    : conn_(conn),
	      bin_(bin),
	      proc_id_pool_(proc_id_pool),
	      id_(proc_id_pool.Alloc()),
	      restarting_(restarting),
	      corpus_triaged_(corpus_triaged),
	      max_signal_fd_(max_signal_fd),
	      cover_filter_fd_(cover_filter_fd),
	      opts_(opts),
	      req_shmem_(kMaxInput),
	      resp_shmem_(kMaxOutput),
	      resp_mem_(static_cast<OutputData*>(resp_shmem_.Mem()))
	{
		Start();
	}

	// Tries to take ownership of the request. Returns false if this proc is busy
	// or the request asks to avoid this proc id; on success moves msg into msg_.
	bool Execute(rpc::ExecRequestRawT& msg)
	{
		if (state_ != State::Started && state_ != State::Idle)
			return false;
		// If every allocated proc id is in the avoid mask, the constraint is
		// unsatisfiable — drop it rather than starve the request.
		if (((~msg.avoid) & proc_id_pool_.Mask()) == 0)
			msg.avoid = 0;
		if (msg.avoid & (1ull << id_))
			return false;
		if (msg_)
			fail("already have pending msg");
		if (wait_start_)
			wait_end_ = current_time_ms();
		// Restart every once in a while to not let too much state accumulate.
		// Also restart if the request type differs as it affects program timeout.
		constexpr uint64 kRestartEvery = 600;
		if (state_ == State::Idle && ((corpus_triaged_ && restarting_ == 0 && freshness_ >= kRestartEvery) ||
					      req_type_ != msg.type ||
					      exec_env_ != msg.exec_opts->env_flags() || sandbox_arg_ != msg.exec_opts->sandbox_arg()))
			Restart();
		attempts_ = 0;
		msg_ = std::move(msg);
		// A freshly started subprocess needs a handshake before the first request.
		if (state_ == State::Started)
			Handshake();
		else
			Execute();
		return true;
	}

	// Registers this proc's pipes with the select loop (see Runner::Loop).
	void Arm(Select& select)
	{
		select.Arm(resp_pipe_);
		select.Arm(stdout_pipe_);
	}

	// Called on every loop iteration: handles hang timeouts and any data that
	// became readable on the stdout/response pipes.
	void Ready(Select& select, uint64 now, bool out_of_requests)
	{
		if (state_ == State::Handshaking || state_ == State::Executing) {
			// Check if the subprocess has hung.
#if SYZ_EXECUTOR_USES_FORK_SERVER
			// Child process has an internal timeout and protects against most hangs when
			// fork server is enabled, so we use quite large timeout. Child process can be slow
			// due to global locks in namespaces and other things, so let's better wait than
			// report false misleading crashes.
			uint64 timeout = 3 * ProgramTimeoutMs();
#else
			uint64 timeout = ProgramTimeoutMs();
#endif
			// Sandbox setup can take significant time.
			if (state_ == State::Handshaking)
				timeout = 60 * 1000 * opts_.slowdown;
			if (now > exec_start_ + timeout) {
				Restart();
				return;
			}
		}

		if (select.Ready(stdout_pipe_) && !ReadOutput()) {
#if SYZ_EXECUTOR_USES_FORK_SERVER
			// In non-forking mode the subprocess exits after test execution
			// and the pipe read fails with EOF, so we rely on the resp_pipe_ instead.
			Restart();
			return;
#endif
		}
		if (select.Ready(resp_pipe_) && !ReadResponse(out_of_requests)) {
			Restart();
			return;
		}
		return;
	}

	// Lifecycle of the subprocess; transitions happen via ChangeState().
	enum State : uint8 {
		// The process has just started.
		Started,
		// We sent the process env flags and waiting for handshake reply.
		Handshaking,
		// Handshaked and ready to execute programs.
		Idle,
		// Currently executing a test program.
		Executing,
	};

	State GetState() const
	{
		return state_;
	}

private:
	Connection& conn_;
	const char* const bin_;
	ProcIDPool& proc_id_pool_;
	int id_; // current proc id; may change after a hang (see Restart)
	int& restarting_; // shared counter of procs currently in Handshaking state (owned by Runner)
	const bool& corpus_triaged_; // shared flag set by Runner on CorpusTriaged message
	const int max_signal_fd_;
	const int cover_filter_fd_;
	const ProcOpts opts_;
	State state_ = State::Started;
	std::optional<Subprocess> process_;
	ShmemFile req_shmem_; // program input, mapped at kInFd in the child
	ShmemFile resp_shmem_; // execution output, mapped at kOutFd in the child
	OutputData* resp_mem_;
	int req_pipe_ = -1; // child's stdin: handshake/execute requests
	int resp_pipe_ = -1; // child's stdout: execution status replies
	int stdout_pipe_ = -1; // child's stderr: debug/log output
	rpc::RequestType req_type_ = rpc::RequestType::Program;
	rpc::ExecEnv exec_env_ = rpc::ExecEnv::NONE;
	int64_t sandbox_arg_ = 0;
	std::optional<rpc::ExecRequestRawT> msg_; // request currently being processed, if any
	std::vector<uint8_t> output_; // accumulated subprocess stderr output
	size_t debug_output_pos_ = 0; // portion of output_ already printed in debug mode
	uint64 attempts_ = 0; // consecutive restarts while trying to execute the current request
	uint64 freshness_ = 0; // number of requests executed since last subprocess (re)start
	uint64 exec_start_ = 0; // ms timestamp when handshake/execution started (for hang detection)
	uint64 wait_start_ = 0; // ms timestamp when we ran out of requests (idle accounting)
	uint64 wait_end_ = 0;

	friend std::ostream& operator<<(std::ostream& ss, const Proc& proc)
	{
		ss << "id=" << proc.id_
		   << " state=" << static_cast<int>(proc.state_)
		   << " freshness=" << proc.freshness_
		   << " attempts=" << proc.attempts_
		   << " exec_start=" << current_time_ms() - proc.exec_start_
		   << "\n";
		if (proc.msg_)
			ss << "\tcurrent request: " << *proc.msg_;
		return ss;
	}

	// Single point of state transition; keeps the shared restarting_ counter
	// in sync with how many procs are currently handshaking.
	void ChangeState(State state)
	{
		if (state_ == State::Handshaking)
			restarting_--;
		if (state == State::Handshaking)
			restarting_++;
		state_ = state;
	}

	// Kills the current subprocess and starts a fresh one. Gives up (failmsg)
	// after too many consecutive attempts for the same request.
	void Restart()
	{
		debug("proc %d: restarting subprocess, current state %u attempts %llu\n", id_, state_, attempts_);
		int status = process_->KillAndWait();
		process_.reset();
		debug("proc %d: subprocess exit status %d\n", id_, status);
		if (++attempts_ > 20) {
			while (ReadOutput())
				;
			// Write the subprocess output first. If it contains own SYZFAIL,
			// we want it to be before our SYZFAIL.
282 ssize_t wrote = write(STDERR_FILENO, output_.data(), output_.size()); 283 if (wrote != static_cast<ssize_t>(output_.size())) 284 fprintf(stderr, "output truncated: %zd/%zd (errno=%d)\n", 285 wrote, output_.size(), errno); 286 uint64 req_id = msg_ ? msg_->id : -1; 287 failmsg("repeatedly failed to execute the program", "proc=%d req=%lld state=%d status=%d", 288 id_, req_id, state_, status); 289 } 290 // Ignore all other errors. 291 // Without fork server executor can legitimately exit (program contains exit_group), 292 // with fork server the top process can exit with kFailStatus if it wants special handling. 293 if (status != kFailStatus) 294 status = 0; 295 if (FailCurrentRequest(status == kFailStatus)) { 296 // Read out all pening output until EOF. 297 if (IsSet(msg_->flags, rpc::RequestFlag::ReturnOutput)) { 298 while (ReadOutput()) 299 ; 300 } 301 bool hanged = SYZ_EXECUTOR_USES_FORK_SERVER && state_ == State::Executing; 302 HandleCompletion(status, hanged); 303 if (hanged) { 304 // If the process has hanged, it may still be using per-proc resources, 305 // so allocate a fresh proc id. 
				int new_id = proc_id_pool_.Alloc(id_);
				debug("proc %d: changing proc id to %d\n", id_, new_id);
				id_ = new_id;
			}
		} else if (attempts_ > 3)
			// Back off progressively if the subprocess keeps dying quickly.
			sleep_ms(100 * attempts_);
		Start();
	}

	// Decides whether the current request should be completed with an error
	// (true) or silently retried by the restarted subprocess (false).
	bool FailCurrentRequest(bool failed)
	{
		if (state_ == State::Handshaking)
			return IsSet(msg_->flags, rpc::RequestFlag::ReturnError);
		if (state_ == State::Executing)
			return !failed || IsSet(msg_->flags, rpc::RequestFlag::ReturnError);
		return false;
	}

	// Spawns a new subprocess with freshly created request/response/stdout pipes
	// and the shmem/coverage fds remapped to well-known descriptors.
	void Start()
	{
		ChangeState(State::Started);
		freshness_ = 0;
		int req_pipe[2];
		if (pipe(req_pipe))
			fail("pipe failed");
		int resp_pipe[2];
		if (pipe(resp_pipe))
			fail("pipe failed");
		int stdout_pipe[2];
		if (pipe(stdout_pipe))
			fail("pipe failed");

		std::vector<std::pair<int, int>> fds = {
		    {req_pipe[0], STDIN_FILENO},
		    {resp_pipe[1], STDOUT_FILENO},
		    {stdout_pipe[1], STDERR_FILENO},
		    {req_shmem_.FD(), kInFd},
		    {resp_shmem_.FD(), kOutFd},
		    {max_signal_fd_, kMaxSignalFd},
		    {cover_filter_fd_, kCoverFilterFd},
		};
		const char* argv[] = {bin_, "exec", nullptr};
		process_.emplace(argv, fds);

		Select::Prepare(resp_pipe[0]);
		Select::Prepare(stdout_pipe[0]);

		// Close the child's ends of the new pipes...
		close(req_pipe[0]);
		close(resp_pipe[1]);
		close(stdout_pipe[1]);

		// ...and our ends of the previous subprocess's pipes
		// (harmlessly close(-1) on the very first start).
		close(req_pipe_);
		close(resp_pipe_);
		close(stdout_pipe_);

		req_pipe_ = req_pipe[1];
		resp_pipe_ = resp_pipe[0];
		stdout_pipe_ = stdout_pipe[0];

		// If a request was pending when the old subprocess died, resume it.
		if (msg_)
			Handshake();
	}

	// Sends the handshake request (env flags, proc id, timeouts) to the freshly
	// started subprocess; the reply arrives via ReadResponse.
	void Handshake()
	{
		if (state_ != State::Started || !msg_)
			fail("wrong handshake state");
		debug("proc %d: handshaking to execute request %llu\n", id_, static_cast<uint64>(msg_->id));
		ChangeState(State::Handshaking);
		exec_start_ = current_time_ms();
		req_type_ = msg_->type;
		exec_env_ = msg_->exec_opts->env_flags() & ~rpc::ExecEnv::ResetState;
		sandbox_arg_ = msg_->exec_opts->sandbox_arg();
		handshake_req req =
		    {
			.magic = kInMagic,
			.use_cover_edges = opts_.use_cover_edges,
			.is_kernel_64_bit = opts_.is_kernel_64_bit,
			.flags = exec_env_,
			.pid = static_cast<uint64>(id_),
			.sandbox_arg = static_cast<uint64>(sandbox_arg_),
			.syscall_timeout_ms = opts_.syscall_timeout_ms,
			.program_timeout_ms = ProgramTimeoutMs(),
			.slowdown_scale = opts_.slowdown,
		    };
		if (write(req_pipe_, &req, sizeof(req)) != sizeof(req)) {
			// The subprocess likely died; restart and retry.
			debug("request pipe write failed (errno=%d)\n", errno);
			Restart();
		}
	}

	// Sends the pending request to an idle (handshaked) subprocess:
	// notifies the manager, copies the program into shmem and writes execute_req.
	void Execute()
	{
		if (state_ != State::Idle || !msg_)
			fail("wrong state for execute");

		debug("proc %d: start executing request %llu\n", id_, static_cast<uint64>(msg_->id));

		// Tell the manager we started executing (used for crash attribution).
		rpc::ExecutingMessageRawT exec;
		exec.id = msg_->id;
		exec.proc_id = id_;
		exec.try_ = attempts_;

		if (wait_start_) {
			// Report how long we sat idle waiting for requests (ms -> ns).
			exec.wait_duration = (wait_end_ - wait_start_) * 1000 * 1000;
			wait_end_ = wait_start_ = 0;
		}

		rpc::ExecutorMessageRawT raw;
		raw.msg.Set(std::move(exec));
		conn_.Send(raw);

		uint64 all_call_signal = 0;
		bool all_extra_signal = false;
		for (int32_t call : msg_->all_signal) {
			// This code assumes that call indices can be represented as bits in uint64 all_call_signal.
			static_assert(kMaxCalls == 64);
			if (call < -1 || call >= static_cast<int32_t>(kMaxCalls))
				failmsg("bad all_signal call", "call=%d", call);
			// call == -1 requests "extra" (non-call-attributed) signal.
			if (call < 0)
				all_extra_signal = true;
			else
				all_call_signal |= 1ull << call;
		}
		memcpy(req_shmem_.Mem(), msg_->data.data(), std::min(msg_->data.size(), kMaxInput));
		execute_req req{
		    .magic = kInMagic,
		    .id = static_cast<uint64>(msg_->id),
		    .type = msg_->type,
		    .exec_flags = static_cast<uint64>(msg_->exec_opts->exec_flags()),
		    .all_call_signal = all_call_signal,
		    .all_extra_signal = all_extra_signal,
		};
		exec_start_ = current_time_ms();
		ChangeState(State::Executing);
		if (write(req_pipe_, &req, sizeof(req)) != sizeof(req)) {
			// The subprocess likely died; restart and retry.
			debug("request pipe write failed (errno=%d)\n", errno);
			Restart();
		}
	}

	// Finalizes the current request: serializes results from the output shmem
	// and sends them to the manager, then resets per-request state.
	void HandleCompletion(uint32 status, bool hanged = false)
	{
		if (!msg_)
			fail("don't have executed msg");

		// Note: if the child process crashed during handshake and the request has ReturnError flag,
		// we have not started executing the request yet.
		// Elapsed time in ns (current_time_ms is ms).
		uint64 elapsed = (current_time_ms() - exec_start_) * 1000 * 1000;
		uint8* prog_data = msg_->data.data();
		input_data = prog_data;
		std::vector<uint8_t>* output = nullptr;
		if (IsSet(msg_->flags, rpc::RequestFlag::ReturnOutput)) {
			output = &output_;
			if (status) {
				char tmp[128];
				snprintf(tmp, sizeof(tmp), "\nprocess exited with status %d\n", status);
				output_.insert(output_.end(), tmp, tmp + strlen(tmp));
			}
		}
		uint32 num_calls = 0;
		if (msg_->type == rpc::RequestType::Program)
			num_calls = read_input(&prog_data);
		auto data = finish_output(resp_mem_, id_, msg_->id, num_calls, elapsed, freshness_++, status, hanged, output);
		conn_.Send(data.data(), data.size());

		// Reset per-request state for the next request.
		resp_mem_->Reset();
		msg_.reset();
		output_.clear();
		debug_output_pos_ = 0;
		ChangeState(State::Idle);
#if !SYZ_EXECUTOR_USES_FORK_SERVER
		// Without fork server the subprocess exits after each program,
		// so restart it for the next one.
		if (process_)
			Restart();
#endif
	}

	// Reads one status word from the response pipe and advances the state machine.
	// Returns false if the pipe hit EOF or data arrived in an unexpected state
	// (the caller then restarts the subprocess).
	bool ReadResponse(bool out_of_requests)
	{
		uint32 status;
		ssize_t n;
		while ((n = read(resp_pipe_, &status, sizeof(status))) == -1) {
			if (errno != EINTR && errno != EAGAIN)
				break;
		}
		if (n == 0) {
			debug("proc %d: response pipe EOF\n", id_);
			return false;
		}
		if (n != sizeof(status))
			failmsg("proc resp pipe read failed", "n=%zd", n);
		if (state_ == State::Handshaking) {
			// Handshake done, the pending request can now actually run.
			debug("proc %d: got handshake reply\n", id_);
			ChangeState(State::Idle);
			Execute();
		} else if (state_ == State::Executing) {
			debug("proc %d: got execute reply\n", id_);
			HandleCompletion(status);
			// Start measuring idle time if we have nothing else to run.
			if (out_of_requests)
				wait_start_ = current_time_ms();
		} else {
			debug("got data on response pipe in wrong state %d\n", state_);
			return false;
		}
		return true;
	}

	// Reads one chunk of subprocess stderr into output_.
	// Returns false on EOF, true otherwise (including EINTR/EAGAIN).
	bool ReadOutput()
	{
		const size_t kChunk = 1024;
		output_.resize(output_.size() + kChunk);
		ssize_t n = read(stdout_pipe_, output_.data() + output_.size() - kChunk, kChunk);
		// Trim the buffer back to the bytes actually read (n < 0 counts as 0).
		output_.resize(output_.size() - kChunk + std::max<ssize_t>(n, 0));
		if (n < 0) {
			if (errno == EINTR || errno == EAGAIN)
				return true;
			fail("proc stdout read failed");
		}
		if (n == 0) {
			debug("proc %d: output pipe EOF\n", id_);
			return false;
		}
		if (flag_debug) {
			const bool has_nl = output_.back() == '\n';
			// Append a NUL terminator (resize zero-fills) so the buffer can be
			// printed as a C string; removed again below.
			output_.resize(output_.size() + 1);
			char* output = reinterpret_cast<char*>(output_.data()) + debug_output_pos_;
			// During machine check we can execute some requests that legitimately fail.
			// These requests have ReturnError flag, so that the failure is returned
			// to the caller for analysis. Don't print SYZFAIL in these requests,
			// otherwise it will be detected as a bug.
			if (msg_ && IsSet(msg_->flags, rpc::RequestFlag::ReturnError)) {
				char* syzfail = strstr(output, "SYZFAIL");
				if (syzfail)
					memcpy(syzfail, "NOTFAIL", strlen("NOTFAIL"));
			}
			debug("proc %d: got output: %s%s", id_, output, has_nl ? "" : "\n");
			output_.resize(output_.size() - 1);
			debug_output_pos_ = output_.size();
		}
		return true;
	}

	uint32 ProgramTimeoutMs() const
	{
		// Glob requests can expand to >10K files and can take a while to run.
		return opts_.program_timeout_ms * (req_type_ == rpc::RequestType::Program ? 1 : 10);
	}
};

// Runner manages a set of test subprocesses (Proc's), receives new test requests from the manager,
// and dispatches them to subprocesses.
class Runner
{
public:
	// Performs the manager handshake, spawns the requested number of Procs
	// and then runs the event loop forever (never returns).
	Runner(Connection& conn, int vm_index, const char* bin)
	    : conn_(conn),
	      vm_index_(vm_index)
	{
		int num_procs = Handshake();
		proc_id_pool_.emplace(num_procs);
		int max_signal_fd = max_signal_ ? max_signal_->FD() : -1;
		int cover_filter_fd = cover_filter_ ?
cover_filter_->FD() : -1;
		for (int i = 0; i < num_procs; i++)
			procs_.emplace_back(new Proc(conn, bin, *proc_id_pool_, restarting_, corpus_triaged_,
						     max_signal_fd, cover_filter_fd, proc_opts_));

		for (;;)
			Loop();
	}

private:
	Connection& conn_;
	const int vm_index_;
	std::optional<CoverFilter> max_signal_; // max signal filter, present iff manager requested cover
	std::optional<CoverFilter> cover_filter_; // pc filter, present iff manager sent one
	std::optional<ProcIDPool> proc_id_pool_;
	std::vector<std::unique_ptr<Proc>> procs_;
	std::deque<rpc::ExecRequestRawT> requests_; // requests not yet taken by any proc
	std::vector<std::string> leak_frames_;
	int restarting_ = 0; // number of procs currently handshaking (shared with Procs)
	bool corpus_triaged_ = false; // set on CorpusTriaged message (shared with Procs)
#if GOOS_linux
	bool is_leak_enabled_ = false;
	uint64 execs_since_leak_check_ = 0;
	std::vector<char*> char_leak_frames_; // leak_frames_ as C strings for check_leaks()
#endif
	ProcOpts proc_opts_{};

	friend std::ostream& operator<<(std::ostream& ss, const Runner& runner)
	{
		ss << "vm_index=" << runner.vm_index_
		   << " max_signal=" << !!runner.max_signal_
		   << " cover_filter=" << !!runner.cover_filter_
		   << " restarting=" << runner.restarting_
		   << " corpus_triaged=" << runner.corpus_triaged_
		   << " " << runner.proc_opts_
		   << "\n";
		ss << "procs:\n";
		for (const auto& proc : runner.procs_)
			ss << *proc;
		ss << "\nqueued requests (" << runner.requests_.size() << "):\n";
		for (const auto& req : runner.requests_)
			ss << req;
		return ss;
	}

	// One iteration of the event loop: poll the connection and all proc pipes,
	// dispatch incoming messages and hand queued requests to idle procs.
	void Loop()
	{
		Select select;
		select.Arm(conn_.FD());
		for (auto& proc : procs_)
			proc->Arm(select);
		// Wait for ready host connection and subprocess pipes.
		// Timeout is for terminating hanged subprocesses.
		select.Wait(1000);
		uint64 now = current_time_ms();

		if (select.Ready(conn_.FD())) {
			rpc::HostMessageRawT raw;
			conn_.Recv(raw);
			// Dispatch on the union member actually present in the message.
			if (auto* msg = raw.msg.AsExecRequest())
				Handle(*msg);
			else if (auto* msg = raw.msg.AsSignalUpdate())
				Handle(*msg);
			else if (auto* msg = raw.msg.AsCorpusTriaged())
				Handle(*msg);
			else if (auto* msg = raw.msg.AsStateRequest())
				Handle(*msg);
			else
				failmsg("unknown host message type", "type=%d", static_cast<int>(raw.msg.type));
		}

#if GOOS_linux
		// Leak checking requires quiescence, so only run it when all procs are idle.
		if (IsScheduledForLeakCheck() && AreProcsIdle()) {
			debug("Running leak check...\n");
			check_leaks(char_leak_frames_.data(), char_leak_frames_.size());
			debug("Done running leak check\n");
			execs_since_leak_check_ = 0;
		}
#endif

		for (auto& proc : procs_) {
			proc->Ready(select, now, requests_.empty());
			// Stop dispatching new requests while a leak check is due,
			// so the procs can drain to idle.
			if (!IsScheduledForLeakCheck() && !requests_.empty()) {
				if (proc->Execute(requests_.front())) {
					requests_.pop_front();
#if GOOS_linux
					++execs_since_leak_check_;
#endif
				}
			}
		}

		// Sanity check on the shared handshaking counter maintained by Procs.
		if (restarting_ < 0 || restarting_ > static_cast<int>(procs_.size()))
			failmsg("bad restarting", "restarting=%d", restarting_);
	}

#if GOOS_linux
	// True when enough programs have executed since the last leak check.
	bool IsScheduledForLeakCheck()
	{
		const uint64 kRunLeakCheckEvery = 2 * procs_.size();
		return is_leak_enabled_ &&
		       corpus_triaged_ &&
		       execs_since_leak_check_ >= kRunLeakCheckEvery;
	}

	bool AreProcsIdle()
	{
		return std::all_of(procs_.begin(), procs_.end(), [](const std::unique_ptr<Proc>& proc) {
			return proc->GetState() == Proc::State::Idle;
		});
	}
#else
	constexpr bool IsScheduledForLeakCheck()
	{
		return false;
	}
#endif

	// Implementation must match that in pkg/rpcserver/rpcserver.go.
	// Trivial keyed transform used to authenticate this connection to the manager.
	// Do not change: the manager computes the same function on its side.
	uint64 HashAuthCookie(uint64 cookie)
	{
		const uint64_t prime1 = 73856093;
		const uint64_t prime2 = 83492791;

		return (cookie * prime1) ^ prime2;
	}

	// Multi-stage connection handshake with the manager: auth, basic info,
	// feature setup and coverage filter exchange. Returns the number of procs to run.
	int Handshake()
	{
		// Handshake stage 0: get a cookie from the manager.
		rpc::ConnectHelloRawT conn_hello;
		conn_.Recv(conn_hello);

		// Handshake stage 1: share basic information about the client.
		rpc::ConnectRequestRawT conn_req;
		conn_req.cookie = HashAuthCookie(conn_hello.cookie);
		conn_req.id = vm_index_;
		conn_req.arch = GOARCH;
		conn_req.git_revision = GIT_REVISION;
		conn_req.syz_revision = SYZ_REVISION;
		conn_.Send(conn_req);

		rpc::ConnectReplyRawT conn_reply;
		conn_.Recv(conn_reply);
		if (conn_reply.debug)
			flag_debug = true;
		debug("connected to manager: procs=%d cover_edges=%d kernel_64_bit=%d slowdown=%d syscall_timeout=%u"
		      " program_timeout=%u features=0x%llx\n",
		      conn_reply.procs, conn_reply.cover_edges, conn_reply.kernel_64_bit,
		      conn_reply.slowdown, conn_reply.syscall_timeout_ms,
		      conn_reply.program_timeout_ms, static_cast<uint64>(conn_reply.features));
		leak_frames_ = conn_reply.leak_frames;

		proc_opts_.use_cover_edges = conn_reply.cover_edges;
		proc_opts_.is_kernel_64_bit = is_kernel_64_bit = conn_reply.kernel_64_bit;
		proc_opts_.slowdown = conn_reply.slowdown;
		proc_opts_.syscall_timeout_ms = conn_reply.syscall_timeout_ms;
		proc_opts_.program_timeout_ms = conn_reply.program_timeout_ms;
		if (conn_reply.cover)
			max_signal_.emplace();

		// Handshake stage 2: share information requested by the manager.
		rpc::InfoRequestRawT info_req;
		info_req.files = ReadFiles(conn_reply.files);

		// This does any one-time setup for the requested features on the machine.
		// Note: this can be called multiple times and must be idempotent.
#if SYZ_HAVE_FEATURES
		setup_sysctl();
		setup_cgroups();
#endif
#if SYZ_HAVE_SETUP_EXT
		// This can be defined in common_ext.h.
		setup_ext();
#endif
		// Set up every feature the manager asked for and report per-feature results.
		for (const auto& feat : features) {
			if (!(conn_reply.features & feat.id))
				continue;
			debug("setting up feature %s\n", rpc::EnumNameFeature(feat.id));
			const char* reason = feat.setup();
			conn_reply.features &= ~feat.id;
			std::unique_ptr<rpc::FeatureInfoRawT> res(new rpc::FeatureInfoRawT);
			res->id = feat.id;
			res->need_setup = true;
			if (reason) {
				debug("failed: %s\n", reason);
				res->reason = reason;
			}
#if GOOS_linux
			if (feat.id == rpc::Feature::Leak && !reason) {
				is_leak_enabled_ = true;
				for (auto& s : leak_frames_)
					char_leak_frames_.push_back(s.data());
			}
#endif
			info_req.features.push_back(std::move(res));
		}
		// Report the remaining requested features that have no setup routine.
		for (auto id : rpc::EnumValuesFeature()) {
			if (!(conn_reply.features & id))
				continue;
			std::unique_ptr<rpc::FeatureInfoRawT> res(new rpc::FeatureInfoRawT);
			res->id = id;
			res->need_setup = false;
			info_req.features.push_back(std::move(res));
		}

#if SYZ_HAVE_KCSAN
		setup_kcsan_filter(conn_reply.race_frames);
#endif

		conn_.Send(info_req);

		rpc::InfoReplyRawT info_reply;
		conn_.Recv(info_reply);
		debug("received info reply: covfilter=%zu\n", info_reply.cover_filter.size());
		if (!info_reply.cover_filter.empty()) {
			cover_filter_.emplace();
			for (auto pc : info_reply.cover_filter)
				cover_filter_->Insert(pc);
		}

		Select::Prepare(conn_.FD());
		return conn_reply.procs;
	}

	// Routes an incoming exec request: binary requests run synchronously,
	// program requests go to an idle proc or into the queue.
	void Handle(rpc::ExecRequestRawT& msg)
	{
		debug("recv exec request %llu: type=%llu flags=0x%llx env=0x%llx exec=0x%llx size=%zu\n",
		      static_cast<uint64>(msg.id),
		      static_cast<uint64>(msg.type),
		      static_cast<uint64>(msg.flags),
		      static_cast<uint64>(msg.exec_opts->env_flags()),
		      static_cast<uint64>(msg.exec_opts->exec_flags()),
		      msg.data.size());
		if (msg.type == rpc::RequestType::Binary) {
			ExecuteBinary(msg);
			return;
		}
		// While a leak check is pending, queue instead of dispatching
		// so the procs can drain to idle.
		if (!IsScheduledForLeakCheck()) {
			for (auto& proc : procs_) {
				if (proc->Execute(msg))
					return;
			}
		}
		requests_.push_back(std::move(msg));
	}

	// Merges new max-signal PCs sent by the manager into the local filter.
	void Handle(const rpc::SignalUpdateRawT& msg)
	{
		debug("recv signal update: new=%zu\n", msg.new_max.size());
		if (!max_signal_)
			fail("signal update when no signal filter installed");
		for (auto pc : msg.new_max)
			max_signal_->Insert(pc);
	}

	void Handle(const rpc::CorpusTriagedRawT& msg)
	{
		// TODO: repair leak checking (#4728).
		debug("recv corpus triaged\n");
		corpus_triaged_ = true;
	}

	void Handle(const rpc::StateRequestRawT& msg)
	{
		// Debug request about our internal state.
		std::ostringstream ss;
		ss << *this;
		const std::string& str = ss.str();
		rpc::StateResultRawT res;
		res.data.insert(res.data.begin(), str.data(), str.data() + str.size());
		rpc::ExecutorMessageRawT raw;
		raw.msg.Set(std::move(res));
		conn_.Send(raw);
	}

	// Writes the received binary to a temp dir, runs it to completion and
	// sends the result (error string + captured output) back to the manager.
	void ExecuteBinary(rpc::ExecRequestRawT& msg)
	{
		rpc::ExecutingMessageRawT exec;
		exec.id = msg.id;
		rpc::ExecutorMessageRawT raw;
		raw.msg.Set(std::move(exec));
		conn_.Send(raw);

		char dir_template[] = "syz-bin-dirXXXXXX";
		char* dir = mkdtemp(dir_template);
		if (dir == nullptr)
			fail("mkdtemp failed");
		if (chmod(dir, 0777))
			fail("chmod failed");
		auto [err, output] = ExecuteBinaryImpl(msg, dir);
		if (!err.empty()) {
			// Append errno context; ExecuteBinaryImpl's failing syscall set it.
			char tmp[64];
			snprintf(tmp, sizeof(tmp), " (errno %d: %s)", errno, strerror(errno));
			err += tmp;
		}
		remove_dir(dir);
		rpc::ExecResultRawT res;
		res.id = msg.id;
		res.error = std::move(err);
		res.output = std::move(output);
		raw.msg.Set(std::move(res));
		conn_.Send(raw);
	}

	std::tuple<std::string, std::vector<uint8_t>>
	// Runs the test binary from msg.data in dir. Returns {error, output};
	// empty error means the process ran and exited without kFailStatus.
	ExecuteBinaryImpl(rpc::ExecRequestRawT& msg, const char* dir)
	{
		// For simplicity we just wait for binary tests to complete blocking everything else.
		std::string file = std::string(dir) + "/syz-executor";
		int fd = open(file.c_str(), O_WRONLY | O_CLOEXEC | O_CREAT, 0755);
		if (fd == -1)
			return {"binary file creation failed", {}};
		ssize_t wrote = write(fd, msg.data.data(), msg.data.size());
		close(fd);
		if (wrote != static_cast<ssize_t>(msg.data.size()))
			return {"binary file write failed", {}};

		int stdin_pipe[2];
		if (pipe(stdin_pipe))
			fail("pipe failed");
		int stdout_pipe[2];
		if (pipe(stdout_pipe))
			fail("pipe failed");

		const char* argv[] = {file.c_str(), nullptr};
		std::vector<std::pair<int, int>> fds = {
		    {stdin_pipe[0], STDIN_FILENO},
		    {stdout_pipe[1], STDOUT_FILENO},
		    {stdout_pipe[1], STDERR_FILENO},
		};
		Subprocess process(argv, fds);

		// Close the child's pipe ends in our process.
		close(stdin_pipe[0]);
		close(stdout_pipe[1]);

		int status = process.WaitAndKill(5 * proc_opts_.program_timeout_ms);

		// Drain whatever the process wrote to stdout/stderr (read after exit;
		// output is bounded by the pipe buffer).
		std::vector<uint8_t> output;
		for (;;) {
			const size_t kChunk = 1024;
			output.resize(output.size() + kChunk);
			ssize_t n = read(stdout_pipe[0], output.data() + output.size() - kChunk, kChunk);
			output.resize(output.size() - kChunk + std::max<ssize_t>(n, 0));
			if (n <= 0)
				break;
		}
		close(stdin_pipe[1]);
		close(stdout_pipe[0]);

		return {status == kFailStatus ? "process failed" : "", std::move(output)};
	}
};

static void SigintHandler(int sig)
{
	// GCE VM preemption is signalled as SIGINT, notify syz-manager.
	exitf("SYZ-EXECUTOR: PREEMPTED");
}

static void SigchldHandler(int sig)
{
	// We need just blocking syscall preemption.
}

static void FatalHandler(int sig, siginfo_t* info, void* ucontext)
{
	// Print minimal debugging info we can extract reasonably easy.
	uintptr_t pc = 0xdeadbeef;
#if GOOS_linux
	auto& mctx = static_cast<ucontext_t*>(ucontext)->uc_mcontext;
	// Silence unused warning on arches without a PC extraction case below.
	(void)mctx;
#if GOARCH_amd64
	pc = mctx.gregs[REG_RIP];
#elif GOARCH_arm64
	pc = mctx.pc;
#endif
#endif
	const char* name = "unknown signal";
	switch (sig) {
	case SIGSEGV:
		name = "SIGSEGV";
		break;
	case SIGBUS:
		name = "SIGBUS";
		break;
	case SIGILL:
		name = "SIGILL";
		break;
	case SIGFPE:
		name = "SIGFPE";
		break;
	}
	// Print the current function PC so that it's possible to map the failing PC
	// to a symbol in the binary offline (we usually compile as PIE).
	failmsg(name, "pc-offset:0x%zx pc:%p addr:%p code=%d",
		reinterpret_cast<uintptr_t>(reinterpret_cast<void*>(FatalHandler)) - pc,
		reinterpret_cast<void*>(pc), info->si_addr, info->si_code);
}

// Entry point for the "runner" mode: parses the command line, installs
// signal handlers, connects to the manager and hands off to Runner (never returns).
static void runner(char** argv, int argc)
{
	// argv = {bin, "runner", index, addr, port} => argc == 5.
	if (argc != 5)
		fail("usage: syz-executor runner <index> <manager-addr> <manager-port>");
	char* endptr = nullptr;
	int vm_index = strtol(argv[2], &endptr, 10);
	if (vm_index < 0 || *endptr != 0)
		failmsg("failed to parse VM index", "str='%s'", argv[2]);
	const char* const manager_addr = argv[3];
	const char* const manager_port = argv[4];

	struct rlimit rlim;
	rlim.rlim_cur = rlim.rlim_max = kFdLimit;
	if (setrlimit(RLIMIT_NOFILE, &rlim))
		fail("setrlimit(RLIMIT_NOFILE) failed");

	// Ignore all signals we are not interested in.
	// In particular we want to ignore SIGPIPE, but also everything else since
	// test processes manage to send random signals using tracepoints with bpf programs.
	// This is not a bullet-proof protection, but it won't harm either.
	for (int sig = 0; sig <= 64; sig++)
		signal(sig, SIG_IGN);
	// Then install the few handlers we do care about (overriding SIG_IGN).
	if (signal(SIGINT, SigintHandler) == SIG_ERR)
		fail("signal(SIGINT) failed");
	if (signal(SIGTERM, SigintHandler) == SIG_ERR)
		fail("signal(SIGTERM) failed");
	if (signal(SIGCHLD, SigchldHandler) == SIG_ERR)
		fail("signal(SIGCHLD) failed");
	struct sigaction act = {};
	act.sa_flags = SA_SIGINFO;
	act.sa_sigaction = FatalHandler;
	for (auto sig : {SIGSEGV, SIGBUS, SIGILL, SIGFPE}) {
		if (sigaction(sig, &act, nullptr))
			failmsg("sigaction failed", "sig=%d", sig);
	}

	Connection conn(manager_addr, manager_port);

	// This is required to make Subprocess fd remapping logic work.
	// kCoverFilterFd is the largest fd we set in the child processes.
	for (int fd = conn.FD(); fd < kCoverFilterFd;)
		fd = dup(fd);

	Runner(conn, vm_index, argv[0]);
}