github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/executor/executor.cc

// Copyright 2017 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

// +build

// Currently this is unused (included only to test building).
#include "pkg/flatrpc/flatrpc.h"

#include <algorithm>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#if !GOOS_windows
#include <unistd.h>
#endif

#include "defs.h"

#if defined(__GNUC__)
#define SYSCALLAPI
#define NORETURN __attribute__((noreturn))
#define PRINTF(fmt, args) __attribute__((format(printf, fmt, args)))
#else
// Assuming windows/cl.
#define SYSCALLAPI WINAPI
#define NORETURN __declspec(noreturn)
#define PRINTF(fmt, args)
#define __thread __declspec(thread)
#endif

#ifndef GIT_REVISION
#define GIT_REVISION "unknown"
#endif

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

// uint64 is impossible to printf without using the clumsy and verbose "%" PRId64.
// So we define and use uint64. Note: pkg/csource does s/uint64/uint64_t/.
// Also define uint32/16/8 for consistency.
typedef unsigned long long uint64;
typedef unsigned int uint32;
typedef unsigned short uint16;
typedef unsigned char uint8;

// exit/_exit do not necessarily work (e.g. if the fuzzer sets a seccomp filter that prohibits exit_group).
// Use doexit instead. We must redefine exit to something that exists in stdlib,
// because some standard libraries contain "using ::exit;", but our exit has a different signature.
#define exit vsnprintf

// Dynamic memory allocation reduces test reproducibility across different libc versions and kernels.
// malloc will cause an unspecified number of additional mmap's at unspecified locations.
// For small objects prefer stack allocations, for larger ones -- either global objects (this may have
// issues with concurrency), or controlled mmaps, or make the fuzzer allocate memory.
#define malloc do_not_use_malloc
#define calloc do_not_use_calloc

// Note: zircon max fd is 256.
// Some common_OS.h files know about this constant for RLIMIT_NOFILE.
const int kMaxFd = 250;
const int kMaxThreads = 32;
const int kInPipeFd = kMaxFd - 1; // remapped from stdin
const int kOutPipeFd = kMaxFd - 2; // remapped from stdout
const int kCoverFd = kOutPipeFd - kMaxThreads;
const int kExtraCoverFd = kCoverFd - 1;
const int kMaxArgs = 9;
const int kCoverSize = 256 << 10;
const int kFailStatus = 67;
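// For reference, the concrete descriptor layout implied by the constants above
// (simple arithmetic, assuming the defaults are unchanged):
//   kInPipeFd     = 249  (kMaxFd - 1, remapped from stdin)
//   kOutPipeFd    = 248  (kMaxFd - 2, remapped from stdout)
//   kCoverFd      = 216  (kOutPipeFd - kMaxThreads; thread i uses kCoverFd + i)
//   kExtraCoverFd = 215
// so the per-thread kcov fds occupy [216, 248) and fds below kExtraCoverFd
// stay available to the test program.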
// Two approaches to dealing with kcov memory.
const int kCoverOptimizedCount = 12; // the number of kcov instances to be opened inside main()
const int kCoverOptimizedPreMmap = 3; // this many will be mmapped inside main(), others - when needed.
const int kCoverDefaultCount = 6; // otherwise we only init kcov instances inside main()

// Logical error (e.g. invalid input program), use as an assert() alternative.
// If such an error happens 10+ times in a row, it will be detected as a bug by syz-fuzzer.
// syz-fuzzer will fail and syz-manager will create a bug for this.
// Note: err is used for bug deduplication, thus the distinction between err (constant message)
// and msg (varying part).
static NORETURN void fail(const char* err);
static NORETURN PRINTF(2, 3) void failmsg(const char* err, const char* msg, ...);
// Just exit (e.g. due to a temporary ENOMEM error).
static NORETURN PRINTF(1, 2) void exitf(const char* msg, ...);
static NORETURN void doexit(int status);
#if !GOOS_fuchsia
static NORETURN void doexit_thread(int status);
#endif

// Print debug output that is visible when running syz-manager/execprog with the -debug flag.
// Debug output is supposed to be relatively high-level (syscalls executed, return values, timing, etc)
// and is intended mostly for end users. If you need to debug lower-level details, use the debug_verbose
// function and temporarily enable it in your build by changing the #if 0 below.
// Unlike the previous functions, this one does not add \n at the end of msg.
static PRINTF(1, 2) void debug(const char* msg, ...);
void debug_dump_data(const char* data, int length);

#if 0
#define debug_verbose(...) debug(__VA_ARGS__)
#else
#define debug_verbose(...) (void)0
#endif

static void receive_execute();
static void reply_execute(int status);

#if SYZ_EXECUTOR_USES_FORK_SERVER
static void receive_handshake();
static void reply_handshake();
#endif

#if SYZ_EXECUTOR_USES_SHMEM
// The output region is the only thing in the executor process for which consistency matters.
// If it is corrupted, the ipc package will fail to parse its contents and panic.
// But the fuzzer constantly invents new ways to corrupt the region,
// so we map the region at a (hopefully) hard to guess address with a random offset,
// surrounded by unmapped pages.
// The address chosen must also work on 32-bit kernels with 1GB user address space.
const uint64 kOutputBase = 0x1b2bc20000ull;

#if SYZ_EXECUTOR_USES_FORK_SERVER
// Allocating (and forking) virtual memory for each executed process is expensive, so we only mmap
// the amount we might possibly need for the specific received prog.
const int kMaxOutputComparisons = 14 << 20; // executions with comparisons enabled are usually < 1% of all executions
const int kMaxOutputCoverage = 6 << 20; // coverage is needed in ~ up to 1/3 of all executions (depending on corpus rotation)
const int kMaxOutputSignal = 4 << 20;
const int kMinOutput = 256 << 10; // if we don't need to send signal, the output is rather short.
const int kInitialOutput = kMinOutput; // the minimal size to be allocated in the parent process
#else
// We don't fork and allocate the memory only once, so prepare for the worst case.
const int kInitialOutput = 14 << 20;
#endif

// TODO: allocate a smaller amount of memory in the parent once we merge the patches that enable
// prog execution with neither signal nor coverage. Likely 64kb will be enough in that case.
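// For reference, a sketch of what the output region ends up containing
// (derived from write_output()/write_call_output() further below, not a separate spec):
//   output_data[0]: count of completed calls, updated atomically by write_completed();
//   then one record per completed call: kOutMagic, call_index, call_num, errno,
//   flags, signal count, cover count, comps count, followed by the
//   signal/coverage/comparison data itself.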
const int kInFd = 3;
const int kOutFd = 4;
static uint32* output_data;
static uint32* output_pos;
static int output_size;
static void mmap_output(int size);
static uint32* write_output(uint32 v);
static uint32* write_output_64(uint64 v);
static void write_completed(uint32 completed);
static uint32 hash(uint32 a);
static bool dedup(uint32 sig);
#endif // if SYZ_EXECUTOR_USES_SHMEM

uint64 start_time_ms = 0;

static bool flag_debug;
static bool flag_coverage;
static bool flag_sandbox_none;
static bool flag_sandbox_setuid;
static bool flag_sandbox_namespace;
static bool flag_sandbox_android;
static bool flag_extra_coverage;
static bool flag_net_injection;
static bool flag_net_devices;
static bool flag_net_reset;
static bool flag_cgroups;
static bool flag_close_fds;
static bool flag_devlink_pci;
static bool flag_nic_vf;
static bool flag_vhci_injection;
static bool flag_wifi;
static bool flag_delay_kcov_mmap;

static bool flag_collect_cover;
static bool flag_collect_signal;
static bool flag_dedup_cover;
static bool flag_threaded;
static bool flag_coverage_filter;

// If true, then the executor should write the comparisons data to the fuzzer.
static bool flag_comparisons;

// Tunable timeouts, received with execute_req.
static uint64 syscall_timeout_ms;
static uint64 program_timeout_ms;
static uint64 slowdown_scale;

// Can be used to distinguish whether we're at the initialization stage
// or already executing programs.
static bool in_execute_one = false;

#define SYZ_EXECUTOR 1
#include "common.h"

const int kMaxInput = 4 << 20; // keep in sync with prog.ExecBufferSize
const int kMaxCommands = 1000; // prog package knows about this constant (prog.execMaxCommands)

const uint64 instr_eof = -1;
const uint64 instr_copyin = -2;
const uint64 instr_copyout = -3;
const uint64 instr_setprops = -4;

const uint64 arg_const = 0;
const uint64 arg_addr32 = 1;
const uint64 arg_addr64 = 2;
const uint64 arg_result = 3;
const uint64 arg_data = 4;
const uint64 arg_csum = 5;

const uint64 binary_format_native = 0;
const uint64 binary_format_bigendian = 1;
const uint64 binary_format_strdec = 2;
const uint64 binary_format_strhex = 3;
const uint64 binary_format_stroct = 4;

const uint64 no_copyout = -1;
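// A sketch of the input stream consumed by execute_one()/read_input() below
// (informal, read off the parsing code rather than a separate spec):
//   total number of calls
//   then a sequence of instructions, each starting with either one of the
//   instr_* markers above or a syscall number:
//     instr_copyin addr type <type-specific args>
//     instr_copyout index addr size
//     instr_setprops <call properties>
//     call_num copyout_index num_args arg0 ... argN-1
// All values are zigzag varint-encoded (see read_input()).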
static int running;
uint32 completed;
bool is_kernel_64_bit = true;

static uint8* input_data;

// Checksum kinds.
static const uint64 arg_csum_inet = 0;

// Checksum chunk kinds.
static const uint64 arg_csum_chunk_data = 0;
static const uint64 arg_csum_chunk_const = 1;

typedef intptr_t(SYSCALLAPI* syscall_t)(intptr_t, intptr_t, intptr_t, intptr_t, intptr_t, intptr_t, intptr_t, intptr_t, intptr_t);

struct call_t {
	const char* name;
	int sys_nr;
	call_attrs_t attrs;
	syscall_t call;
};

struct cover_t {
	int fd;
	uint32 size;
	uint32 mmap_alloc_size;
	char* data;
	char* data_end;
	// Note: On everything but darwin the first value in data is the count of
	// recorded PCs, followed by the PCs. We therefore set data_offset to the
	// size of one PC.
	// On darwin data points to an instance of the ksancov_trace struct. Here we
	// set data_offset to the offset between data and the struct's 'pcs' member,
	// which contains the PCs.
	intptr_t data_offset;
	// Note: On everything but darwin this is 0, as the PCs contained in data
	// are already correct. XNU's KSANCOV API, however, chose to always squeeze
	// PCs into 32 bits. To make the recorded PC fit, KSANCOV subtracts a fixed
	// offset (VM_MIN_KERNEL_ADDRESS for AMD64) and then truncates the result to
	// uint32_t. We get this from the 'offset' member in ksancov_trace.
	intptr_t pc_offset;
};

struct thread_t {
	int id;
	bool created;
	event_t ready;
	event_t done;
	uint8* copyout_pos;
	uint64 copyout_index;
	bool executing;
	int call_index;
	int call_num;
	int num_args;
	intptr_t args[kMaxArgs];
	call_props_t call_props;
	intptr_t res;
	uint32 reserrno;
	bool fault_injected;
	cover_t cov;
	bool soft_fail_state;
};

static thread_t threads[kMaxThreads];
static thread_t* last_scheduled;
// Threads use this variable to access information about themselves.
static __thread struct thread_t* current_thread;

static cover_t extra_cov;

struct res_t {
	bool executed;
	uint64 val;
};

static res_t results[kMaxCommands];

const uint64 kInMagic = 0xbadc0ffeebadface;
const uint32 kOutMagic = 0xbadf00d;

struct handshake_req {
	uint64 magic;
	uint64 flags; // env flags
	uint64 pid;
	uint64 sandbox_arg;
};

struct handshake_reply {
	uint32 magic;
};

struct execute_req {
	uint64 magic;
	uint64 env_flags;
	uint64 exec_flags;
	uint64 pid;
	uint64 syscall_timeout_ms;
	uint64 program_timeout_ms;
	uint64 slowdown_scale;
	uint64 prog_size;
};

struct execute_reply {
	uint32 magic;
	uint32 done;
	uint32 status;
};

// call_reply.flags
const uint32 call_flag_executed = 1 << 0;
const uint32 call_flag_finished = 1 << 1;
const uint32 call_flag_blocked = 1 << 2;
const uint32 call_flag_fault_injected = 1 << 3;

struct call_reply {
	execute_reply header;
	uint32 magic;
	uint32 call_index;
	uint32 call_num;
	uint32 reserrno;
	uint32 flags;
	uint32 signal_size;
	uint32 cover_size;
	uint32 comps_size;
	// signal/cover/comps follow
};

enum {
	KCOV_CMP_CONST = 1,
	KCOV_CMP_SIZE1 = 0,
	KCOV_CMP_SIZE2 = 2,
	KCOV_CMP_SIZE4 = 4,
	KCOV_CMP_SIZE8 = 6,
	KCOV_CMP_SIZE_MASK = 6,
};

struct kcov_comparison_t {
	// Note: comparisons are always 64-bits regardless of kernel bitness.
	uint64 type;
	uint64 arg1;
	uint64 arg2;
	uint64 pc;

	bool ignore() const;
	void write();
	bool operator==(const struct kcov_comparison_t& other) const;
	bool operator<(const struct kcov_comparison_t& other) const;
};

typedef char kcov_comparison_size[sizeof(kcov_comparison_t) == 4 * sizeof(uint64) ? 1 : -1];
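// The typedef above is a pre-C++11 compile-time assertion: if kcov_comparison_t
// ever stops being exactly four uint64s (e.g. the compiler inserts padding),
// the array size becomes -1 and compilation fails. With C++11 this would be
// static_assert(sizeof(kcov_comparison_t) == 4 * sizeof(uint64), "...").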
struct feature_t {
	rpc::Feature id;
	void (*setup)();
};

static thread_t* schedule_call(int call_index, int call_num, uint64 copyout_index, uint64 num_args, uint64* args, uint8* pos, call_props_t call_props);
static void handle_completion(thread_t* th);
static void copyout_call_results(thread_t* th);
static void write_call_output(thread_t* th, bool finished);
static void write_extra_output();
static void execute_call(thread_t* th);
static void thread_create(thread_t* th, int id, bool need_coverage);
static void thread_mmap_cover(thread_t* th);
static void* worker_thread(void* arg);
static uint64 read_input(uint8** input_posp, bool peek = false);
static uint64 read_arg(uint8** input_posp);
static uint64 read_const_arg(uint8** input_posp, uint64* size_p, uint64* bf, uint64* bf_off_p, uint64* bf_len_p);
static uint64 read_result(uint8** input_posp);
static uint64 swap(uint64 v, uint64 size, uint64 bf);
static void copyin(char* addr, uint64 val, uint64 size, uint64 bf, uint64 bf_off, uint64 bf_len);
static bool copyout(char* addr, uint64 size, uint64* res);
static void setup_control_pipes();
static void setup_features(char** enable, int n);

#include "syscalls.h"

#if GOOS_linux
#include "executor_linux.h"
#elif GOOS_fuchsia
#include "executor_fuchsia.h"
#elif GOOS_freebsd || GOOS_netbsd || GOOS_openbsd
#include "executor_bsd.h"
#elif GOOS_darwin
#include "executor_darwin.h"
#elif GOOS_windows
#include "executor_windows.h"
#elif GOOS_test
#include "executor_test.h"
#else
#error "unknown OS"
#endif

#include "cov_filter.h"

#include "test.h"

#if SYZ_HAVE_SANDBOX_ANDROID
static uint64 sandbox_arg = 0;
#endif
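// Supported invocations, as a summary of the argv handling below (the binary is
// normally driven by syzkaller itself rather than run by hand):
//   version                     print "GOOS GOARCH SYZ_REVISION GIT_REVISION"
//   setup <features...>         one-time machine setup for the listed features
//   leak <args...>              leak checking (only if SYZ_HAVE_LEAK_CHECK)
//   setup_kcsan_filterlist ...  KCSAN filter list setup (only if SYZ_HAVE_KCSAN)
//   test                        run internal tests
//   exec                        execute programs, driven over the control pipes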
int main(int argc, char** argv)
{
	if (argc == 2 && strcmp(argv[1], "version") == 0) {
		puts(GOOS " " GOARCH " " SYZ_REVISION " " GIT_REVISION);
		return 0;
	}
	if (argc >= 2 && strcmp(argv[1], "setup") == 0) {
		setup_features(argv + 2, argc - 2);
		return 0;
	}
	if (argc >= 2 && strcmp(argv[1], "leak") == 0) {
#if SYZ_HAVE_LEAK_CHECK
		check_leaks(argv + 2, argc - 2);
#else
		fail("leak checking is not implemented");
#endif
		return 0;
	}
	if (argc >= 2 && strcmp(argv[1], "setup_kcsan_filterlist") == 0) {
#if SYZ_HAVE_KCSAN
		setup_kcsan_filterlist(argv + 2, argc - 2, true);
#else
		fail("KCSAN is not implemented");
#endif
		return 0;
	}
	if (argc == 2 && strcmp(argv[1], "test") == 0)
		return run_tests();

	if (argc < 2 || strcmp(argv[1], "exec") != 0) {
		fprintf(stderr, "unknown command");
		return 1;
	}

	start_time_ms = current_time_ms();

	os_init(argc, argv, (char*)SYZ_DATA_OFFSET, SYZ_NUM_PAGES * SYZ_PAGE_SIZE);
	current_thread = &threads[0];

#if SYZ_EXECUTOR_USES_SHMEM
	void* mmap_out = mmap(NULL, kMaxInput, PROT_READ, MAP_PRIVATE, kInFd, 0);
#else
	void* mmap_out = mmap(NULL, kMaxInput, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
#endif
	if (mmap_out == MAP_FAILED)
		fail("mmap of input file failed");
	input_data = static_cast<uint8*>(mmap_out);

#if SYZ_EXECUTOR_USES_SHMEM
	mmap_output(kInitialOutput);
	// Prevent test programs from messing with these fds.
	// Due to races in collider mode, a program can e.g. ftruncate one of these fds,
	// which will cause the fuzzer to crash.
	close(kInFd);
#if !SYZ_EXECUTOR_USES_FORK_SERVER
	close(kOutFd);
#endif
	// For SYZ_EXECUTOR_USES_FORK_SERVER, close(kOutFd) is invoked in the forked child,
	// after the program has been received.
#endif // if SYZ_EXECUTOR_USES_SHMEM

	use_temporary_dir();
	install_segv_handler();
	setup_control_pipes();
#if SYZ_EXECUTOR_USES_FORK_SERVER
	receive_handshake();
#else
	receive_execute();
#endif
	if (flag_coverage) {
		int create_count = kCoverDefaultCount, mmap_count = create_count;
		if (flag_delay_kcov_mmap) {
			create_count = kCoverOptimizedCount;
			mmap_count = kCoverOptimizedPreMmap;
		}
		if (create_count > kMaxThreads)
			create_count = kMaxThreads;
		for (int i = 0; i < create_count; i++) {
			threads[i].cov.fd = kCoverFd + i;
			cover_open(&threads[i].cov, false);
			if (i < mmap_count) {
				// Pre-mmap coverage collection for some threads. This should be enough for almost
				// all programs, for the remaining few ones coverage will be set up when it's needed.
				thread_mmap_cover(&threads[i]);
			}
		}
		extra_cov.fd = kExtraCoverFd;
		cover_open(&extra_cov, true);
		cover_mmap(&extra_cov);
		cover_protect(&extra_cov);
		if (flag_extra_coverage) {
			// Don't enable comps because we don't use them in the fuzzer yet.
			cover_enable(&extra_cov, false, true);
		}
		char sep = '/';
#if GOOS_windows
		sep = '\\';
#endif
		char filename[1024] = {0};
		char* end = strrchr(argv[0], sep);
		size_t len = end - argv[0];
		strncpy(filename, argv[0], len + 1);
		strncat(filename, "syz-cover-bitmap", 17);
		filename[sizeof(filename) - 1] = '\0';
		init_coverage_filter(filename);
	}

	int status = 0;
	if (flag_sandbox_none)
		status = do_sandbox_none();
#if SYZ_HAVE_SANDBOX_SETUID
	else if (flag_sandbox_setuid)
		status = do_sandbox_setuid();
#endif
#if SYZ_HAVE_SANDBOX_NAMESPACE
	else if (flag_sandbox_namespace)
		status = do_sandbox_namespace();
#endif
#if SYZ_HAVE_SANDBOX_ANDROID
	else if (flag_sandbox_android)
		status = do_sandbox_android(sandbox_arg);
#endif
	else
		fail("unknown sandbox type");

#if SYZ_EXECUTOR_USES_FORK_SERVER
	fprintf(stderr, "loop exited with status %d\n", status);
	// Other statuses happen when fuzzer processes manage to kill the loop, e.g. with:
	// ptrace(PTRACE_SEIZE, 1, 0, 0x100040)
	if (status != kFailStatus)
		status = 0;
	// If an external sandbox process wraps the executor, the out pipe will be closed
	// before the sandbox process exits; this will make the ipc package kill the sandbox.
	// As a result, the sandbox process will exit with exit status 9 instead of the executor
	// exit status (notably kFailStatus). So we duplicate the exit status on the pipe.
	reply_execute(status);
	doexit(status);
	// Unreachable.
	return 1;
#else
	reply_execute(status);
	return status;
#endif
}
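// Note on init_coverage_filter() above: the bitmap is looked up next to the
// executor binary, e.g. argv[0] = "/root/syz-executor" yields
// "/root/syz-cover-bitmap" (the directory part of argv[0], separator included,
// plus the fixed file name).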
#if SYZ_EXECUTOR_USES_SHMEM
// This method can be invoked as many times as one likes - MAP_FIXED can overwrite the previous
// mapping without any problems. The only precondition is that kOutFd must not be closed.
static void mmap_output(int size)
{
	if (size <= output_size)
		return;
	if (size % SYZ_PAGE_SIZE != 0)
		failmsg("trying to mmap output area that is not divisible by page size", "page=%d,area=%d", SYZ_PAGE_SIZE, size);
	uint32* mmap_at = NULL;
	if (output_data == NULL) {
		// It's the first time we map the output region - generate its location.
		output_data = mmap_at = (uint32*)(kOutputBase + (1 << 20) * (getpid() % 128));
	} else {
		// We are expanding the mmapped region. Adjust the parameters to avoid mmapping an already
		// mmapped area as much as possible.
		// There exists a mremap call that could have helped, but it's purely Linux-specific.
		mmap_at = (uint32*)((char*)(output_data) + output_size);
	}
	void* result = mmap(mmap_at, size - output_size,
			    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, kOutFd, output_size);
	if (result != mmap_at)
		failmsg("mmap of output file failed", "want %p, got %p", mmap_at, result);
	output_size = size;
}
#endif
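// A worked example of the incremental growth above (fork-server configuration,
// assuming pid 42):
//   main() calls mmap_output(kInitialOutput), mapping [base, base+256KB) at
//   base = 0x1b2bc20000 + (42 % 128) * 1MB = 0x1b2e620000;
//   a later mmap_output(kMaxOutputSignal) maps only the missing tail
//   [base+256KB, base+4MB) from file offset 256KB, leaving the already
//   written prefix mapped in place.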
void setup_control_pipes()
{
	if (dup2(0, kInPipeFd) < 0)
		fail("dup2(0, kInPipeFd) failed");
	if (dup2(1, kOutPipeFd) < 0)
		fail("dup2(1, kOutPipeFd) failed");
	if (dup2(2, 1) < 0)
		fail("dup2(2, 1) failed");
	// We used to close(0), but now we dup stderr to stdin to keep fd numbers
	// stable across executor and C programs generated by pkg/csource.
	if (dup2(2, 0) < 0)
		fail("dup2(2, 0) failed");
}

void parse_env_flags(uint64 flags)
{
	// Note: Values correspond to ordering in pkg/ipc/ipc.go, e.g. FlagSandboxNamespace
	flag_debug = flags & (1 << 0);
	flag_coverage = flags & (1 << 1);
	if (flags & (1 << 2))
		flag_sandbox_setuid = true;
	else if (flags & (1 << 3))
		flag_sandbox_namespace = true;
	else if (flags & (1 << 4))
		flag_sandbox_android = true;
	else
		flag_sandbox_none = true;
	flag_extra_coverage = flags & (1 << 5);
	flag_net_injection = flags & (1 << 6);
	flag_net_devices = flags & (1 << 7);
	flag_net_reset = flags & (1 << 8);
	flag_cgroups = flags & (1 << 9);
	flag_close_fds = flags & (1 << 10);
	flag_devlink_pci = flags & (1 << 11);
	flag_vhci_injection = flags & (1 << 12);
	flag_wifi = flags & (1 << 13);
	flag_delay_kcov_mmap = flags & (1 << 14);
	flag_nic_vf = flags & (1 << 15);
}

#if SYZ_EXECUTOR_USES_FORK_SERVER
void receive_handshake()
{
	handshake_req req = {};
	int n = read(kInPipeFd, &req, sizeof(req));
	if (n != sizeof(req))
		failmsg("handshake read failed", "read=%d", n);
	if (req.magic != kInMagic)
		failmsg("bad handshake magic", "magic=0x%llx", req.magic);
#if SYZ_HAVE_SANDBOX_ANDROID
	sandbox_arg = req.sandbox_arg;
#endif
	parse_env_flags(req.flags);
	procid = req.pid;
}

void reply_handshake()
{
	handshake_reply reply = {};
	reply.magic = kOutMagic;
	if (write(kOutPipeFd, &reply, sizeof(reply)) != sizeof(reply))
		fail("control pipe write failed");
}
#endif

static execute_req last_execute_req;

void receive_execute()
{
	execute_req& req = last_execute_req;
	if (read(kInPipeFd, &req, sizeof(req)) != (ssize_t)sizeof(req))
		fail("control pipe read failed");
	if (req.magic != kInMagic)
		failmsg("bad execute request magic", "magic=0x%llx", req.magic);
	if (req.prog_size > kMaxInput)
		failmsg("bad execute prog size", "size=0x%llx", req.prog_size);
	parse_env_flags(req.env_flags);
	procid = req.pid;
	syscall_timeout_ms = req.syscall_timeout_ms;
	program_timeout_ms = req.program_timeout_ms;
	slowdown_scale = req.slowdown_scale;
	flag_collect_signal = req.exec_flags & (1 << 0);
	flag_collect_cover = req.exec_flags & (1 << 1);
	flag_dedup_cover = req.exec_flags & (1 << 2);
	flag_comparisons = req.exec_flags & (1 << 3);
	flag_threaded = req.exec_flags & (1 << 4);
	flag_coverage_filter = req.exec_flags & (1 << 5);

	debug("[%llums] exec opts: procid=%llu threaded=%d cover=%d comps=%d dedup=%d signal=%d"
	      " timeouts=%llu/%llu/%llu prog=%llu filter=%d\n",
	      current_time_ms() - start_time_ms, procid, flag_threaded, flag_collect_cover,
	      flag_comparisons, flag_dedup_cover, flag_collect_signal, syscall_timeout_ms,
	      program_timeout_ms, slowdown_scale, req.prog_size, flag_coverage_filter);
	if (syscall_timeout_ms == 0 || program_timeout_ms <= syscall_timeout_ms || slowdown_scale == 0)
		failmsg("bad timeouts", "syscall=%llu, program=%llu, scale=%llu",
			syscall_timeout_ms, program_timeout_ms, slowdown_scale);
	if (SYZ_EXECUTOR_USES_SHMEM) {
		if (req.prog_size)
			fail("need_prog: no program");
		return;
	}
	if (req.prog_size == 0)
		fail("need_prog: no program");
	uint64 pos = 0;
	for (;;) {
		ssize_t rv = read(kInPipeFd, input_data + pos, kMaxInput - pos);
		if (rv < 0)
			fail("read failed");
		pos += rv;
		if (rv == 0 || pos >= req.prog_size)
			break;
	}
	if (pos != req.prog_size)
		failmsg("bad input size", "size=%lld, want=%lld", pos, req.prog_size);
}
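// Note on the two transports: in shmem mode the program body is already present
// in the input_data mapping (backed by kInFd), so receive_execute() above only
// transfers the request header and rejects a non-zero prog_size; in pipe mode
// the program bytes follow the header and are read into input_data by the loop
// at the end of the function.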
bool cover_collection_required()
{
	return flag_coverage && (flag_collect_signal || flag_collect_cover || flag_comparisons);
}

void reply_execute(int status)
{
	execute_reply reply = {};
	reply.magic = kOutMagic;
	reply.done = true;
	reply.status = status;
	if (write(kOutPipeFd, &reply, sizeof(reply)) != sizeof(reply))
		fail("control pipe write failed");
}

#if SYZ_EXECUTOR_USES_SHMEM
void realloc_output_data()
{
#if SYZ_EXECUTOR_USES_FORK_SERVER
	if (flag_comparisons)
		mmap_output(kMaxOutputComparisons);
	else if (flag_collect_cover)
		mmap_output(kMaxOutputCoverage);
	else if (flag_collect_signal)
		mmap_output(kMaxOutputSignal);
	if (close(kOutFd) < 0)
		fail("failed to close kOutFd");
#endif
}
#endif // if SYZ_EXECUTOR_USES_SHMEM

// execute_one executes the program stored in input_data.
void execute_one()
{
	in_execute_one = true;
#if SYZ_EXECUTOR_USES_SHMEM
	realloc_output_data();
	output_pos = output_data;
	write_output(0); // Number of executed syscalls (updated later).
#endif // if SYZ_EXECUTOR_USES_SHMEM
	uint64 start = current_time_ms();
	uint8* input_pos = input_data;

	if (cover_collection_required()) {
		if (!flag_threaded)
			cover_enable(&threads[0].cov, flag_comparisons, false);
		if (flag_extra_coverage)
			cover_reset(&extra_cov);
	}

	int call_index = 0;
	uint64 prog_extra_timeout = 0;
	uint64 prog_extra_cover_timeout = 0;
	call_props_t call_props;
	memset(&call_props, 0, sizeof(call_props));

	read_input(&input_pos); // total number of calls
	for (;;) {
		uint64 call_num = read_input(&input_pos);
		if (call_num == instr_eof)
			break;
		if (call_num == instr_copyin) {
			char* addr = (char*)(read_input(&input_pos) + SYZ_DATA_OFFSET);
			uint64 typ = read_input(&input_pos);
			switch (typ) {
			case arg_const: {
				uint64 size, bf, bf_off, bf_len;
				uint64 arg = read_const_arg(&input_pos, &size, &bf, &bf_off, &bf_len);
				copyin(addr, arg, size, bf, bf_off, bf_len);
				break;
			}
			case arg_addr32:
			case arg_addr64: {
				uint64 val = read_input(&input_pos) + SYZ_DATA_OFFSET;
				if (typ == arg_addr32)
					NONFAILING(*(uint32*)addr = val);
				else
					NONFAILING(*(uint64*)addr = val);
				break;
			}
			case arg_result: {
				uint64 meta = read_input(&input_pos);
				uint64 size = meta & 0xff;
				uint64 bf = meta >> 8;
				uint64 val = read_result(&input_pos);
				copyin(addr, val, size, bf, 0, 0);
				break;
			}
			case arg_data: {
				uint64 size = read_input(&input_pos);
				size &= ~(1ull << 63); // readable flag
				if (input_pos + size > input_data + kMaxInput)
					fail("data arg overflow");
				NONFAILING(memcpy(addr, input_pos, size));
				input_pos += size;
				break;
			}
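			// The arg_csum case below computes an inet checksum (the ones'-complement
			// sum used by IP/TCP/UDP; see csum_inet_* in the common headers) over a
			// list of chunks: data chunks reference ranges of already-copied-in
			// program data, const chunks supply immediate values that arrive
			// big-endian. The 2-byte digest is then written back via copyin().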
			case arg_csum: {
				debug_verbose("checksum found at %p\n", addr);
				uint64 size = read_input(&input_pos);
				char* csum_addr = addr;
				uint64 csum_kind = read_input(&input_pos);
				switch (csum_kind) {
				case arg_csum_inet: {
					if (size != 2)
						failmsg("bad inet checksum size", "size=%llu", size);
					debug_verbose("calculating checksum for %p\n", csum_addr);
					struct csum_inet csum;
					csum_inet_init(&csum);
					uint64 chunks_num = read_input(&input_pos);
					uint64 chunk;
					for (chunk = 0; chunk < chunks_num; chunk++) {
						uint64 chunk_kind = read_input(&input_pos);
						uint64 chunk_value = read_input(&input_pos);
						uint64 chunk_size = read_input(&input_pos);
						switch (chunk_kind) {
						case arg_csum_chunk_data:
							chunk_value += SYZ_DATA_OFFSET;
							debug_verbose("#%lld: data chunk, addr: %llx, size: %llu\n",
								      chunk, chunk_value, chunk_size);
							NONFAILING(csum_inet_update(&csum, (const uint8*)chunk_value, chunk_size));
							break;
						case arg_csum_chunk_const:
							if (chunk_size != 2 && chunk_size != 4 && chunk_size != 8)
								failmsg("bad checksum const chunk size", "size=%lld", chunk_size);
							// Here we assume that const values come to us big endian.
							debug_verbose("#%lld: const chunk, value: %llx, size: %llu\n",
								      chunk, chunk_value, chunk_size);
							csum_inet_update(&csum, (const uint8*)&chunk_value, chunk_size);
							break;
						default:
							failmsg("bad checksum chunk kind", "kind=%llu", chunk_kind);
						}
					}
					uint16 csum_value = csum_inet_digest(&csum);
					debug_verbose("writing inet checksum %hx to %p\n", csum_value, csum_addr);
					copyin(csum_addr, csum_value, 2, binary_format_native, 0, 0);
					break;
				}
				default:
					failmsg("bad checksum kind", "kind=%llu", csum_kind);
				}
				break;
			}
			default:
				failmsg("bad argument type", "type=%llu", typ);
			}
			continue;
		}
		if (call_num == instr_copyout) {
			read_input(&input_pos); // index
			read_input(&input_pos); // addr
			read_input(&input_pos); // size
			// The copyout will happen when/if the call completes.
			continue;
		}
		if (call_num == instr_setprops) {
			read_call_props_t(call_props, read_input(&input_pos, false));
			continue;
		}

		// Normal syscall.
		if (call_num >= ARRAY_SIZE(syscalls))
			failmsg("invalid syscall number", "call_num=%llu", call_num);
		const call_t* call = &syscalls[call_num];
		if (prog_extra_timeout < call->attrs.prog_timeout)
			prog_extra_timeout = call->attrs.prog_timeout * slowdown_scale;
		if (strncmp(syscalls[call_num].name, "syz_usb", strlen("syz_usb")) == 0)
			prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, 500 * slowdown_scale);
		if (strncmp(syscalls[call_num].name, "syz_80211_inject_frame", strlen("syz_80211_inject_frame")) == 0)
			prog_extra_cover_timeout = std::max(prog_extra_cover_timeout, 300 * slowdown_scale);
		uint64 copyout_index = read_input(&input_pos);
		uint64 num_args = read_input(&input_pos);
		if (num_args > kMaxArgs)
			failmsg("command has bad number of arguments", "args=%llu", num_args);
		uint64 args[kMaxArgs] = {};
		for (uint64 i = 0; i < num_args; i++)
			args[i] = read_arg(&input_pos);
		for (uint64 i = num_args; i < kMaxArgs; i++)
			args[i] = 0;
		thread_t* th = schedule_call(call_index++, call_num, copyout_index,
					     num_args, args, input_pos, call_props);

		if (call_props.async && flag_threaded) {
			// Don't wait for an async call to finish. We'll wait at the end.
			// If we're not in threaded mode, just ignore the async flag - during repro
			// simplification syzkaller will anyway try to make the program non-threaded.
		} else if (flag_threaded) {
			// Wait for call completion.
			uint64 timeout_ms = syscall_timeout_ms + call->attrs.timeout * slowdown_scale;
			// This is because of printing pre/post call. Ideally we print everything in the main thread
			// and then remove this (would also avoid intermixed output).
			if (flag_debug && timeout_ms < 1000)
				timeout_ms = 1000;
			if (event_timedwait(&th->done, timeout_ms))
				handle_completion(th);

			// Check if any of the previous calls have completed.
			for (int i = 0; i < kMaxThreads; i++) {
				th = &threads[i];
				if (th->executing && event_isset(&th->done))
					handle_completion(th);
			}
		} else {
			// Execute directly.
			if (th != &threads[0])
				fail("using non-main thread in non-thread mode");
			event_reset(&th->ready);
			execute_call(th);
			event_set(&th->done);
			handle_completion(th);
		}
		memset(&call_props, 0, sizeof(call_props));
	}
	if (running > 0) {
		// Give unfinished syscalls some additional time.
		last_scheduled = 0;
		uint64 wait_start = current_time_ms();
		uint64 wait_end = wait_start + 2 * syscall_timeout_ms;
		wait_end = std::max(wait_end, start + program_timeout_ms / 6);
		wait_end = std::max(wait_end, wait_start + prog_extra_timeout);
		while (running > 0 && current_time_ms() <= wait_end) {
			sleep_ms(1 * slowdown_scale);
			for (int i = 0; i < kMaxThreads; i++) {
				thread_t* th = &threads[i];
				if (th->executing && event_isset(&th->done))
					handle_completion(th);
			}
		}
		// Write output coverage for unfinished calls.
		if (running > 0) {
			for (int i = 0; i < kMaxThreads; i++) {
				thread_t* th = &threads[i];
				if (th->executing) {
					if (cover_collection_required())
						cover_collect(&th->cov);
					write_call_output(th, false);
				}
			}
		}
	}

#if SYZ_HAVE_CLOSE_FDS
	close_fds();
#endif

	write_extra_output();
	// Check for new extra coverage in small intervals to avoid a situation
	// where we were killed on timeout before we wrote any.
	// Checking for extra coverage is very cheap, effectively a memory load.
	const uint64 kSleepMs = 100;
	for (uint64 i = 0; i < prog_extra_cover_timeout / kSleepMs; i++) {
		sleep_ms(kSleepMs);
		write_extra_output();
	}
}

thread_t* schedule_call(int call_index, int call_num, uint64 copyout_index, uint64 num_args, uint64* args, uint8* pos, call_props_t call_props)
{
	// Find a spare thread to execute the call.
	int i = 0;
	for (; i < kMaxThreads; i++) {
		thread_t* th = &threads[i];
		if (!th->created)
			thread_create(th, i, cover_collection_required());
		if (event_isset(&th->done)) {
			if (th->executing)
				handle_completion(th);
			break;
		}
	}
	if (i == kMaxThreads)
		exitf("out of threads");
	thread_t* th = &threads[i];
	if (event_isset(&th->ready) || !event_isset(&th->done) || th->executing)
		exitf("bad thread state in schedule: ready=%d done=%d executing=%d",
		      event_isset(&th->ready), event_isset(&th->done), th->executing);
	last_scheduled = th;
	th->copyout_pos = pos;
	th->copyout_index = copyout_index;
	event_reset(&th->done);
	th->executing = true;
	th->call_index = call_index;
	th->call_num = call_num;
	th->num_args = num_args;
	th->call_props = call_props;
	for (int i = 0; i < kMaxArgs; i++)
		th->args[i] = args[i];
	event_set(&th->ready);
	running++;
	return th;
}
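// Signal encoding used by write_coverage_signal() below (a restatement of the
// code, not a separate spec): the upper 20 bits of a signal are the current
// PC's page-aligned prefix (pc & 0xFFFFF000); the lower 12 bits are the current
// PC's low bits XORed with a hash of the previous PC's low bits, so a signal
// identifies a (prev_pc -> pc) edge while staying independent of module load
// offsets.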
#if SYZ_EXECUTOR_USES_SHMEM
template <typename cover_data_t>
void write_coverage_signal(cover_t* cov, uint32* signal_count_pos, uint32* cover_count_pos)
{
	// Write out feedback signals.
	// Currently it is code edges computed as xor of two subsequent basic block PCs.
	cover_data_t* cover_data = (cover_data_t*)(cov->data + cov->data_offset);
	if (flag_collect_signal) {
		uint32 nsig = 0;
		cover_data_t prev_pc = 0;
		bool prev_filter = true;
		for (uint32 i = 0; i < cov->size; i++) {
			cover_data_t pc = cover_data[i] + cov->pc_offset;
			uint32 sig = pc & 0xFFFFF000;
			if (use_cover_edges(pc)) {
				// Only hash the lower 12 bits so the hash is
				// independent of any module offsets.
				sig |= (pc & 0xFFF) ^ (hash(prev_pc & 0xFFF) & 0xFFF);
			}
			bool filter = coverage_filter(pc);
			// Ignore the edge only if both the current and previous PCs are filtered out,
			// to capture all incoming and outgoing edges into the interesting code.
			bool ignore = !filter && !prev_filter;
			prev_pc = pc;
			prev_filter = filter;
			if (ignore || dedup(sig))
				continue;
			write_output(sig);
			nsig++;
		}
		// Write out the number of signals.
		*signal_count_pos = nsig;
	}

	if (flag_collect_cover) {
		// Write out real coverage (basic block PCs).
		uint32 cover_size = cov->size;
		if (flag_dedup_cover) {
			cover_data_t* end = cover_data + cover_size;
			cover_unprotect(cov);
			std::sort(cover_data, end);
			cover_size = std::unique(cover_data, end) - cover_data;
			cover_protect(cov);
		}
		// Truncate PCs to uint32, assuming that they fit into 32 bits.
		// True for x86_64 and arm64 without KASLR.
		for (uint32 i = 0; i < cover_size; i++)
			write_output(cover_data[i] + cov->pc_offset);
		*cover_count_pos = cover_size;
	}
}
#endif // if SYZ_EXECUTOR_USES_SHMEM

void handle_completion(thread_t* th)
{
	if (event_isset(&th->ready) || !event_isset(&th->done) || !th->executing)
		exitf("bad thread state in completion: ready=%d done=%d executing=%d",
		      event_isset(&th->ready), event_isset(&th->done), th->executing);
	if (th->res != (intptr_t)-1)
		copyout_call_results(th);

	write_call_output(th, true);
	write_extra_output();
	th->executing = false;
	running--;
	if (running < 0) {
		// This has been firing periodically for the past 2 years (see issue #502).
		fprintf(stderr, "running=%d completed=%d flag_threaded=%d current=%d\n",
			running, completed, flag_threaded, th->id);
		for (int i = 0; i < kMaxThreads; i++) {
			thread_t* th1 = &threads[i];
			fprintf(stderr, "th #%2d: created=%d executing=%d"
					" ready=%d done=%d call_index=%d res=%lld reserrno=%d\n",
				i, th1->created, th1->executing,
				event_isset(&th1->ready), event_isset(&th1->done),
				th1->call_index, (uint64)th1->res, th1->reserrno);
		}
		exitf("negative running");
	}
}

void copyout_call_results(thread_t* th)
{
	if (th->copyout_index != no_copyout) {
		if (th->copyout_index >= kMaxCommands)
			failmsg("result overflows kMaxCommands", "index=%lld", th->copyout_index);
		results[th->copyout_index].executed = true;
		results[th->copyout_index].val = th->res;
	}
	for (bool done = false; !done;) {
		uint64 instr = read_input(&th->copyout_pos);
		switch (instr) {
		case instr_copyout: {
			uint64 index = read_input(&th->copyout_pos);
			if (index >= kMaxCommands)
				failmsg("result overflows kMaxCommands", "index=%lld", index);
			char* addr = (char*)(read_input(&th->copyout_pos) + SYZ_DATA_OFFSET);
			uint64 size = read_input(&th->copyout_pos);
			uint64 val = 0;
			if (copyout(addr, size, &val)) {
				results[index].executed = true;
				results[index].val = val;
			}
			debug_verbose("copyout 0x%llx from %p\n", val, addr);
			break;
		}
		default:
			done = true;
			break;
		}
	}
}
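// Example of the result plumbing above: if a call was scheduled with
// copyout_index 2, its return value is stored in results[2] by
// copyout_call_results(); a later arg_result argument referring to index 2 is
// then substituted by read_result() below, optionally divided by op_div and
// offset by op_add.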
void write_call_output(thread_t* th, bool finished)
{
	uint32 reserrno = ENOSYS;
	const bool blocked = finished && th != last_scheduled;
	uint32 call_flags = call_flag_executed | (blocked ? call_flag_blocked : 0);
	if (finished) {
		reserrno = th->res != -1 ? 0 : th->reserrno;
		call_flags |= call_flag_finished |
			      (th->fault_injected ? call_flag_fault_injected : 0);
	}
#if SYZ_EXECUTOR_USES_SHMEM
	write_output(kOutMagic);
	write_output(th->call_index);
	write_output(th->call_num);
	write_output(reserrno);
	write_output(call_flags);
	uint32* signal_count_pos = write_output(0); // filled in later
	uint32* cover_count_pos = write_output(0); // filled in later
	uint32* comps_count_pos = write_output(0); // filled in later

	if (flag_comparisons) {
		// Collect only the comparisons.
		uint32 ncomps = th->cov.size;
		kcov_comparison_t* start = (kcov_comparison_t*)(th->cov.data + sizeof(uint64));
		kcov_comparison_t* end = start + ncomps;
		if ((char*)end > th->cov.data_end)
			failmsg("too many comparisons", "ncomps=%u", ncomps);
		cover_unprotect(&th->cov);
		std::sort(start, end);
		ncomps = std::unique(start, end) - start;
		cover_protect(&th->cov);
		uint32 comps_size = 0;
		for (uint32 i = 0; i < ncomps; ++i) {
			if (start[i].ignore())
				continue;
			comps_size++;
			start[i].write();
		}
		// Write out the number of comparisons.
		*comps_count_pos = comps_size;
	} else if (flag_collect_signal || flag_collect_cover) {
		if (is_kernel_64_bit)
			write_coverage_signal<uint64>(&th->cov, signal_count_pos, cover_count_pos);
		else
			write_coverage_signal<uint32>(&th->cov, signal_count_pos, cover_count_pos);
	}
	debug_verbose("out #%u: index=%u num=%u errno=%d finished=%d blocked=%d sig=%u cover=%u comps=%u\n",
		      completed, th->call_index, th->call_num, reserrno, finished, blocked,
		      *signal_count_pos, *cover_count_pos, *comps_count_pos);
	completed++;
	write_completed(completed);
#else
	call_reply reply;
	reply.header.magic = kOutMagic;
	reply.header.done = 0;
	reply.header.status = 0;
	reply.magic = kOutMagic;
	reply.call_index = th->call_index;
	reply.call_num = th->call_num;
	reply.reserrno = reserrno;
	reply.flags = call_flags;
	reply.signal_size = 0;
	reply.cover_size = 0;
	reply.comps_size = 0;
	if (write(kOutPipeFd, &reply, sizeof(reply)) != sizeof(reply))
		fail("control pipe call write failed");
	debug_verbose("out: index=%u num=%u errno=%d finished=%d blocked=%d\n",
		      th->call_index, th->call_num, reserrno, finished, blocked);
#endif // if SYZ_EXECUTOR_USES_SHMEM
}

void write_extra_output()
{
#if SYZ_EXECUTOR_USES_SHMEM
	if (!cover_collection_required() || !flag_extra_coverage || flag_comparisons)
		return;
	cover_collect(&extra_cov);
	if (!extra_cov.size)
		return;
	write_output(kOutMagic);
	write_output(-1); // call index
	write_output(-1); // call num
	write_output(999); // errno
	write_output(0); // call flags
	uint32* signal_count_pos = write_output(0); // filled in later
	uint32* cover_count_pos = write_output(0); // filled in later
	write_output(0); // comps_count_pos
	if (is_kernel_64_bit)
		write_coverage_signal<uint64>(&extra_cov, signal_count_pos, cover_count_pos);
	else
		write_coverage_signal<uint32>(&extra_cov, signal_count_pos, cover_count_pos);
	cover_reset(&extra_cov);
	debug_verbose("extra: sig=%u cover=%u\n", *signal_count_pos, *cover_count_pos);
	completed++;
	write_completed(completed);
#endif // if SYZ_EXECUTOR_USES_SHMEM
}
void thread_create(thread_t* th, int id, bool need_coverage)
{
	th->created = true;
	th->id = id;
	th->executing = false;
	// Lazily set up coverage collection.
	// In most cases it will actually already be initialized - with a few rare exceptions.
	if (need_coverage) {
		if (!th->cov.fd)
			exitf("out of opened kcov threads");
		thread_mmap_cover(th);
	}
	event_init(&th->ready);
	event_init(&th->done);
	event_set(&th->done);
	if (flag_threaded)
		thread_start(worker_thread, th);
}

void thread_mmap_cover(thread_t* th)
{
	if (th->cov.data != NULL)
		return;
	cover_mmap(&th->cov);
	cover_protect(&th->cov);
}

void* worker_thread(void* arg)
{
	thread_t* th = (thread_t*)arg;
	current_thread = th;
	if (cover_collection_required())
		cover_enable(&th->cov, flag_comparisons, false);
	for (;;) {
		event_wait(&th->ready);
		event_reset(&th->ready);
		execute_call(th);
		event_set(&th->done);
	}
	return 0;
}

void execute_call(thread_t* th)
{
	const call_t* call = &syscalls[th->call_num];
	debug("#%d [%llums] -> %s(",
	      th->id, current_time_ms() - start_time_ms, call->name);
	for (int i = 0; i < th->num_args; i++) {
		if (i != 0)
			debug(", ");
		debug("0x%llx", (uint64)th->args[i]);
	}
	debug(")\n");

	int fail_fd = -1;
	th->soft_fail_state = false;
	if (th->call_props.fail_nth > 0) {
		if (th->call_props.rerun > 0)
			fail("both fault injection and rerun are enabled for the same call");
		fail_fd = inject_fault(th->call_props.fail_nth);
		th->soft_fail_state = true;
	}

	if (flag_coverage)
		cover_reset(&th->cov);
	// For pseudo-syscalls and user-space functions NONFAILING can abort before assigning to th->res.
	// Arrange for a res = -1 and errno = EFAULT result for such a case.
	th->res = -1;
	errno = EFAULT;
	NONFAILING(th->res = execute_syscall(call, th->args));
	th->reserrno = errno;
	// Our pseudo-syscalls may misbehave.
	if ((th->res == -1 && th->reserrno == 0) || call->attrs.ignore_return)
		th->reserrno = EINVAL;
	// Reset the flag before the first possible fail().
	th->soft_fail_state = false;

	if (flag_coverage) {
		cover_collect(&th->cov);
		if (th->cov.size >= kCoverSize)
			failmsg("too much cover", "thr=%d, cov=%u", th->id, th->cov.size);
	}
	th->fault_injected = false;

	if (th->call_props.fail_nth > 0)
		th->fault_injected = fault_injected(fail_fd);

	// If required, run the syscall some more times.
	// But let's still return res, errno and coverage from the first execution.
	for (int i = 0; i < th->call_props.rerun; i++)
		NONFAILING(execute_syscall(call, th->args));

	debug("#%d [%llums] <- %s=0x%llx",
	      th->id, current_time_ms() - start_time_ms, call->name, (uint64)th->res);
	if (th->res == (intptr_t)-1)
		debug(" errno=%d", th->reserrno);
	if (flag_coverage)
		debug(" cover=%u", th->cov.size);
	if (th->call_props.fail_nth > 0)
		debug(" fault=%d", th->fault_injected);
	if (th->call_props.rerun > 0)
		debug(" rerun=%d", th->call_props.rerun);
	debug("\n");
}

#if SYZ_EXECUTOR_USES_SHMEM
static uint32 hash(uint32 a)
{
	a = (a ^ 61) ^ (a >> 16);
	a = a + (a << 3);
	a = a ^ (a >> 4);
	a = a * 0x27d4eb2d;
	a = a ^ (a >> 15);
	return a;
}
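// A quick probe example for the dedup table declared below: sig = 0x1234 tries
// slots (0x1234 + i) % dedup_table_size for i = 0..3; if all four slots are
// occupied by other signals, slot 0x1234 % dedup_table_size is overwritten.
// Best-effort by design: an occasional duplicate signal slipping through is fine.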
const uint32 dedup_table_size = 8 << 10;
uint32 dedup_table[dedup_table_size];

// Poor man's best-effort hashmap-based deduplication.
// The hashmap is global, which means that we deduplicate across different calls.
// This is OK because we are interested only in new signals.
static bool dedup(uint32 sig)
{
	for (uint32 i = 0; i < 4; i++) {
		uint32 pos = (sig + i) % dedup_table_size;
		if (dedup_table[pos] == sig)
			return true;
		if (dedup_table[pos] == 0) {
			dedup_table[pos] = sig;
			return false;
		}
	}
	dedup_table[sig % dedup_table_size] = sig;
	return false;
}
#endif // if SYZ_EXECUTOR_USES_SHMEM

template <typename T>
void copyin_int(char* addr, uint64 val, uint64 bf, uint64 bf_off, uint64 bf_len)
{
	if (bf_off == 0 && bf_len == 0) {
		*(T*)addr = swap(val, sizeof(T), bf);
		return;
	}
	T x = swap(*(T*)addr, sizeof(T), bf);
	debug_verbose("copyin_int<%zu>: old x=0x%llx\n", sizeof(T), (uint64)x);
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	const uint64 shift = sizeof(T) * CHAR_BIT - bf_off - bf_len;
#else
	const uint64 shift = bf_off;
#endif
	x = (x & ~BITMASK(shift, bf_len)) | ((val << shift) & BITMASK(shift, bf_len));
	debug_verbose("copyin_int<%zu>: new x=0x%llx\n", sizeof(T), (uint64)x);
	*(T*)addr = swap(x, sizeof(T), bf);
}

void copyin(char* addr, uint64 val, uint64 size, uint64 bf, uint64 bf_off, uint64 bf_len)
{
	debug_verbose("copyin: addr=%p val=0x%llx size=%llu bf=%llu bf_off=%llu bf_len=%llu\n",
		      addr, val, size, bf, bf_off, bf_len);
	if (bf != binary_format_native && bf != binary_format_bigendian && (bf_off != 0 || bf_len != 0))
		failmsg("bitmask for string format", "off=%llu, len=%llu", bf_off, bf_len);
	switch (bf) {
	case binary_format_native:
	case binary_format_bigendian:
		NONFAILING(switch (size) {
			case 1:
				copyin_int<uint8>(addr, val, bf, bf_off, bf_len);
				break;
			case 2:
				copyin_int<uint16>(addr, val, bf, bf_off, bf_len);
				break;
			case 4:
				copyin_int<uint32>(addr, val, bf, bf_off, bf_len);
				break;
			case 8:
				copyin_int<uint64>(addr, val, bf, bf_off, bf_len);
				break;
			default:
				failmsg("copyin: bad argument size", "size=%llu", size);
		});
		break;
	case binary_format_strdec:
		if (size != 20)
			failmsg("bad strdec size", "size=%llu", size);
		NONFAILING(sprintf((char*)addr, "%020llu", val));
		break;
	case binary_format_strhex:
		if (size != 18)
			failmsg("bad strhex size", "size=%llu", size);
		NONFAILING(sprintf((char*)addr, "0x%016llx", val));
		break;
	case binary_format_stroct:
		if (size != 23)
			failmsg("bad stroct size", "size=%llu", size);
		NONFAILING(sprintf((char*)addr, "%023llo", val));
		break;
	default:
		failmsg("unknown binary format", "format=%llu", bf);
	}
}

bool copyout(char* addr, uint64 size, uint64* res)
{
	return NONFAILING(
	    switch (size) {
		    case 1:
			    *res = *(uint8*)addr;
			    break;
		    case 2:
			    *res = *(uint16*)addr;
			    break;
		    case 4:
			    *res = *(uint32*)addr;
			    break;
		    case 8:
			    *res = *(uint64*)addr;
			    break;
		    default:
			    failmsg("copyout: bad argument size", "size=%llu", size);
	    });
}
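// A bitfield example for copyin_int() above (assuming BITMASK(off, len) selects
// len bits starting at bit off, as defined in the common headers): size=1,
// bf_off=2, bf_len=3 on a little-endian target gives shift = 2, so only bits
// 2..4 of the existing byte are replaced with the low bits of val and the
// remaining bits are preserved.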
uint64 read_arg(uint8** input_posp)
{
	uint64 typ = read_input(input_posp);
	switch (typ) {
	case arg_const: {
		uint64 size, bf, bf_off, bf_len;
		uint64 val = read_const_arg(input_posp, &size, &bf, &bf_off, &bf_len);
		if (bf != binary_format_native && bf != binary_format_bigendian)
			failmsg("bad argument binary format", "format=%llu", bf);
		if (bf_off != 0 || bf_len != 0)
			failmsg("bad argument bitfield", "off=%llu, len=%llu", bf_off, bf_len);
		return swap(val, size, bf);
	}
	case arg_addr32:
	case arg_addr64: {
		return read_input(input_posp) + SYZ_DATA_OFFSET;
	}
	case arg_result: {
		uint64 meta = read_input(input_posp);
		uint64 bf = meta >> 8;
		if (bf != binary_format_native)
			failmsg("bad result argument format", "format=%llu", bf);
		return read_result(input_posp);
	}
	default:
		failmsg("bad argument type", "type=%llu", typ);
	}
}

uint64 swap(uint64 v, uint64 size, uint64 bf)
{
	if (bf == binary_format_native)
		return v;
	if (bf != binary_format_bigendian)
		failmsg("bad binary format in swap", "format=%llu", bf);
	switch (size) {
	case 2:
		return htobe16(v);
	case 4:
		return htobe32(v);
	case 8:
		return htobe64(v);
	default:
		failmsg("bad big-endian int size", "size=%llu", size);
	}
}

uint64 read_const_arg(uint8** input_posp, uint64* size_p, uint64* bf_p, uint64* bf_off_p, uint64* bf_len_p)
{
	uint64 meta = read_input(input_posp);
	uint64 val = read_input(input_posp);
	*size_p = meta & 0xff;
	uint64 bf = (meta >> 8) & 0xff;
	*bf_off_p = (meta >> 16) & 0xff;
	*bf_len_p = (meta >> 24) & 0xff;
	uint64 pid_stride = meta >> 32;
	val += pid_stride * procid;
	*bf_p = bf;
	return val;
}

uint64 read_result(uint8** input_posp)
{
	uint64 idx = read_input(input_posp);
	uint64 op_div = read_input(input_posp);
	uint64 op_add = read_input(input_posp);
	uint64 arg = read_input(input_posp);
	if (idx >= kMaxCommands)
		failmsg("command refers to bad result", "result=%lld", idx);
	if (results[idx].executed) {
		arg = results[idx].val;
		if (op_div != 0)
			arg = arg / op_div;
		arg += op_add;
	}
	return arg;
}

uint64 read_input(uint8** input_posp, bool peek)
{
	uint64 v = 0;
	unsigned shift = 0;
	uint8* input_pos = *input_posp;
	for (int i = 0;; i++, shift += 7) {
		const int maxLen = 10;
		if (i == maxLen)
			failmsg("varint overflow", "pos=%zu", *input_posp - input_data);
		if (input_pos >= input_data + kMaxInput)
			failmsg("input command overflows input", "pos=%p: [%p:%p)",
				input_pos, input_data, input_data + kMaxInput);
		uint8 b = *input_pos++;
		v |= uint64(b & 0x7f) << shift;
		if (b < 0x80) {
			if (i == maxLen - 1 && b > 1)
				failmsg("varint overflow", "pos=%zu", *input_posp - input_data);
			break;
		}
	}
	if (v & 1)
		v = ~(v >> 1);
	else
		v = v >> 1;
	if (!peek)
		*input_posp = input_pos;
	return v;
}
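// Worked examples for the zigzag varint decoder above:
//   byte 0x01       -> v = 1, odd  -> ~(1 >> 1) = ~0 = (uint64)-1 = instr_eof
//   byte 0x03       -> v = 3, odd  -> ~(3 >> 1) = ~1 = (uint64)-2 = instr_copyin
//   bytes 0xac 0x02 -> v = 0x2c | (0x02 << 7) = 300, even -> 300 >> 1 = 150
// Small negative instruction markers thus fit in a single byte.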
#if SYZ_EXECUTOR_USES_SHMEM
uint32* write_output(uint32 v)
{
	if (output_pos < output_data || (char*)output_pos >= (char*)output_data + output_size)
		failmsg("output overflow", "pos=%p region=[%p:%p]",
			output_pos, output_data, (char*)output_data + output_size);
	*output_pos = v;
	return output_pos++;
}

uint32* write_output_64(uint64 v)
{
	if (output_pos < output_data || (char*)(output_pos + 1) >= (char*)output_data + output_size)
		failmsg("output overflow", "pos=%p region=[%p:%p]",
			output_pos, output_data, (char*)output_data + output_size);
	*(uint64*)output_pos = v;
	output_pos += 2;
	return output_pos;
}

void write_completed(uint32 completed)
{
	__atomic_store_n(output_data, completed, __ATOMIC_RELEASE);
}
#endif // if SYZ_EXECUTOR_USES_SHMEM

#if SYZ_EXECUTOR_USES_SHMEM
void kcov_comparison_t::write()
{
	if (type > (KCOV_CMP_CONST | KCOV_CMP_SIZE_MASK))
		failmsg("invalid kcov comp type", "type=%llx", type);

	// Write order: type arg1 arg2.
	write_output((uint32)type);

	// KCOV converts all arguments of size x first to uintx_t and then to
	// uint64. We want to properly extend signed values, e.g. we want
	// int8 c = 0xfe to be represented as 0xfffffffffffffffe.
	// Note that uint8 c = 0xfe will be represented the same way.
	// This is ok because during hints processing we will anyway try
	// the value 0x00000000000000fe.
	switch (type & KCOV_CMP_SIZE_MASK) {
	case KCOV_CMP_SIZE1:
		arg1 = (uint64)(long long)(signed char)arg1;
		arg2 = (uint64)(long long)(signed char)arg2;
		break;
	case KCOV_CMP_SIZE2:
		arg1 = (uint64)(long long)(short)arg1;
		arg2 = (uint64)(long long)(short)arg2;
		break;
	case KCOV_CMP_SIZE4:
		arg1 = (uint64)(long long)(int)arg1;
		arg2 = (uint64)(long long)(int)arg2;
		break;
	}
	bool is_size_8 = (type & KCOV_CMP_SIZE_MASK) == KCOV_CMP_SIZE8;
	if (!is_size_8) {
		write_output((uint32)arg1);
		write_output((uint32)arg2);
	} else {
		write_output_64(arg1);
		write_output_64(arg2);
	}
}
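// Record layout produced by write() above: one uint32 with the type, then the
// two arguments, each as a uint32 for sizes 1/2/4 or as a uint64 for size 8.
// The PC itself is not written; it is only consulted by ignore() below via
// coverage_filter().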
bool kcov_comparison_t::ignore() const
{
	// Comparisons with 0 are not interesting, the fuzzer should be able to guess 0's without help.
	if (arg1 == 0 && (arg2 == 0 || (type & KCOV_CMP_CONST)))
		return true;
	if ((type & KCOV_CMP_SIZE_MASK) == KCOV_CMP_SIZE8) {
		// This can be a pointer (assuming a 64-bit kernel).
		// First of all, we want to avert the fuzzer from our output region.
		// Without this, the fuzzer manages to discover and corrupt it.
		uint64 out_start = (uint64)output_data;
		uint64 out_end = out_start + output_size;
		if (arg1 >= out_start && arg1 <= out_end)
			return true;
		if (arg2 >= out_start && arg2 <= out_end)
			return true;
#if defined(GOOS_linux)
		// Filter out kernel physical memory addresses.
		// These are internal kernel comparisons and should not be interesting.
		// The range covers the first 1TB of the physical mapping.
		uint64 kmem_start = (uint64)0xffff880000000000ull;
		uint64 kmem_end = (uint64)0xffff890000000000ull;
		bool kptr1 = arg1 >= kmem_start && arg1 <= kmem_end;
		bool kptr2 = arg2 >= kmem_start && arg2 <= kmem_end;
		if (kptr1 && kptr2)
			return true;
		if (kptr1 && arg2 == 0)
			return true;
		if (kptr2 && arg1 == 0)
			return true;
#endif
	}
	return !coverage_filter(pc);
}

bool kcov_comparison_t::operator==(const struct kcov_comparison_t& other) const
{
	// We don't check for PC equality now, because it is not used.
	return type == other.type && arg1 == other.arg1 && arg2 == other.arg2;
}

bool kcov_comparison_t::operator<(const struct kcov_comparison_t& other) const
{
	if (type != other.type)
		return type < other.type;
	if (arg1 != other.arg1)
		return arg1 < other.arg1;
	// We don't check for PC equality now, because it is not used.
	return arg2 < other.arg2;
}
#endif // if SYZ_EXECUTOR_USES_SHMEM

#if !SYZ_HAVE_FEATURES
static feature_t features[] = {};
#endif

void setup_features(char** enable, int n)
{
	// This does any one-time setup for the requested features on the machine.
	// Note: this can be called multiple times and must be idempotent.
	flag_debug = true;
	if (n != 1)
		fail("setup: more than one feature");
	char* endptr = nullptr;
	auto feature = static_cast<rpc::Feature>(strtoull(enable[0], &endptr, 10));
	if (endptr == enable[0] || (feature > rpc::Feature::ANY) ||
	    __builtin_popcountll(static_cast<uint64>(feature)) > 1)
		failmsg("setup: failed to parse feature", "feature='%s'", enable[0]);
	if (feature == rpc::Feature::NONE) {
#if SYZ_HAVE_FEATURES
		setup_sysctl();
		setup_cgroups();
#endif
#if SYZ_HAVE_SETUP_EXT
		// This can be defined in common_ext.h.
		setup_ext();
#endif
		return;
	}
	for (size_t i = 0; i < sizeof(features) / sizeof(features[0]); i++) {
		if (features[i].id == feature) {
			features[i].setup();
			return;
		}
	}
	// Note: pkg/host knows about this error message.
	fail("feature setup is not needed");
}

void failmsg(const char* err, const char* msg, ...)
{
	int e = errno;
	fprintf(stderr, "SYZFAIL: %s\n", err);
	if (msg) {
		va_list args;
		va_start(args, msg);
		vfprintf(stderr, msg, args);
		va_end(args);
	}
	fprintf(stderr, " (errno %d: %s)\n", e, strerror(e));

	// fail()'s are often used during the validation of kernel reactions to queries
	// that were issued by pseudo syscall implementations. As fault injection may
	// cause the kernel not to succeed in handling these queries (e.g. socket writes
	// or reads may fail), this could ultimately lead to unwanted "lost connection to
	// test machine" crashes.
	// In order to avoid this and, on the other hand, to still have the ability to
	// signal a disastrous situation, the exit code of this function depends on the
	// current context.
	// All fail() invocations during system call execution with enabled fault injection
	// lead to termination with a zero exit code. In all other cases, the exit code is
	// kFailStatus.
	if (current_thread && current_thread->soft_fail_state)
		doexit(0);
	doexit(kFailStatus);
}

void fail(const char* err)
{
	failmsg(err, 0);
}

void exitf(const char* msg, ...)
{
	int e = errno;
	va_list args;
	va_start(args, msg);
	vfprintf(stderr, msg, args);
	va_end(args);
	fprintf(stderr, " (errno %d)\n", e);
	doexit(1);
}

void debug(const char* msg, ...)
{
	if (!flag_debug)
		return;
	int err = errno;
	va_list args;
	va_start(args, msg);
	vfprintf(stderr, msg, args);
	va_end(args);
	fflush(stderr);
	errno = err;
}

void debug_dump_data(const char* data, int length)
{
	if (!flag_debug)
		return;
	int i = 0;
	for (; i < length; i++) {
		debug("%02x ", data[i] & 0xff);
		if (i % 16 == 15)
			debug("\n");
	}
	if (i % 16 != 0)
		debug("\n");
}