github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/executor/executor_linux.h (about) 1 // Copyright 2015 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 #include <fcntl.h> 5 #include <signal.h> 6 #include <stdio.h> 7 #include <stdlib.h> 8 #include <sys/ioctl.h> 9 #include <sys/mman.h> 10 #include <sys/prctl.h> 11 #include <sys/syscall.h> 12 #include <unistd.h> 13 14 static bool pkeys_enabled; 15 16 // The coverage buffer can realistically overflow. In the non-snapshot mode we cannot afford 17 // very large buffer b/c there are usually multiple procs, and each of them consumes 18 // significant amount of memory. In snapshot mode we have only one proc, so we can have 19 // larger coverage buffer. 20 const int kSnapshotCoverSize = 1024 << 10; 21 22 const unsigned long KCOV_TRACE_PC = 0; 23 const unsigned long KCOV_TRACE_CMP = 1; 24 25 template <int N> 26 struct kcov_remote_arg { 27 uint32 trace_mode; 28 uint32 area_size; 29 uint32 num_handles; 30 uint32 pad; 31 uint64 common_handle; 32 uint64 handles[N]; 33 }; 34 35 #define KCOV_INIT_TRACE32 _IOR('c', 1, uint32) 36 #define KCOV_INIT_TRACE64 _IOR('c', 1, uint64) 37 #define KCOV_ENABLE _IO('c', 100) 38 #define KCOV_DISABLE _IO('c', 101) 39 #define KCOV_REMOTE_ENABLE _IOW('c', 102, kcov_remote_arg<0>) 40 #define KCOV_RESET_TRACE _IO('c', 104) 41 42 #define KCOV_SUBSYSTEM_COMMON (0x00ull << 56) 43 #define KCOV_SUBSYSTEM_USB (0x01ull << 56) 44 45 #define KCOV_SUBSYSTEM_MASK (0xffull << 56) 46 #define KCOV_INSTANCE_MASK (0xffffffffull) 47 48 static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst) 49 { 50 if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK) 51 return 0; 52 return subsys | inst; 53 } 54 55 static void os_init(int argc, char** argv, char* data, size_t data_size) 56 { 57 prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); 58 // Surround the main data mapping with PROT_NONE pages to 
make virtual address layout more consistent 59 // across different configurations (static/non-static build) and C repros. 60 // One observed case before: executor had a mapping above the data mapping (output region), 61 // while C repros did not have that mapping above, as the result in one case VMA had next link, 62 // while in the other it didn't and it caused a bug to not reproduce with the C repro. 63 void* got = mmap(data - SYZ_PAGE_SIZE, SYZ_PAGE_SIZE, PROT_NONE, MAP_ANON | MAP_PRIVATE | MAP_FIXED_EXCLUSIVE, -1, 0); 64 if (data - SYZ_PAGE_SIZE != got) 65 failmsg("mmap of left data PROT_NONE page failed", "want %p, got %p", data - SYZ_PAGE_SIZE, got); 66 // NOLINTBEGIN(clang-analyzer-security.MmapWriteExec) 67 got = mmap(data, data_size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE | MAP_FIXED_EXCLUSIVE, -1, 0); 68 // NOLINTEND(clang-analyzer-security.MmapWriteExec) 69 if (data != got) 70 failmsg("mmap of data segment failed", "want %p, got %p", data, got); 71 got = mmap(data + data_size, SYZ_PAGE_SIZE, PROT_NONE, MAP_ANON | MAP_PRIVATE | MAP_FIXED_EXCLUSIVE, -1, 0); 72 if (data + data_size != got) 73 failmsg("mmap of right data PROT_NONE page failed", "want %p, got %p", data + data_size, got); 74 75 // A SIGCHLD handler makes sleep in loop exit immediately return with EINTR with a child exits. 76 struct sigaction act = {}; 77 act.sa_handler = [](int) {}; 78 sigaction(SIGCHLD, &act, nullptr); 79 80 // Use the last available pkey so that C reproducers get the the same keys from pkey_alloc. 
81 int pkeys[RESERVED_PKEY + 1]; 82 int npkey = 0; 83 for (; npkey <= RESERVED_PKEY; npkey++) { 84 int pk = pkey_alloc(0, 0); 85 if (pk == -1) 86 break; 87 if (pk == RESERVED_PKEY) { 88 pkeys_enabled = true; 89 break; 90 } 91 pkeys[npkey] = pk; 92 } 93 while (npkey--) 94 pkey_free(pkeys[npkey]); 95 } 96 97 static intptr_t execute_syscall(const call_t* c, intptr_t a[kMaxArgs]) 98 { 99 if (c->call) 100 return c->call(a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8]); 101 return syscall(c->sys_nr, a[0], a[1], a[2], a[3], a[4], a[5]); 102 } 103 104 static void cover_open(cover_t* cov, bool extra) 105 { 106 int fd = open("/sys/kernel/debug/kcov", O_RDWR); 107 if (fd == -1) 108 fail("open of /sys/kernel/debug/kcov failed"); 109 if (dup2(fd, cov->fd) < 0) 110 failmsg("filed to dup cover fd", "from=%d, to=%d", fd, cov->fd); 111 close(fd); 112 const int kcov_init_trace = is_kernel_64_bit ? KCOV_INIT_TRACE64 : KCOV_INIT_TRACE32; 113 const int cover_size = extra ? kExtraCoverSize : flag_snapshot ? kSnapshotCoverSize 114 : kCoverSize; 115 if (ioctl(cov->fd, kcov_init_trace, cover_size)) 116 fail("cover init trace write failed"); 117 cov->data_size = cover_size * (is_kernel_64_bit ? 
8 : 4); 118 if (pkeys_enabled) 119 debug("pkey protection enabled\n"); 120 } 121 122 static void cover_close(cover_t* cov) 123 { 124 if (cov->fd == -1) 125 fail("attempting to close an invalid cover fd"); 126 if (cov->enabled) { 127 if (ioctl(cov->fd, KCOV_DISABLE, 0)) 128 fail("KCOV_DISABLE failed"); 129 cov->enabled = false; 130 } 131 close(cov->fd); 132 cov->fd = -1; 133 } 134 135 static void cover_protect(cover_t* cov) 136 { 137 if (pkeys_enabled && pkey_set(RESERVED_PKEY, PKEY_DISABLE_WRITE)) 138 debug("pkey_set failed: %d\n", errno); 139 } 140 141 static void cover_unprotect(cover_t* cov) 142 { 143 if (pkeys_enabled && pkey_set(RESERVED_PKEY, 0)) 144 debug("pkey_set failed: %d\n", errno); 145 } 146 147 static void cover_mmap(cover_t* cov) 148 { 149 if (cov->mmap_alloc_ptr != NULL) 150 fail("cover_mmap invoked on an already mmapped cover_t object"); 151 if (cov->data_size == 0) 152 fail("cover_t structure is corrupted"); 153 // Allocate kcov buffer plus two guard pages surrounding it. 154 cov->mmap_alloc_size = cov->data_size + 2 * SYZ_PAGE_SIZE; 155 cov->mmap_alloc_ptr = (char*)mmap(NULL, cov->mmap_alloc_size, 156 PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0); 157 if (cov->mmap_alloc_ptr == MAP_FAILED) 158 exitf("failed to preallocate kcov buffer"); 159 // Now map the kcov buffer to the file, overwriting the existing mapping above. 160 int prot = flag_read_only_coverage ? PROT_READ : (PROT_READ | PROT_WRITE); 161 void* data_buf = (char*)mmap(cov->mmap_alloc_ptr + SYZ_PAGE_SIZE, cov->data_size, 162 prot, MAP_SHARED | MAP_FIXED, cov->fd, 0); 163 if (data_buf == MAP_FAILED) 164 exitf("cover mmap failed"); 165 if (pkeys_enabled && pkey_mprotect(data_buf, cov->data_size, prot, RESERVED_PKEY)) 166 exitf("failed to pkey_mprotect kcov buffer"); 167 cov->data = (char*)data_buf; 168 cov->data_end = cov->data + cov->data_size; 169 cov->data_offset = is_kernel_64_bit ? 
sizeof(uint64_t) : sizeof(uint32_t); 170 cov->pc_offset = 0; 171 } 172 173 static void cover_munmap(cover_t* cov) 174 { 175 if (cov->mmap_alloc_ptr == NULL) 176 fail("cover_munmap invoked on a non-mmapped cover_t object"); 177 if (munmap(cov->mmap_alloc_ptr, cov->mmap_alloc_size)) 178 fail("cover_munmap failed"); 179 cov->mmap_alloc_ptr = NULL; 180 } 181 182 static void cover_enable(cover_t* cov, bool collect_comps, bool extra) 183 { 184 unsigned int kcov_mode = collect_comps ? KCOV_TRACE_CMP : KCOV_TRACE_PC; 185 // The KCOV_ENABLE call should be fatal, 186 // but in practice ioctl fails with assorted errors (9, 14, 25), 187 // so we use exitf. 188 if (!extra) { 189 if (ioctl(cov->fd, KCOV_ENABLE, kcov_mode)) 190 exitf("cover enable write trace failed, mode=%d", kcov_mode); 191 cov->enabled = true; 192 return; 193 } 194 kcov_remote_arg<1> arg = { 195 .trace_mode = kcov_mode, 196 // Coverage buffer size of background threads. 197 .area_size = kExtraCoverSize, 198 .num_handles = 1, 199 }; 200 arg.common_handle = kcov_remote_handle(KCOV_SUBSYSTEM_COMMON, procid + 1); 201 arg.handles[0] = kcov_remote_handle(KCOV_SUBSYSTEM_USB, procid + 1); 202 if (ioctl(cov->fd, KCOV_REMOTE_ENABLE, &arg)) 203 exitf("remote cover enable write trace failed"); 204 cov->enabled = true; 205 } 206 207 static void cover_reset(cover_t* cov) 208 { 209 // Callers in common_linux.h don't check this flag. 
210 if (!flag_coverage) 211 return; 212 if (cov == 0) { 213 if (current_thread == 0) 214 fail("cover_reset: current_thread == 0"); 215 cov = ¤t_thread->cov; 216 } 217 if (flag_read_only_coverage) { 218 if (ioctl(cov->fd, KCOV_RESET_TRACE, 0)) 219 fail("KCOV_RESET_TRACE failed"); 220 } else { 221 cover_unprotect(cov); 222 *(uint64*)cov->data = 0; 223 cover_protect(cov); 224 } 225 cov->overflow = false; 226 } 227 228 template <typename cover_data_t> 229 static void cover_collect_impl(cover_t* cov) 230 { 231 cov->size = *(cover_data_t*)cov->data; 232 cov->overflow = (cov->data + (cov->size + 2) * sizeof(cover_data_t)) > cov->data_end; 233 } 234 235 static void cover_collect(cover_t* cov) 236 { 237 if (is_kernel_64_bit) 238 cover_collect_impl<uint64>(cov); 239 else 240 cover_collect_impl<uint32>(cov); 241 } 242 243 // One does not simply exit. 244 // _exit can in fact fail. 245 // syzkaller did manage to generate a seccomp filter that prohibits exit_group syscall. 246 // Previously, we get into infinite recursion via segv_handler in such case 247 // and corrupted output_data, which does matter in our case since it is shared 248 // with fuzzer process. Loop infinitely instead. Parent will kill us. 249 // But one does not simply loop either. Compilers are sure that _exit never returns, 250 // so they remove all code after _exit as dead. Call _exit via volatile indirection. 251 // And this does not work as well. _exit has own handling of failing exit_group 252 // in the form of HLT instruction, it will divert control flow from our loop. 253 // So call the syscall directly. 254 NORETURN void doexit(int status) 255 { 256 volatile unsigned i; 257 syscall(__NR_exit_group, status); 258 for (i = 0;; i++) { 259 } 260 } 261 262 // If we need to kill just a single thread (e.g. after cloning), exit_group is not 263 // the right choice - it will kill all threads, which might eventually lead to 264 // unnecessary SYZFAIL errors. 
NORETURN void doexit_thread(int status)
{
	volatile unsigned i;
	syscall(__NR_exit, status);
	// exit can fail/return just like exit_group (see doexit above);
	// loop forever instead and let the parent kill us.
	for (i = 0;; i++) {
	}
}

#define SYZ_HAVE_KCSAN 1
// Writes "!frame" entries to the KCSAN debugfs file so that data races in the
// given frames are filtered out of reports. No-op if the list is empty.
static void setup_kcsan_filter(const std::vector<std::string>& frames)
{
	if (frames.empty())
		return;
	int fd = open("/sys/kernel/debug/kcsan", O_WRONLY);
	if (fd == -1)
		fail("failed to open kcsan debugfs file");
	for (const auto& frame : frames)
		dprintf(fd, "!%s\n", frame.c_str());
	close(fd);
}

// Checks availability of the NIC VF feature by probing for the expected PCI
// device in sysfs. Returns NULL on success, or an error description.
static const char* setup_nicvf()
{
	// This feature has custom checking procedure rather than just rely on running
	// a simple program with this feature enabled b/c find_vf_interface cannot be made
	// failing. It searches for the nic in init namespace, but then the nic is moved
	// to one of the testing namespaces, so if number of procs is more than the number of devices,
	// then some of them won't find a nic (the code is also racy, more than one proc
	// can find the same device and then moving it will fail for all but one).
	// So we have to make find_vf_interface non-failing in case of failures,
	// which means we cannot use it for feature checking.
	int fd = open("/sys/bus/pci/devices/0000:00:11.0/", O_RDONLY | O_NONBLOCK);
	if (fd == -1)
		return "PCI device 0000:00:11.0 is not available";
	close(fd);
	return NULL;
}

// Checks availability of the devlink PCI feature by probing for the expected
// PCI device in sysfs. Returns NULL on success, or an error description.
static const char* setup_devlink_pci()
{
	// See comment in setup_nicvf.
	int fd = open("/sys/bus/pci/devices/0000:00:10.0/", O_RDONLY | O_NONBLOCK);
	if (fd == -1)
		return "PCI device 0000:00:10.0 is not available";
	close(fd);
	return NULL;
}

// Checks whether the kernel supports delayed kcov mmap (commit b3d7fe86fbd0)
// by mapping the same kcov buffer twice and probing whether the second mapping
// is readable. Returns NULL on success, or an error description.
static const char* setup_delay_kcov()
{
	int fd = open("/sys/kernel/debug/kcov", O_RDWR);
	if (fd == -1)
		return "open of /sys/kernel/debug/kcov failed";
	close(fd);
	cover_t cov = {};
	cov.fd = kCoverFd;
	cover_open(&cov, false);
	cover_mmap(&cov);
	char* first = cov.mmap_alloc_ptr;
	// Clear the pointer so cover_mmap creates a second, independent mapping.
	cov.mmap_alloc_ptr = nullptr;
	cover_mmap(&cov);
	// If delayed kcov mmap is not supported by the kernel,
	// accesses to the second mapping will crash.
	// Use clock_gettime to check if it's mapped w/o crashing the process.
	const char* error = NULL;
	timespec ts;
	if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
		if (errno != EFAULT)
			fail("clock_gettime failed");
		error = "kernel commit b3d7fe86fbd0 is not present";
	} else {
		munmap(cov.mmap_alloc_ptr, cov.mmap_alloc_size);
	}
	munmap(first, cov.mmap_alloc_size);
	cover_close(&cov);
	return error;
}

// Checks whether the kernel supports ioctl(KCOV_RESET_TRACE) by issuing it on
// an enabled kcov instance. Returns NULL on success, or an error description.
static const char* setup_kcov_reset_ioctl()
{
	int fd = open("/sys/kernel/debug/kcov", O_RDWR);
	if (fd == -1)
		return "open of /sys/kernel/debug/kcov failed";
	close(fd);

	cover_t cov = {};
	cov.fd = kCoverFd;
	cover_open(&cov, false);
	cover_mmap(&cov);
	const char* error = NULL;
	cover_enable(&cov, false, false);
	int ret;
	if ((ret = ioctl(cov.fd, KCOV_RESET_TRACE, 0))) {
		// ENOTTY means the ioctl is simply unknown to this kernel;
		// any other error is unexpected and fatal.
		if (errno != ENOTTY) {
			fprintf(stderr, "ret: %d, errno: %d\n", ret, errno);
			fail("ioctl(KCOV_RESET_TRACE) failed");
		}
		error = "kernel does not support ioctl(KCOV_RESET_TRACE)";
	}
	cover_munmap(&cov);
	cover_close(&cov);
	return error;
}

#define SYZ_HAVE_FEATURES 1
// Table of optional features and their setup/check functions; each setup
// returns NULL on success or a human-readable reason the feature is missing.
static feature_t features[] = {
    {rpc::Feature::DelayKcovMmap, setup_delay_kcov},
    {rpc::Feature::KcovResetIoctl, setup_kcov_reset_ioctl},
    {rpc::Feature::Fault, setup_fault},
    {rpc::Feature::Leak, setup_leak},
    {rpc::Feature::KCSAN, setup_kcsan},
    {rpc::Feature::USBEmulation, setup_usb},
    {rpc::Feature::LRWPANEmulation, setup_802154},
    {rpc::Feature::BinFmtMisc, setup_binfmt_misc},
    {rpc::Feature::Swap, setup_swap},
    {rpc::Feature::NicVF, setup_nicvf},
    {rpc::Feature::DevlinkPCI, setup_devlink_pci},
};