github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/executor/executor_linux.h (about) 1 // Copyright 2015 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 #include <fcntl.h> 5 #include <signal.h> 6 #include <stdio.h> 7 #include <stdlib.h> 8 #include <sys/ioctl.h> 9 #include <sys/mman.h> 10 #include <sys/prctl.h> 11 #include <sys/syscall.h> 12 #include <unistd.h> 13 14 const unsigned long KCOV_TRACE_PC = 0; 15 const unsigned long KCOV_TRACE_CMP = 1; 16 17 template <int N> 18 struct kcov_remote_arg { 19 uint32 trace_mode; 20 uint32 area_size; 21 uint32 num_handles; 22 uint32 pad; 23 uint64 common_handle; 24 uint64 handles[N]; 25 }; 26 27 #define KCOV_INIT_TRACE32 _IOR('c', 1, uint32) 28 #define KCOV_INIT_TRACE64 _IOR('c', 1, uint64) 29 #define KCOV_ENABLE _IO('c', 100) 30 #define KCOV_DISABLE _IO('c', 101) 31 #define KCOV_REMOTE_ENABLE _IOW('c', 102, kcov_remote_arg<0>) 32 33 #define KCOV_SUBSYSTEM_COMMON (0x00ull << 56) 34 #define KCOV_SUBSYSTEM_USB (0x01ull << 56) 35 36 #define KCOV_SUBSYSTEM_MASK (0xffull << 56) 37 #define KCOV_INSTANCE_MASK (0xffffffffull) 38 39 static bool is_gvisor; 40 41 static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst) 42 { 43 if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK) 44 return 0; 45 return subsys | inst; 46 } 47 48 static bool detect_kernel_bitness(); 49 static bool detect_gvisor(); 50 51 static void os_init(int argc, char** argv, char* data, size_t data_size) 52 { 53 prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); 54 is_kernel_64_bit = detect_kernel_bitness(); 55 is_gvisor = detect_gvisor(); 56 // Surround the main data mapping with PROT_NONE pages to make virtual address layout more consistent 57 // across different configurations (static/non-static build) and C repros. 58 // One observed case before: executor had a mapping above the data mapping (output region), 59 // while C repros did not have that mapping above, as the result in one case VMA had next link, 60 // while in the other it didn't and it caused a bug to not reproduce with the C repro. 61 void* got = mmap(data - SYZ_PAGE_SIZE, SYZ_PAGE_SIZE, PROT_NONE, MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); 62 if (data - SYZ_PAGE_SIZE != got) 63 failmsg("mmap of left data PROT_NONE page failed", "want %p, got %p", data - SYZ_PAGE_SIZE, got); 64 got = mmap(data, data_size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); 65 if (data != got) 66 failmsg("mmap of data segment failed", "want %p, got %p", data, got); 67 got = mmap(data + data_size, SYZ_PAGE_SIZE, PROT_NONE, MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); 68 if (data + data_size != got) 69 failmsg("mmap of right data PROT_NONE page failed", "want %p, got %p", data + data_size, got); 70 } 71 72 static intptr_t execute_syscall(const call_t* c, intptr_t a[kMaxArgs]) 73 { 74 if (c->call) 75 return c->call(a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8]); 76 return syscall(c->sys_nr, a[0], a[1], a[2], a[3], a[4], a[5]); 77 } 78 79 static void cover_open(cover_t* cov, bool extra) 80 { 81 int fd = open("/sys/kernel/debug/kcov", O_RDWR); 82 if (fd == -1) 83 fail("open of /sys/kernel/debug/kcov failed"); 84 if (dup2(fd, cov->fd) < 0) 85 failmsg("filed to dup cover fd", "from=%d, to=%d", fd, cov->fd); 86 close(fd); 87 const int kcov_init_trace = is_kernel_64_bit ? KCOV_INIT_TRACE64 : KCOV_INIT_TRACE32; 88 const int cover_size = extra ? kExtraCoverSize : kCoverSize; 89 if (ioctl(cov->fd, kcov_init_trace, cover_size)) 90 fail("cover init trace write failed"); 91 cov->mmap_alloc_size = cover_size * (is_kernel_64_bit ? 8 : 4); 92 } 93 94 static void cover_protect(cover_t* cov) 95 { 96 } 97 98 #if SYZ_EXECUTOR_USES_SHMEM 99 static void cover_unprotect(cover_t* cov) 100 { 101 } 102 #endif 103 104 static void cover_mmap(cover_t* cov) 105 { 106 if (cov->data != NULL) 107 fail("cover_mmap invoked on an already mmapped cover_t object"); 108 if (cov->mmap_alloc_size == 0) 109 fail("cover_t structure is corrupted"); 110 // Allocate kcov buffer plus two guard pages surrounding it. 111 char* mapped = (char*)mmap(NULL, cov->mmap_alloc_size + 2 * SYZ_PAGE_SIZE, 112 PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); 113 if (mapped == MAP_FAILED) 114 exitf("failed to preallocate kcov buffer"); 115 // Protect the guard pages. 116 int res = mprotect(mapped, SYZ_PAGE_SIZE, PROT_NONE); 117 if (res == -1) 118 exitf("failed to protect kcov guard page"); 119 res = mprotect(mapped + SYZ_PAGE_SIZE + cov->mmap_alloc_size, 120 SYZ_PAGE_SIZE, PROT_NONE); 121 if (res == -1) 122 exitf("failed to protect kcov guard page"); 123 // Now map the kcov buffer to the file, overwriting the existing mapping above. 124 cov->data = (char*)mmap(mapped + SYZ_PAGE_SIZE, cov->mmap_alloc_size, 125 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, cov->fd, 0); 126 if (cov->data == MAP_FAILED) 127 exitf("cover mmap failed"); 128 cov->data_end = cov->data + cov->mmap_alloc_size; 129 cov->data_offset = is_kernel_64_bit ? sizeof(uint64_t) : sizeof(uint32_t); 130 cov->pc_offset = 0; 131 } 132 133 static void cover_enable(cover_t* cov, bool collect_comps, bool extra) 134 { 135 unsigned int kcov_mode = collect_comps ? KCOV_TRACE_CMP : KCOV_TRACE_PC; 136 // The KCOV_ENABLE call should be fatal, 137 // but in practice ioctl fails with assorted errors (9, 14, 25), 138 // so we use exitf. 139 if (!extra) { 140 if (ioctl(cov->fd, KCOV_ENABLE, kcov_mode)) 141 exitf("cover enable write trace failed, mode=%d", kcov_mode); 142 return; 143 } 144 kcov_remote_arg<1> arg = { 145 .trace_mode = kcov_mode, 146 // Coverage buffer size of background threads. 147 .area_size = kExtraCoverSize, 148 .num_handles = 1, 149 }; 150 arg.common_handle = kcov_remote_handle(KCOV_SUBSYSTEM_COMMON, procid + 1); 151 arg.handles[0] = kcov_remote_handle(KCOV_SUBSYSTEM_USB, procid + 1); 152 if (ioctl(cov->fd, KCOV_REMOTE_ENABLE, &arg)) 153 exitf("remote cover enable write trace failed"); 154 } 155 156 static void cover_reset(cover_t* cov) 157 { 158 // Callers in common_linux.h don't check this flag. 159 if (!flag_coverage) 160 return; 161 if (cov == 0) { 162 if (current_thread == 0) 163 fail("cover_reset: current_thread == 0"); 164 cov = ¤t_thread->cov; 165 } 166 *(uint64*)cov->data = 0; 167 } 168 169 static void cover_collect(cover_t* cov) 170 { 171 if (is_kernel_64_bit) 172 cov->size = *(uint64*)cov->data; 173 else 174 cov->size = *(uint32*)cov->data; 175 } 176 177 #if SYZ_EXECUTOR_USES_SHMEM 178 static bool use_cover_edges(uint32 pc) 179 { 180 return true; 181 } 182 183 static bool use_cover_edges(uint64 pc) 184 { 185 #if defined(__i386__) || defined(__x86_64__) 186 if (is_gvisor) 187 return false; // gvisor coverage is not a trace, so producing edges won't work 188 // Text/modules range for x86_64. 189 if (pc < 0xffffffff80000000ull || pc >= 0xffffffffff000000ull) { 190 debug("got bad pc: 0x%llx\n", pc); 191 doexit(0); 192 } 193 #endif 194 return true; 195 } 196 #endif 197 198 static bool detect_kernel_bitness() 199 { 200 if (sizeof(void*) == 8) 201 return true; 202 // It turns out to be surprisingly hard to understand if the kernel underneath is 64-bits. 203 // A common method is to look at uname.machine. But it is produced in some involved ways, 204 // and we will need to know about all strings it returns and in the end it can be overriden 205 // during build and lie (and there are known precedents of this). 206 // So instead we look at size of addresses in /proc/kallsyms. 207 bool wide = true; 208 int fd = open("/proc/kallsyms", O_RDONLY); 209 if (fd != -1) { 210 char buf[16]; 211 if (read(fd, buf, sizeof(buf)) == sizeof(buf) && 212 (buf[8] == ' ' || buf[8] == '\t')) 213 wide = false; 214 close(fd); 215 } 216 debug("detected %d-bit kernel\n", wide ? 64 : 32); 217 return wide; 218 } 219 220 static bool detect_gvisor() 221 { 222 char buf[64] = {}; 223 // 3 stands for undeclared SYSLOG_ACTION_READ_ALL. 224 syscall(__NR_syslog, 3, buf, sizeof(buf) - 1); 225 // This is a first line of gvisor dmesg. 226 return strstr(buf, "Starting gVisor"); 227 } 228 229 // One does not simply exit. 230 // _exit can in fact fail. 231 // syzkaller did manage to generate a seccomp filter that prohibits exit_group syscall. 232 // Previously, we get into infinite recursion via segv_handler in such case 233 // and corrupted output_data, which does matter in our case since it is shared 234 // with fuzzer process. Loop infinitely instead. Parent will kill us. 235 // But one does not simply loop either. Compilers are sure that _exit never returns, 236 // so they remove all code after _exit as dead. Call _exit via volatile indirection. 237 // And this does not work as well. _exit has own handling of failing exit_group 238 // in the form of HLT instruction, it will divert control flow from our loop. 239 // So call the syscall directly. 240 NORETURN void doexit(int status) 241 { 242 volatile unsigned i; 243 syscall(__NR_exit_group, status); 244 for (i = 0;; i++) { 245 } 246 } 247 248 // If we need to kill just a single thread (e.g. after cloning), exit_group is not 249 // the right choice - it will kill all threads, which might eventually lead to 250 // unnecessary SYZFAIL errors. 251 NORETURN void doexit_thread(int status) 252 { 253 volatile unsigned i; 254 syscall(__NR_exit, status); 255 for (i = 0;; i++) { 256 } 257 } 258 259 static void setup_nicvf() 260 { 261 // This feature has custom checking precedure rather than just rely on running 262 // a simple program with this feature enabled b/c find_vf_interface cannot be made 263 // failing. It searches for the nic in init namespace, but then the nic is moved 264 // to one of testing namespace, so if number of procs is more than the number of devices, 265 // then some of them won't fine a nic (the code is also racy, more than one proc 266 // can find the same device and then moving it will fail for all but one). 267 // So we have to make find_vf_interface non-failing in case of failures, 268 // which means we cannot use it for feature checking. 269 if (open("/sys/bus/pci/devices/0000:00:11.0/", O_RDONLY | O_NONBLOCK) == -1) 270 fail("PCI device 0000:00:11.0 is not available"); 271 } 272 273 static void setup_devlink_pci() 274 { 275 // See comment in setup_nicvf. 276 if (open("/sys/bus/pci/devices/0000:00:10.0/", O_RDONLY | O_NONBLOCK) == -1) 277 fail("PCI device 0000:00:10.0 is not available"); 278 } 279 280 static void setup_delay_kcov() 281 { 282 is_kernel_64_bit = detect_kernel_bitness(); 283 cover_t cov = {}; 284 cov.fd = kCoverFd; 285 cover_open(&cov, false); 286 cover_mmap(&cov); 287 cov.data = nullptr; 288 cover_mmap(&cov); 289 // If delayed kcov mmap is not supported by the kernel, 290 // accesses to the second mapping will crash. 291 const_cast<volatile char*>(cov.data)[0] = 1; 292 } 293 294 #define SYZ_HAVE_FEATURES 1 295 static feature_t features[] = { 296 {rpc::Feature::DelayKcovMmap, setup_delay_kcov}, 297 {rpc::Feature::Fault, setup_fault}, 298 {rpc::Feature::Leak, setup_leak}, 299 {rpc::Feature::KCSAN, setup_kcsan}, 300 {rpc::Feature::USBEmulation, setup_usb}, 301 {rpc::Feature::LRWPANEmulation, setup_802154}, 302 {rpc::Feature::BinFmtMisc, setup_binfmt_misc}, 303 {rpc::Feature::Swap, setup_swap}, 304 {rpc::Feature::NicVF, setup_nicvf}, 305 {rpc::Feature::DevlinkPCI, setup_devlink_pci}, 306 };