github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/executor/executor_linux.h (about)

     1  // Copyright 2015 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  #include <fcntl.h>
     5  #include <signal.h>
     6  #include <stdio.h>
     7  #include <stdlib.h>
     8  #include <sys/ioctl.h>
     9  #include <sys/mman.h>
    10  #include <sys/prctl.h>
    11  #include <sys/syscall.h>
    12  #include <unistd.h>
    13  
// Trace modes passed to KCOV_ENABLE (see cover_enable):
// KCOV_TRACE_PC collects executed PCs, KCOV_TRACE_CMP collects comparison operands.
const unsigned long KCOV_TRACE_PC = 0;
const unsigned long KCOV_TRACE_CMP = 1;
    16  
// Argument for the KCOV_REMOTE_ENABLE ioctl; N is the number of remote handles.
// NOTE(review): presumably mirrors the kernel's struct kcov_remote_arg
// (uapi/linux/kcov.h) — field order, sizes and padding must not change.
template <int N>
struct kcov_remote_arg {
	uint32 trace_mode; // KCOV_TRACE_PC or KCOV_TRACE_CMP
	uint32 area_size; // coverage buffer size for remote (background) coverage
	uint32 num_handles; // number of entries in handles[]
	uint32 pad;
	uint64 common_handle;
	uint64 handles[N];
};
    26  
// kcov ioctl numbers. The INIT_TRACE argument width must match kernel bitness,
// hence the separate 32/64-bit variants (selected in cover_open).
#define KCOV_INIT_TRACE32 _IOR('c', 1, uint32)
#define KCOV_INIT_TRACE64 _IOR('c', 1, uint64)
#define KCOV_ENABLE _IO('c', 100)
#define KCOV_DISABLE _IO('c', 101)
#define KCOV_REMOTE_ENABLE _IOW('c', 102, kcov_remote_arg<0>)

// Remote coverage handles encode the subsystem in the top byte.
#define KCOV_SUBSYSTEM_COMMON (0x00ull << 56)
#define KCOV_SUBSYSTEM_USB (0x01ull << 56)

// Masks for the subsystem (top byte) and instance (low 32 bits) fields of a handle.
#define KCOV_SUBSYSTEM_MASK (0xffull << 56)
#define KCOV_INSTANCE_MASK (0xffffffffull)
    38  
    39  static bool is_gvisor;
    40  
    41  static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst)
    42  {
    43  	if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK)
    44  		return 0;
    45  	return subsys | inst;
    46  }
    47  
    48  static bool detect_kernel_bitness();
    49  static bool detect_gvisor();
    50  
    51  static void os_init(int argc, char** argv, char* data, size_t data_size)
    52  {
    53  	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
    54  	is_kernel_64_bit = detect_kernel_bitness();
    55  	is_gvisor = detect_gvisor();
    56  	// Surround the main data mapping with PROT_NONE pages to make virtual address layout more consistent
    57  	// across different configurations (static/non-static build) and C repros.
    58  	// One observed case before: executor had a mapping above the data mapping (output region),
    59  	// while C repros did not have that mapping above, as the result in one case VMA had next link,
    60  	// while in the other it didn't and it caused a bug to not reproduce with the C repro.
    61  	void* got = mmap(data - SYZ_PAGE_SIZE, SYZ_PAGE_SIZE, PROT_NONE, MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
    62  	if (data - SYZ_PAGE_SIZE != got)
    63  		failmsg("mmap of left data PROT_NONE page failed", "want %p, got %p", data - SYZ_PAGE_SIZE, got);
    64  	got = mmap(data, data_size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
    65  	if (data != got)
    66  		failmsg("mmap of data segment failed", "want %p, got %p", data, got);
    67  	got = mmap(data + data_size, SYZ_PAGE_SIZE, PROT_NONE, MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);
    68  	if (data + data_size != got)
    69  		failmsg("mmap of right data PROT_NONE page failed", "want %p, got %p", data + data_size, got);
    70  }
    71  
    72  static intptr_t execute_syscall(const call_t* c, intptr_t a[kMaxArgs])
    73  {
    74  	if (c->call)
    75  		return c->call(a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8]);
    76  	return syscall(c->sys_nr, a[0], a[1], a[2], a[3], a[4], a[5]);
    77  }
    78  
    79  static void cover_open(cover_t* cov, bool extra)
    80  {
    81  	int fd = open("/sys/kernel/debug/kcov", O_RDWR);
    82  	if (fd == -1)
    83  		fail("open of /sys/kernel/debug/kcov failed");
    84  	if (dup2(fd, cov->fd) < 0)
    85  		failmsg("filed to dup cover fd", "from=%d, to=%d", fd, cov->fd);
    86  	close(fd);
    87  	const int kcov_init_trace = is_kernel_64_bit ? KCOV_INIT_TRACE64 : KCOV_INIT_TRACE32;
    88  	const int cover_size = extra ? kExtraCoverSize : kCoverSize;
    89  	if (ioctl(cov->fd, kcov_init_trace, cover_size))
    90  		fail("cover init trace write failed");
    91  	cov->mmap_alloc_size = cover_size * (is_kernel_64_bit ? 8 : 4);
    92  }
    93  
// No-op on Linux: the coverage buffer is not write-protected between runs.
static void cover_protect(cover_t* cov)
{
}
    97  
#if SYZ_EXECUTOR_USES_SHMEM
// No-op on Linux: counterpart of cover_protect above.
static void cover_unprotect(cover_t* cov)
{
}
#endif
   103  
   104  static void cover_mmap(cover_t* cov)
   105  {
   106  	if (cov->data != NULL)
   107  		fail("cover_mmap invoked on an already mmapped cover_t object");
   108  	if (cov->mmap_alloc_size == 0)
   109  		fail("cover_t structure is corrupted");
   110  	// Allocate kcov buffer plus two guard pages surrounding it.
   111  	char* mapped = (char*)mmap(NULL, cov->mmap_alloc_size + 2 * SYZ_PAGE_SIZE,
   112  				   PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
   113  	if (mapped == MAP_FAILED)
   114  		exitf("failed to preallocate kcov buffer");
   115  	// Protect the guard pages.
   116  	int res = mprotect(mapped, SYZ_PAGE_SIZE, PROT_NONE);
   117  	if (res == -1)
   118  		exitf("failed to protect kcov guard page");
   119  	res = mprotect(mapped + SYZ_PAGE_SIZE + cov->mmap_alloc_size,
   120  		       SYZ_PAGE_SIZE, PROT_NONE);
   121  	if (res == -1)
   122  		exitf("failed to protect kcov guard page");
   123  	// Now map the kcov buffer to the file, overwriting the existing mapping above.
   124  	cov->data = (char*)mmap(mapped + SYZ_PAGE_SIZE, cov->mmap_alloc_size,
   125  				PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, cov->fd, 0);
   126  	if (cov->data == MAP_FAILED)
   127  		exitf("cover mmap failed");
   128  	cov->data_end = cov->data + cov->mmap_alloc_size;
   129  	cov->data_offset = is_kernel_64_bit ? sizeof(uint64_t) : sizeof(uint32_t);
   130  	cov->pc_offset = 0;
   131  }
   132  
// Enables coverage collection on cov->fd for the calling thread (extra=false)
// or for remote/background kernel threads via KCOV_REMOTE_ENABLE (extra=true).
// collect_comps selects comparison-operand tracing instead of PC tracing.
static void cover_enable(cover_t* cov, bool collect_comps, bool extra)
{
	unsigned int kcov_mode = collect_comps ? KCOV_TRACE_CMP : KCOV_TRACE_PC;
	// The KCOV_ENABLE call should be fatal,
	// but in practice ioctl fails with assorted errors (9, 14, 25),
	// so we use exitf.
	if (!extra) {
		if (ioctl(cov->fd, KCOV_ENABLE, kcov_mode))
			exitf("cover enable write trace failed, mode=%d", kcov_mode);
		return;
	}
	kcov_remote_arg<1> arg = {
	    .trace_mode = kcov_mode,
	    // Coverage buffer size of background threads.
	    .area_size = kExtraCoverSize,
	    .num_handles = 1,
	};
	// Handles are keyed by proc id (+1 since 0 is an invalid instance for us):
	// common_handle covers background threads spawned on behalf of this proc,
	// handles[0] covers USB subsystem activity attributed to it.
	arg.common_handle = kcov_remote_handle(KCOV_SUBSYSTEM_COMMON, procid + 1);
	arg.handles[0] = kcov_remote_handle(KCOV_SUBSYSTEM_USB, procid + 1);
	if (ioctl(cov->fd, KCOV_REMOTE_ENABLE, &arg))
		exitf("remote cover enable write trace failed");
}
   155  
   156  static void cover_reset(cover_t* cov)
   157  {
   158  	// Callers in common_linux.h don't check this flag.
   159  	if (!flag_coverage)
   160  		return;
   161  	if (cov == 0) {
   162  		if (current_thread == 0)
   163  			fail("cover_reset: current_thread == 0");
   164  		cov = &current_thread->cov;
   165  	}
   166  	*(uint64*)cov->data = 0;
   167  }
   168  
   169  static void cover_collect(cover_t* cov)
   170  {
   171  	if (is_kernel_64_bit)
   172  		cov->size = *(uint64*)cov->data;
   173  	else
   174  		cov->size = *(uint32*)cov->data;
   175  }
   176  
   177  #if SYZ_EXECUTOR_USES_SHMEM
// 32-bit PCs: accept everything (no text-range filtering for 32-bit kernels).
static bool use_cover_edges(uint32 pc)
{
	return true;
}
   182  
   183  static bool use_cover_edges(uint64 pc)
   184  {
   185  #if defined(__i386__) || defined(__x86_64__)
   186  	if (is_gvisor)
   187  		return false; // gvisor coverage is not a trace, so producing edges won't work
   188  	// Text/modules range for x86_64.
   189  	if (pc < 0xffffffff80000000ull || pc >= 0xffffffffff000000ull) {
   190  		debug("got bad pc: 0x%llx\n", pc);
   191  		doexit(0);
   192  	}
   193  #endif
   194  	return true;
   195  }
   196  #endif
   197  
   198  static bool detect_kernel_bitness()
   199  {
   200  	if (sizeof(void*) == 8)
   201  		return true;
   202  	// It turns out to be surprisingly hard to understand if the kernel underneath is 64-bits.
   203  	// A common method is to look at uname.machine. But it is produced in some involved ways,
   204  	// and we will need to know about all strings it returns and in the end it can be overriden
   205  	// during build and lie (and there are known precedents of this).
   206  	// So instead we look at size of addresses in /proc/kallsyms.
   207  	bool wide = true;
   208  	int fd = open("/proc/kallsyms", O_RDONLY);
   209  	if (fd != -1) {
   210  		char buf[16];
   211  		if (read(fd, buf, sizeof(buf)) == sizeof(buf) &&
   212  		    (buf[8] == ' ' || buf[8] == '\t'))
   213  			wide = false;
   214  		close(fd);
   215  	}
   216  	debug("detected %d-bit kernel\n", wide ? 64 : 32);
   217  	return wide;
   218  }
   219  
   220  static bool detect_gvisor()
   221  {
   222  	char buf[64] = {};
   223  	// 3 stands for undeclared SYSLOG_ACTION_READ_ALL.
   224  	syscall(__NR_syslog, 3, buf, sizeof(buf) - 1);
   225  	// This is a first line of gvisor dmesg.
   226  	return strstr(buf, "Starting gVisor");
   227  }
   228  
   229  // One does not simply exit.
   230  // _exit can in fact fail.
   231  // syzkaller did manage to generate a seccomp filter that prohibits exit_group syscall.
   232  // Previously, we get into infinite recursion via segv_handler in such case
   233  // and corrupted output_data, which does matter in our case since it is shared
   234  // with fuzzer process. Loop infinitely instead. Parent will kill us.
   235  // But one does not simply loop either. Compilers are sure that _exit never returns,
   236  // so they remove all code after _exit as dead. Call _exit via volatile indirection.
   237  // And this does not work as well. _exit has own handling of failing exit_group
   238  // in the form of HLT instruction, it will divert control flow from our loop.
   239  // So call the syscall directly.
NORETURN void doexit(int status)
{
	// volatile counter keeps the compiler from eliding the fallback loop
	// (see the rationale comment above: exit_group can fail under seccomp).
	volatile unsigned i;
	syscall(__NR_exit_group, status);
	for (i = 0;; i++) {
	}
}
   247  
   248  // If we need to kill just a single thread (e.g. after cloning), exit_group is not
   249  // the right choice - it will kill all threads, which might eventually lead to
   250  // unnecessary SYZFAIL errors.
NORETURN void doexit_thread(int status)
{
	// Same volatile-loop fallback as doexit above, but via exit (single thread)
	// rather than exit_group (whole thread group).
	volatile unsigned i;
	syscall(__NR_exit, status);
	for (i = 0;; i++) {
	}
}
   258  
   259  static void setup_nicvf()
   260  {
   261  	// This feature has custom checking precedure rather than just rely on running
   262  	// a simple program with this feature enabled b/c find_vf_interface cannot be made
   263  	// failing. It searches for the nic in init namespace, but then the nic is moved
   264  	// to one of testing namespace, so if number of procs is more than the number of devices,
   265  	// then some of them won't fine a nic (the code is also racy, more than one proc
   266  	// can find the same device and then moving it will fail for all but one).
   267  	// So we have to make find_vf_interface non-failing in case of failures,
   268  	// which means we cannot use it for feature checking.
   269  	if (open("/sys/bus/pci/devices/0000:00:11.0/", O_RDONLY | O_NONBLOCK) == -1)
   270  		fail("PCI device 0000:00:11.0 is not available");
   271  }
   272  
   273  static void setup_devlink_pci()
   274  {
   275  	// See comment in setup_nicvf.
   276  	if (open("/sys/bus/pci/devices/0000:00:10.0/", O_RDONLY | O_NONBLOCK) == -1)
   277  		fail("PCI device 0000:00:10.0 is not available");
   278  }
   279  
// Feature check for delayed kcov mmap: maps the same kcov fd twice and touches
// the second mapping; on kernels without support this access crashes, which the
// caller interprets as "feature absent".
static void setup_delay_kcov()
{
	is_kernel_64_bit = detect_kernel_bitness();
	cover_t cov = {};
	cov.fd = kCoverFd;
	cover_open(&cov, false);
	cover_mmap(&cov);
	// Drop the pointer and map the same fd a second time. NOTE(review): the
	// first mapping is intentionally abandoned — presumably this runs in a
	// short-lived checker process, so the leak is harmless; verify with caller.
	cov.data = nullptr;
	cover_mmap(&cov);
	// If delayed kcov mmap is not supported by the kernel,
	// accesses to the second mapping will crash.
	const_cast<volatile char*>(cov.data)[0] = 1;
}
   293  
#define SYZ_HAVE_FEATURES 1
// Feature-check table: pairs each rpc::Feature id with the setup routine that
// probes for it (each routine fails/crashes loudly when the feature is absent).
static feature_t features[] = {
    {rpc::Feature::DelayKcovMmap, setup_delay_kcov},
    {rpc::Feature::Fault, setup_fault},
    {rpc::Feature::Leak, setup_leak},
    {rpc::Feature::KCSAN, setup_kcsan},
    {rpc::Feature::USBEmulation, setup_usb},
    {rpc::Feature::LRWPANEmulation, setup_802154},
    {rpc::Feature::BinFmtMisc, setup_binfmt_misc},
    {rpc::Feature::Swap, setup_swap},
    {rpc::Feature::NicVF, setup_nicvf},
    {rpc::Feature::DevlinkPCI, setup_devlink_pci},
};