github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/executor/executor_linux.h (about)

     1  // Copyright 2015 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  #include <fcntl.h>
     5  #include <signal.h>
     6  #include <stdio.h>
     7  #include <stdlib.h>
     8  #include <sys/ioctl.h>
     9  #include <sys/mman.h>
    10  #include <sys/prctl.h>
    11  #include <sys/syscall.h>
    12  #include <unistd.h>
    13  
// Whether memory-protection keys are usable on this machine/kernel
// (set in os_init by probing pkey_alloc up to RESERVED_PKEY).
static bool pkeys_enabled;

// The coverage buffer can realistically overflow. In the non-snapshot mode we cannot afford
// very large buffer b/c there are usually multiple procs, and each of them consumes
// significant amount of memory. In snapshot mode we have only one proc, so we can have
// larger coverage buffer.
const int kSnapshotCoverSize = 1024 << 10;

// kcov trace modes passed to KCOV_ENABLE: collect program counters vs
// comparison operands.
const unsigned long KCOV_TRACE_PC = 0;
const unsigned long KCOV_TRACE_CMP = 1;
    24  
// Mirrors the kernel's struct kcov_remote_arg used with KCOV_REMOTE_ENABLE;
// N is the number of subsystem handles in the trailing array.
// NOTE(review): field order/sizes must match the kernel ABI exactly - do not
// reorder or repack.
template <int N>
struct kcov_remote_arg {
	uint32 trace_mode; // KCOV_TRACE_PC or KCOV_TRACE_CMP
	uint32 area_size; // coverage buffer size for remote tasks (presumably in trace words - confirm against kcov UAPI)
	uint32 num_handles; // number of valid entries in handles[]
	uint32 pad;
	uint64 common_handle;
	uint64 handles[N];
};
    34  
// kcov ioctl numbers (mirroring the kernel's include/uapi/linux/kcov.h).
// TRACE32/TRACE64 differ only in the argument type used for the size.
#define KCOV_INIT_TRACE32 _IOR('c', 1, uint32)
#define KCOV_INIT_TRACE64 _IOR('c', 1, uint64)
#define KCOV_ENABLE _IO('c', 100)
#define KCOV_DISABLE _IO('c', 101)
#define KCOV_REMOTE_ENABLE _IOW('c', 102, kcov_remote_arg<0>)
// Resets collected coverage without disabling kcov; only present on newer
// kernels (availability is probed in setup_kcov_reset_ioctl below).
#define KCOV_RESET_TRACE _IO('c', 104)
    41  
    42  #define KCOV_SUBSYSTEM_COMMON (0x00ull << 56)
    43  #define KCOV_SUBSYSTEM_USB (0x01ull << 56)
    44  
    45  #define KCOV_SUBSYSTEM_MASK (0xffull << 56)
    46  #define KCOV_INSTANCE_MASK (0xffffffffull)
    47  
    48  static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst)
    49  {
    50  	if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK)
    51  		return 0;
    52  	return subsys | inst;
    53  }
    54  
// Per-process initialization: arranges to die with the parent, establishes
// the fixed data mapping flanked by guard pages, installs a no-op SIGCHLD
// handler, and reserves the last protection key for coverage buffers.
static void os_init(int argc, char** argv, char* data, size_t data_size)
{
	// Have the kernel deliver SIGKILL to us when the parent dies.
	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
	// Surround the main data mapping with PROT_NONE pages to make virtual address layout more consistent
	// across different configurations (static/non-static build) and C repros.
	// One observed case before: executor had a mapping above the data mapping (output region),
	// while C repros did not have that mapping above, as the result in one case VMA had next link,
	// while in the other it didn't and it caused a bug to not reproduce with the C repro.
	void* got = mmap(data - SYZ_PAGE_SIZE, SYZ_PAGE_SIZE, PROT_NONE, MAP_ANON | MAP_PRIVATE | MAP_FIXED_EXCLUSIVE, -1, 0);
	if (data - SYZ_PAGE_SIZE != got)
		failmsg("mmap of left data PROT_NONE page failed", "want %p, got %p", data - SYZ_PAGE_SIZE, got);
	// NOLINTBEGIN(clang-analyzer-security.MmapWriteExec)
	got = mmap(data, data_size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE | MAP_FIXED_EXCLUSIVE, -1, 0);
	// NOLINTEND(clang-analyzer-security.MmapWriteExec)
	if (data != got)
		failmsg("mmap of data segment failed", "want %p, got %p", data, got);
	got = mmap(data + data_size, SYZ_PAGE_SIZE, PROT_NONE, MAP_ANON | MAP_PRIVATE | MAP_FIXED_EXCLUSIVE, -1, 0);
	if (data + data_size != got)
		failmsg("mmap of right data PROT_NONE page failed", "want %p, got %p", data + data_size, got);

	// A SIGCHLD handler makes sleep in loop exit immediately return with EINTR when a child exits.
	struct sigaction act = {};
	act.sa_handler = [](int) {};
	sigaction(SIGCHLD, &act, nullptr);

	// Use the last available pkey so that C reproducers get the same keys from pkey_alloc.
	// Keep allocating keys until we run out (pkeys unsupported or exhausted)
	// or obtain RESERVED_PKEY; the reserved key is kept allocated while all
	// intermediate keys collected on the way are released again below.
	int pkeys[RESERVED_PKEY + 1];
	int npkey = 0;
	for (; npkey <= RESERVED_PKEY; npkey++) {
		int pk = pkey_alloc(0, 0);
		if (pk == -1)
			break;
		if (pk == RESERVED_PKEY) {
			pkeys_enabled = true;
			break;
		}
		pkeys[npkey] = pk;
	}
	while (npkey--)
		pkey_free(pkeys[npkey]);
}
    96  
    97  static intptr_t execute_syscall(const call_t* c, intptr_t a[kMaxArgs])
    98  {
    99  	if (c->call)
   100  		return c->call(a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8]);
   101  	return syscall(c->sys_nr, a[0], a[1], a[2], a[3], a[4], a[5]);
   102  }
   103  
   104  static void cover_open(cover_t* cov, bool extra)
   105  {
   106  	int fd = open("/sys/kernel/debug/kcov", O_RDWR);
   107  	if (fd == -1)
   108  		fail("open of /sys/kernel/debug/kcov failed");
   109  	if (dup2(fd, cov->fd) < 0)
   110  		failmsg("filed to dup cover fd", "from=%d, to=%d", fd, cov->fd);
   111  	close(fd);
   112  	const int kcov_init_trace = is_kernel_64_bit ? KCOV_INIT_TRACE64 : KCOV_INIT_TRACE32;
   113  	const int cover_size = extra ? kExtraCoverSize : flag_snapshot ? kSnapshotCoverSize
   114  								       : kCoverSize;
   115  	if (ioctl(cov->fd, kcov_init_trace, cover_size))
   116  		fail("cover init trace write failed");
   117  	cov->data_size = cover_size * (is_kernel_64_bit ? 8 : 4);
   118  	if (pkeys_enabled)
   119  		debug("pkey protection enabled\n");
   120  }
   121  
   122  static void cover_close(cover_t* cov)
   123  {
   124  	if (cov->fd == -1)
   125  		fail("attempting to close an invalid cover fd");
   126  	if (cov->enabled) {
   127  		if (ioctl(cov->fd, KCOV_DISABLE, 0))
   128  			fail("KCOV_DISABLE failed");
   129  		cov->enabled = false;
   130  	}
   131  	close(cov->fd);
   132  	cov->fd = -1;
   133  }
   134  
   135  static void cover_protect(cover_t* cov)
   136  {
   137  	if (pkeys_enabled && pkey_set(RESERVED_PKEY, PKEY_DISABLE_WRITE))
   138  		debug("pkey_set failed: %d\n", errno);
   139  }
   140  
   141  static void cover_unprotect(cover_t* cov)
   142  {
   143  	if (pkeys_enabled && pkey_set(RESERVED_PKEY, 0))
   144  		debug("pkey_set failed: %d\n", errno);
   145  }
   146  
// Maps the kcov trace buffer for cov, flanked by two PROT_NONE guard pages,
// and (when pkeys are available) tags it with the reserved protection key so
// that cover_protect/cover_unprotect can toggle write access cheaply.
static void cover_mmap(cover_t* cov)
{
	if (cov->mmap_alloc_ptr != NULL)
		fail("cover_mmap invoked on an already mmapped cover_t object");
	if (cov->data_size == 0)
		fail("cover_t structure is corrupted");
	// Allocate kcov buffer plus two guard pages surrounding it.
	cov->mmap_alloc_size = cov->data_size + 2 * SYZ_PAGE_SIZE;
	cov->mmap_alloc_ptr = (char*)mmap(NULL, cov->mmap_alloc_size,
					  PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
	if (cov->mmap_alloc_ptr == MAP_FAILED)
		exitf("failed to preallocate kcov buffer");
	// Now map the kcov buffer to the file, overwriting the existing mapping above.
	// The leading and trailing PROT_NONE pages remain as guards.
	int prot = flag_read_only_coverage ? PROT_READ : (PROT_READ | PROT_WRITE);
	void* data_buf = (char*)mmap(cov->mmap_alloc_ptr + SYZ_PAGE_SIZE, cov->data_size,
				     prot, MAP_SHARED | MAP_FIXED, cov->fd, 0);
	if (data_buf == MAP_FAILED)
		exitf("cover mmap failed");
	if (pkeys_enabled && pkey_mprotect(data_buf, cov->data_size, prot, RESERVED_PKEY))
		exitf("failed to pkey_mprotect kcov buffer");
	cov->data = (char*)data_buf;
	cov->data_end = cov->data + cov->data_size;
	// The first trace word holds the entry count (see cover_collect_impl),
	// so actual PC entries start one word into the buffer.
	cov->data_offset = is_kernel_64_bit ? sizeof(uint64_t) : sizeof(uint32_t);
	cov->pc_offset = 0;
}
   172  
   173  static void cover_munmap(cover_t* cov)
   174  {
   175  	if (cov->mmap_alloc_ptr == NULL)
   176  		fail("cover_munmap invoked on a non-mmapped cover_t object");
   177  	if (munmap(cov->mmap_alloc_ptr, cov->mmap_alloc_size))
   178  		fail("cover_munmap failed");
   179  	cov->mmap_alloc_ptr = NULL;
   180  }
   181  
// Enables coverage collection on cov's kcov fd. For extra (background/remote)
// coverage, uses KCOV_REMOTE_ENABLE with handles derived from procid so the
// coverage of kernel background threads is attributed to this proc.
static void cover_enable(cover_t* cov, bool collect_comps, bool extra)
{
	unsigned int kcov_mode = collect_comps ? KCOV_TRACE_CMP : KCOV_TRACE_PC;
	// The KCOV_ENABLE call should be fatal,
	// but in practice ioctl fails with assorted errors (9, 14, 25),
	// so we use exitf.
	if (!extra) {
		if (ioctl(cov->fd, KCOV_ENABLE, kcov_mode))
			exitf("cover enable write trace failed, mode=%d", kcov_mode);
		cov->enabled = true;
		return;
	}
	kcov_remote_arg<1> arg = {
	    .trace_mode = kcov_mode,
	    // Coverage buffer size of background threads.
	    .area_size = kExtraCoverSize,
	    .num_handles = 1,
	};
	// procid + 1: instance ids here start at 1 rather than 0.
	// NOTE(review): presumably 0 is reserved/ambiguous as an instance id -
	// confirm against the kcov remote handle conventions.
	arg.common_handle = kcov_remote_handle(KCOV_SUBSYSTEM_COMMON, procid + 1);
	arg.handles[0] = kcov_remote_handle(KCOV_SUBSYSTEM_USB, procid + 1);
	if (ioctl(cov->fd, KCOV_REMOTE_ENABLE, &arg))
		exitf("remote cover enable write trace failed");
	cov->enabled = true;
}
   206  
   207  static void cover_reset(cover_t* cov)
   208  {
   209  	// Callers in common_linux.h don't check this flag.
   210  	if (!flag_coverage)
   211  		return;
   212  	if (cov == 0) {
   213  		if (current_thread == 0)
   214  			fail("cover_reset: current_thread == 0");
   215  		cov = &current_thread->cov;
   216  	}
   217  	if (flag_read_only_coverage) {
   218  		if (ioctl(cov->fd, KCOV_RESET_TRACE, 0))
   219  			fail("KCOV_RESET_TRACE failed");
   220  	} else {
   221  		cover_unprotect(cov);
   222  		*(uint64*)cov->data = 0;
   223  		cover_protect(cov);
   224  	}
   225  	cov->overflow = false;
   226  }
   227  
// Reads the number of collected trace entries from the first word of the
// kcov buffer into cov->size, and flags overflow when the recorded entries
// would extend past the end of the buffer.
template <typename cover_data_t>
static void cover_collect_impl(cover_t* cov)
{
	cov->size = *(cover_data_t*)cov->data;
	// +2 leaves headroom beyond the reported count (count word plus slack).
	// NOTE(review): the exact reason for 2 is not evident from this file -
	// confirm against the kcov documentation.
	cov->overflow = (cov->data + (cov->size + 2) * sizeof(cover_data_t)) > cov->data_end;
}
   234  
   235  static void cover_collect(cover_t* cov)
   236  {
   237  	if (is_kernel_64_bit)
   238  		cover_collect_impl<uint64>(cov);
   239  	else
   240  		cover_collect_impl<uint32>(cov);
   241  }
   242  
   243  // One does not simply exit.
   244  // _exit can in fact fail.
   245  // syzkaller did manage to generate a seccomp filter that prohibits exit_group syscall.
   246  // Previously, we get into infinite recursion via segv_handler in such case
   247  // and corrupted output_data, which does matter in our case since it is shared
   248  // with fuzzer process. Loop infinitely instead. Parent will kill us.
   249  // But one does not simply loop either. Compilers are sure that _exit never returns,
   250  // so they remove all code after _exit as dead. Call _exit via volatile indirection.
   251  // And this does not work as well. _exit has own handling of failing exit_group
   252  // in the form of HLT instruction, it will divert control flow from our loop.
   253  // So call the syscall directly.
   254  NORETURN void doexit(int status)
   255  {
   256  	volatile unsigned i;
   257  	syscall(__NR_exit_group, status);
   258  	for (i = 0;; i++) {
   259  	}
   260  }
   261  
   262  // If we need to kill just a single thread (e.g. after cloning), exit_group is not
   263  // the right choice - it will kill all threads, which might eventually lead to
   264  // unnecessary SYZFAIL errors.
   265  NORETURN void doexit_thread(int status)
   266  {
   267  	volatile unsigned i;
   268  	syscall(__NR_exit, status);
   269  	for (i = 0;; i++) {
   270  	}
   271  }
   272  
   273  #define SYZ_HAVE_KCSAN 1
   274  static void setup_kcsan_filter(const std::vector<std::string>& frames)
   275  {
   276  	if (frames.empty())
   277  		return;
   278  	int fd = open("/sys/kernel/debug/kcsan", O_WRONLY);
   279  	if (fd == -1)
   280  		fail("failed to open kcsan debugfs file");
   281  	for (const auto& frame : frames)
   282  		dprintf(fd, "!%s\n", frame.c_str());
   283  	close(fd);
   284  }
   285  
   286  static const char* setup_nicvf()
   287  {
   288  	// This feature has custom checking precedure rather than just rely on running
   289  	// a simple program with this feature enabled b/c find_vf_interface cannot be made
   290  	// failing. It searches for the nic in init namespace, but then the nic is moved
   291  	// to one of testing namespace, so if number of procs is more than the number of devices,
   292  	// then some of them won't fine a nic (the code is also racy, more than one proc
   293  	// can find the same device and then moving it will fail for all but one).
   294  	// So we have to make find_vf_interface non-failing in case of failures,
   295  	// which means we cannot use it for feature checking.
   296  	int fd = open("/sys/bus/pci/devices/0000:00:11.0/", O_RDONLY | O_NONBLOCK);
   297  	if (fd == -1)
   298  		return "PCI device 0000:00:11.0 is not available";
   299  	close(fd);
   300  	return NULL;
   301  }
   302  
   303  static const char* setup_devlink_pci()
   304  {
   305  	// See comment in setup_nicvf.
   306  	int fd = open("/sys/bus/pci/devices/0000:00:10.0/", O_RDONLY | O_NONBLOCK);
   307  	if (fd == -1)
   308  		return "PCI device 0000:00:10.0 is not available";
   309  	close(fd);
   310  	return NULL;
   311  }
   312  
   313  static const char* setup_delay_kcov()
   314  {
   315  	int fd = open("/sys/kernel/debug/kcov", O_RDWR);
   316  	if (fd == -1)
   317  		return "open of /sys/kernel/debug/kcov failed";
   318  	close(fd);
   319  	cover_t cov = {};
   320  	cov.fd = kCoverFd;
   321  	cover_open(&cov, false);
   322  	cover_mmap(&cov);
   323  	char* first = cov.mmap_alloc_ptr;
   324  	cov.mmap_alloc_ptr = nullptr;
   325  	cover_mmap(&cov);
   326  	// If delayed kcov mmap is not supported by the kernel,
   327  	// accesses to the second mapping will crash.
   328  	// Use clock_gettime to check if it's mapped w/o crashing the process.
   329  	const char* error = NULL;
   330  	timespec ts;
   331  	if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
   332  		if (errno != EFAULT)
   333  			fail("clock_gettime failed");
   334  		error = "kernel commit b3d7fe86fbd0 is not present";
   335  	} else {
   336  		munmap(cov.mmap_alloc_ptr, cov.mmap_alloc_size);
   337  	}
   338  	munmap(first, cov.mmap_alloc_size);
   339  	cover_close(&cov);
   340  	return error;
   341  }
   342  
// Feature probe for the KCOV_RESET_TRACE ioctl: sets up a scratch kcov
// instance, enables it, and issues the reset ioctl. ENOTTY means the kernel
// predates the ioctl; any other failure is fatal.
static const char* setup_kcov_reset_ioctl()
{
	int fd = open("/sys/kernel/debug/kcov", O_RDWR);
	if (fd == -1)
		return "open of /sys/kernel/debug/kcov failed";
	close(fd);

	cover_t cov = {};
	cov.fd = kCoverFd;
	cover_open(&cov, false);
	cover_mmap(&cov);
	const char* error = NULL;
	// Enable collection first, mirroring normal usage of the ioctl.
	cover_enable(&cov, false, false);
	int ret;
	if ((ret = ioctl(cov.fd, KCOV_RESET_TRACE, 0))) {
		if (errno != ENOTTY) {
			fprintf(stderr, "ret: %d, errno: %d\n", ret, errno);
			fail("ioctl(KCOV_RESET_TRACE) failed");
		}
		error = "kernel does not support ioctl(KCOV_RESET_TRACE)";
	}
	// cover_close below also takes care of issuing KCOV_DISABLE.
	cover_munmap(&cov);
	cover_close(&cov);
	return error;
}
   368  
#define SYZ_HAVE_FEATURES 1
// Table of optional features and their setup/probe routines. Each setup
// function returns NULL when the feature is usable, or a short human-readable
// reason string when it is not (see e.g. setup_nicvf above).
static feature_t features[] = {
    {rpc::Feature::DelayKcovMmap, setup_delay_kcov},
    {rpc::Feature::KcovResetIoctl, setup_kcov_reset_ioctl},
    {rpc::Feature::Fault, setup_fault},
    {rpc::Feature::Leak, setup_leak},
    {rpc::Feature::KCSAN, setup_kcsan},
    {rpc::Feature::USBEmulation, setup_usb},
    {rpc::Feature::LRWPANEmulation, setup_802154},
    {rpc::Feature::BinFmtMisc, setup_binfmt_misc},
    {rpc::Feature::Swap, setup_swap},
    {rpc::Feature::NicVF, setup_nicvf},
    {rpc::Feature::DevlinkPCI, setup_devlink_pci},
};