github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/executor/common_linux.h (about)

     1  // Copyright 2016 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  // This file is shared between executor and csource package.
     5  
     6  #include <stdlib.h>
     7  #include <sys/syscall.h>
     8  #include <sys/types.h>
     9  #include <unistd.h>
    10  
    11  #if SYZ_EXECUTOR
    12  const int kExtraCoverSize = 1024 << 10;
    13  struct cover_t;
    14  static void cover_reset(cover_t* cov);
    15  #endif
    16  
    17  #if SYZ_EXECUTOR || SYZ_THREADED
    18  #include <linux/futex.h>
    19  #include <pthread.h>
    20  
    21  typedef struct {
    22  	int state;
    23  } event_t;
    24  
    25  static void event_init(event_t* ev)
    26  {
    27  	ev->state = 0;
    28  }
    29  
    30  static void event_reset(event_t* ev)
    31  {
    32  	ev->state = 0;
    33  }
    34  
    35  static void event_set(event_t* ev)
    36  {
    37  	if (ev->state)
    38  		exitf("event already set");
    39  	__atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
    40  	syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1000000);
    41  }
    42  
    43  static void event_wait(event_t* ev)
    44  {
    45  	while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
    46  		syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
    47  }
    48  
    49  static int event_isset(event_t* ev)
    50  {
    51  	return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
    52  }
    53  
    54  static int event_timedwait(event_t* ev, uint64 timeout)
    55  {
    56  	uint64 start = current_time_ms();
    57  	uint64 now = start;
    58  	for (;;) {
    59  		uint64 remain = timeout - (now - start);
    60  		struct timespec ts;
    61  		ts.tv_sec = remain / 1000;
    62  		ts.tv_nsec = (remain % 1000) * 1000 * 1000;
    63  		syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
    64  		if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
    65  			return 1;
    66  		now = current_time_ms();
    67  		if (now - start > timeout)
    68  			return 0;
    69  	}
    70  }
    71  #endif
    72  
    73  #if SYZ_EXECUTOR || SYZ_REPEAT || SYZ_NET_INJECTION || SYZ_FAULT || SYZ_SANDBOX_NONE || \
    74      SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID ||               \
    75      SYZ_FAULT || SYZ_LEAK || SYZ_BINFMT_MISC || SYZ_SYSCTL ||                           \
    76      ((__NR_syz_usb_connect || __NR_syz_usb_connect_ath9k) && USB_DEBUG) ||              \
    77      __NR_syz_usbip_server_init
    78  #include <errno.h>
    79  #include <fcntl.h>
    80  #include <stdarg.h>
    81  #include <stdbool.h>
    82  #include <string.h>
    83  #include <sys/stat.h>
    84  #include <sys/types.h>
    85  
    86  static bool write_file(const char* file, const char* what, ...)
    87  {
    88  	char buf[1024];
    89  	va_list args;
    90  	va_start(args, what);
    91  	vsnprintf(buf, sizeof(buf), what, args);
    92  	va_end(args);
    93  	buf[sizeof(buf) - 1] = 0;
    94  	int len = strlen(buf);
    95  
    96  	int fd = open(file, O_WRONLY | O_CLOEXEC);
    97  	if (fd == -1)
    98  		return false;
    99  	if (write(fd, buf, len) != len) {
   100  		int err = errno;
   101  		close(fd);
   102  		debug("write(%s) failed: %d\n", file, err);
   103  		errno = err;
   104  		return false;
   105  	}
   106  	close(fd);
   107  	return true;
   108  }
   109  #endif
   110  
   111  #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION || SYZ_DEVLINK_PCI || SYZ_WIFI || SYZ_802154 || \
   112      __NR_syz_genetlink_get_family_id || __NR_syz_80211_inject_frame || __NR_syz_80211_join_ibss || SYZ_NIC_VF
   113  #include <arpa/inet.h>
   114  #include <errno.h>
   115  #include <net/if.h>
   116  #include <netinet/in.h>
   117  #include <stdbool.h>
   118  #include <string.h>
   119  #include <sys/socket.h>
   120  #include <sys/types.h>
   121  
   122  #include <linux/genetlink.h>
   123  #include <linux/if_addr.h>
   124  #include <linux/if_link.h>
   125  #include <linux/in6.h>
   126  #include <linux/neighbour.h>
   127  #include <linux/net.h>
   128  #include <linux/netlink.h>
   129  #include <linux/rtnetlink.h>
   130  #include <linux/veth.h>
   131  
   132  struct nlmsg {
   133  	char* pos;
   134  	int nesting;
   135  	struct nlattr* nested[8];
   136  	char buf[4096];
   137  };
   138  
   139  static void netlink_init(struct nlmsg* nlmsg, int typ, int flags,
   140  			 const void* data, int size)
   141  {
   142  	memset(nlmsg, 0, sizeof(*nlmsg));
   143  	struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg->buf;
   144  	hdr->nlmsg_type = typ;
   145  	hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
   146  	memcpy(hdr + 1, data, size);
   147  	nlmsg->pos = (char*)(hdr + 1) + NLMSG_ALIGN(size);
   148  }
   149  
   150  static void netlink_attr(struct nlmsg* nlmsg, int typ,
   151  			 const void* data, int size)
   152  {
   153  	struct nlattr* attr = (struct nlattr*)nlmsg->pos;
   154  	attr->nla_len = sizeof(*attr) + size;
   155  	attr->nla_type = typ;
   156  	if (size > 0)
   157  		memcpy(attr + 1, data, size);
   158  	nlmsg->pos += NLMSG_ALIGN(attr->nla_len);
   159  }
   160  
   161  #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_802154
   162  static void netlink_nest(struct nlmsg* nlmsg, int typ)
   163  {
   164  	struct nlattr* attr = (struct nlattr*)nlmsg->pos;
   165  	attr->nla_type = typ;
   166  	nlmsg->pos += sizeof(*attr);
   167  	nlmsg->nested[nlmsg->nesting++] = attr;
   168  }
   169  
   170  static void netlink_done(struct nlmsg* nlmsg)
   171  {
   172  	struct nlattr* attr = nlmsg->nested[--nlmsg->nesting];
   173  	attr->nla_len = nlmsg->pos - (char*)attr;
   174  }
   175  
   176  #if SYZ_EXECUTOR || SYZ_NIC_VF
   177  #include <ifaddrs.h>
   178  #include <linux/ethtool.h>
   179  #include <linux/sockios.h>
   180  #include <sys/ioctl.h>
   181  
   182  struct vf_intf {
   183  	char pass_thru_intf[IFNAMSIZ];
   184  	int ppid; // used by Child
   185  };
   186  
   187  static struct vf_intf vf_intf;
   188  
   189  static void find_vf_interface(void)
   190  {
   191  #if SYZ_EXECUTOR
   192  	if (!flag_nic_vf)
   193  		return;
   194  #endif
   195  	struct ifaddrs* addresses = NULL;
   196  	int pid = getpid();
   197  	int ret = 0;
   198  
   199  	memset(&vf_intf, 0, sizeof(struct vf_intf));
   200  
   201  	debug("Checking for VF pass-thru interface.\n");
   202  	if (getifaddrs(&addresses) == -1) {
   203  		debug("%s: getifaddrs() failed.\n", __func__);
   204  		return;
   205  	}
   206  
   207  	int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
   208  
   209  	if (fd < 0) {
   210  		debug("%s: socket() failed.\n", __func__);
   211  		return;
   212  	}
   213  	struct ifreq ifr;
   214  	struct ethtool_drvinfo drvinfo;
   215  	struct ifaddrs* address = addresses;
   216  
   217  	while (address) {
   218  		debug("ifa_name: %s\n", address->ifa_name);
   219  		memset(&ifr, 0, sizeof(struct ifreq));
   220  		strcpy(ifr.ifr_name, address->ifa_name);
   221  		memset(&drvinfo, 0, sizeof(struct ethtool_drvinfo));
   222  		drvinfo.cmd = ETHTOOL_GDRVINFO;
   223  		ifr.ifr_data = (caddr_t)&drvinfo;
   224  		ret = ioctl(fd, SIOCETHTOOL, &ifr);
   225  
   226  		if (ret < 0) {
   227  			debug("%s: ioctl() failed.\n", __func__);
   228  		} else if (strlen(drvinfo.bus_info)) {
   229  			debug("bus_info: %s, strlen(drvinfo.bus_info)=%zu\n",
   230  			      drvinfo.bus_info, strlen(drvinfo.bus_info));
   231  			if (strcmp(drvinfo.bus_info, "0000:00:11.0") == 0) {
   232  				if (strlen(address->ifa_name) < IFNAMSIZ) {
   233  					strncpy(vf_intf.pass_thru_intf,
   234  						address->ifa_name, IFNAMSIZ);
   235  					vf_intf.ppid = pid;
   236  				} else {
   237  					debug("%s: %d strlen(%s) >= IFNAMSIZ.\n",
   238  					      __func__, pid, address->ifa_name);
   239  				}
   240  				break;
   241  			}
   242  		}
   243  		address = address->ifa_next;
   244  	}
   245  	freeifaddrs(addresses);
   246  	if (!vf_intf.ppid) {
   247  		memset(&vf_intf, 0, sizeof(struct vf_intf));
   248  		debug("%s: %d could not find VF pass-thru interface.\n", __func__, pid);
   249  		return;
   250  	}
   251  	debug("%s: %d found VF pass-thru interface %s\n",
   252  	      __func__, pid, vf_intf.pass_thru_intf);
   253  }
   254  #endif // SYZ_NIC_VF
   255  
   256  #endif
   257  
   258  static int netlink_send_ext(struct nlmsg* nlmsg, int sock,
   259  			    uint16 reply_type, int* reply_len, bool dofail)
   260  {
   261  #if SYZ_EXECUTOR
   262  	if (in_execute_one && dofail) {
   263  		// We can expect different sorts of breakages during fuzzing,
   264  		// we should not kill the whole process because of them.
   265  		failmsg("invalid netlink_send_ext arguments", "dofail is true during syscall execution");
   266  	}
   267  #endif
   268  	if (nlmsg->pos > nlmsg->buf + sizeof(nlmsg->buf) || nlmsg->nesting)
   269  		fail("nlmsg overflow/bad nesting");
   270  	struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg->buf;
   271  	hdr->nlmsg_len = nlmsg->pos - nlmsg->buf;
   272  	struct sockaddr_nl addr;
   273  	memset(&addr, 0, sizeof(addr));
   274  	addr.nl_family = AF_NETLINK;
   275  	ssize_t n = sendto(sock, nlmsg->buf, hdr->nlmsg_len, 0, (struct sockaddr*)&addr, sizeof(addr));
   276  	if (n != (ssize_t)hdr->nlmsg_len) {
   277  		if (dofail)
   278  			failmsg("netlink_send_ext: short netlink write", "wrote=%zd, want=%d", n, hdr->nlmsg_len);
   279  		debug("netlink_send_ext: short netlink write: %zd/%d errno=%d\n", n, hdr->nlmsg_len, errno);
   280  		return -1;
   281  	}
   282  	n = recv(sock, nlmsg->buf, sizeof(nlmsg->buf), 0);
   283  	if (reply_len)
   284  		*reply_len = 0;
   285  	if (n < 0) {
   286  		if (dofail)
   287  			fail("netlink_send_ext: netlink read failed");
   288  		debug("netlink_send_ext: netlink read failed: errno=%d\n", errno);
   289  		return -1;
   290  	}
   291  	if (n < (ssize_t)sizeof(struct nlmsghdr)) {
   292  		errno = EINVAL;
   293  		if (dofail)
   294  			failmsg("netlink_send_ext: short netlink read", "read=%zd", n);
   295  		debug("netlink_send_ext: short netlink read: %zd\n", n);
   296  		return -1;
   297  	}
   298  	if (hdr->nlmsg_type == NLMSG_DONE)
   299  		return 0;
   300  	if (reply_len && hdr->nlmsg_type == reply_type) {
   301  		*reply_len = n;
   302  		return 0;
   303  	}
   304  	if (n < (ssize_t)(sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr))) {
   305  		errno = EINVAL;
   306  		if (dofail)
   307  			failmsg("netlink_send_ext: short netlink read", "read=%zd", n);
   308  		debug("netlink_send_ext: short netlink read: %zd\n", n);
   309  		return -1;
   310  	}
   311  	if (hdr->nlmsg_type != NLMSG_ERROR) {
   312  		errno = EINVAL;
   313  		if (dofail)
   314  			failmsg("netlink_send_ext: bad netlink ack type", "type=%d", hdr->nlmsg_type);
   315  		debug("netlink_send_ext: short netlink ack: %d\n", hdr->nlmsg_type);
   316  		return -1;
   317  	}
   318  	errno = -((struct nlmsgerr*)(hdr + 1))->error;
   319  	return -errno;
   320  }
   321  
   322  #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION || SYZ_DEVLINK_PCI || SYZ_WIFI || SYZ_802154
   323  static int netlink_send(struct nlmsg* nlmsg, int sock)
   324  {
   325  	return netlink_send_ext(nlmsg, sock, 0, NULL, true);
   326  }
   327  #endif
   328  
   329  static int netlink_query_family_id(struct nlmsg* nlmsg, int sock, const char* family_name, bool dofail)
   330  {
   331  	struct genlmsghdr genlhdr;
   332  	memset(&genlhdr, 0, sizeof(genlhdr));
   333  	genlhdr.cmd = CTRL_CMD_GETFAMILY;
   334  	netlink_init(nlmsg, GENL_ID_CTRL, 0, &genlhdr, sizeof(genlhdr));
   335  	netlink_attr(nlmsg, CTRL_ATTR_FAMILY_NAME, family_name, strnlen(family_name, GENL_NAMSIZ - 1) + 1);
   336  	int n = 0;
   337  	int err = netlink_send_ext(nlmsg, sock, GENL_ID_CTRL, &n, dofail);
   338  	if (err < 0) {
   339  		debug("netlink: failed to get family id for %.*s: %s\n", GENL_NAMSIZ, family_name, strerror(errno));
   340  		return -1;
   341  	}
   342  	uint16 id = 0;
   343  	struct nlattr* attr = (struct nlattr*)(nlmsg->buf + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(genlhdr)));
   344  	for (; (char*)attr < nlmsg->buf + n; attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) {
   345  		if (attr->nla_type == CTRL_ATTR_FAMILY_ID) {
   346  			id = *(uint16*)(attr + 1);
   347  			break;
   348  		}
   349  	}
   350  	if (!id) {
   351  		debug("netlink: failed to parse family id for %.*s\n", GENL_NAMSIZ, family_name);
   352  		errno = EINVAL;
   353  		return -1;
   354  	}
   355  	recv(sock, nlmsg->buf, sizeof(nlmsg->buf), 0); // recv ack
   356  
   357  	return id;
   358  }
   359  
   360  #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_DEVLINK_PCI
   361  static int netlink_next_msg(struct nlmsg* nlmsg, unsigned int offset,
   362  			    unsigned int total_len)
   363  {
   364  	struct nlmsghdr* hdr = (struct nlmsghdr*)(nlmsg->buf + offset);
   365  
   366  	if (offset == total_len || offset + hdr->nlmsg_len > total_len)
   367  		return -1;
   368  	return hdr->nlmsg_len;
   369  }
   370  #endif
   371  
   372  #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_802154
   373  
   374  // Force few TX and RX queues per interface to avoid creating 2 sysfs entries
   375  // per CPU per interface which takes a long time on machines with many cores.
   376  static unsigned int queue_count = 2;
   377  
   378  static void netlink_add_device_impl(struct nlmsg* nlmsg, const char* type,
   379  				    const char* name, bool up)
   380  {
   381  	struct ifinfomsg hdr;
   382  	memset(&hdr, 0, sizeof(hdr));
   383  	if (up)
   384  		hdr.ifi_flags = hdr.ifi_change = IFF_UP;
   385  	netlink_init(nlmsg, RTM_NEWLINK, NLM_F_EXCL | NLM_F_CREATE, &hdr, sizeof(hdr));
   386  	if (name)
   387  		netlink_attr(nlmsg, IFLA_IFNAME, name, strlen(name));
   388  
   389  	netlink_attr(nlmsg, IFLA_NUM_TX_QUEUES, &queue_count, sizeof(queue_count));
   390  	netlink_attr(nlmsg, IFLA_NUM_RX_QUEUES, &queue_count, sizeof(queue_count));
   391  
   392  	netlink_nest(nlmsg, IFLA_LINKINFO);
   393  	netlink_attr(nlmsg, IFLA_INFO_KIND, type, strlen(type));
   394  }
   395  #endif
   396  
   397  #if SYZ_EXECUTOR || SYZ_NET_DEVICES
   398  static void netlink_add_device(struct nlmsg* nlmsg, int sock, const char* type,
   399  			       const char* name)
   400  {
   401  	netlink_add_device_impl(nlmsg, type, name, false);
   402  	netlink_done(nlmsg);
   403  	int err = netlink_send(nlmsg, sock);
   404  	if (err < 0) {
   405  		debug("netlink: adding device %s type %s: %s\n", name, type, strerror(errno));
   406  	}
   407  }
   408  
   409  static void netlink_add_veth(struct nlmsg* nlmsg, int sock, const char* name,
   410  			     const char* peer)
   411  {
   412  	netlink_add_device_impl(nlmsg, "veth", name, false);
   413  	netlink_nest(nlmsg, IFLA_INFO_DATA);
   414  	netlink_nest(nlmsg, VETH_INFO_PEER);
   415  	nlmsg->pos += sizeof(struct ifinfomsg);
   416  	netlink_attr(nlmsg, IFLA_IFNAME, peer, strlen(peer));
   417  	netlink_attr(nlmsg, IFLA_NUM_TX_QUEUES, &queue_count, sizeof(queue_count));
   418  	netlink_attr(nlmsg, IFLA_NUM_RX_QUEUES, &queue_count, sizeof(queue_count));
   419  	netlink_done(nlmsg);
   420  	netlink_done(nlmsg);
   421  	netlink_done(nlmsg);
   422  	int err = netlink_send(nlmsg, sock);
   423  	if (err < 0) {
   424  		debug("netlink: adding device %s type veth peer %s: %s\n", name, peer, strerror(errno));
   425  	}
   426  }
   427  
   428  static void netlink_add_xfrm(struct nlmsg* nlmsg, int sock, const char* name)
   429  {
   430  	netlink_add_device_impl(nlmsg, "xfrm", name, true);
   431  	netlink_nest(nlmsg, IFLA_INFO_DATA);
   432  	int if_id = 1;
   433  	// This is IFLA_XFRM_IF_ID attr which is not present in older kernel headers.
   434  	netlink_attr(nlmsg, 2, &if_id, sizeof(if_id));
   435  	netlink_done(nlmsg);
   436  	netlink_done(nlmsg);
   437  	int err = netlink_send(nlmsg, sock);
   438  	if (err < 0) {
   439  		debug("netlink: adding device %s type xfrm if_id %d: %s\n", name, if_id, strerror(errno));
   440  	}
   441  }
   442  
   443  static void netlink_add_hsr(struct nlmsg* nlmsg, int sock, const char* name,
   444  			    const char* slave1, const char* slave2)
   445  {
   446  	netlink_add_device_impl(nlmsg, "hsr", name, false);
   447  	netlink_nest(nlmsg, IFLA_INFO_DATA);
   448  	int ifindex1 = if_nametoindex(slave1);
   449  	netlink_attr(nlmsg, IFLA_HSR_SLAVE1, &ifindex1, sizeof(ifindex1));
   450  	int ifindex2 = if_nametoindex(slave2);
   451  	netlink_attr(nlmsg, IFLA_HSR_SLAVE2, &ifindex2, sizeof(ifindex2));
   452  	netlink_done(nlmsg);
   453  	netlink_done(nlmsg);
   454  	int err = netlink_send(nlmsg, sock);
   455  	if (err < 0) {
   456  		debug("netlink: adding device %s type hsr slave1 %s slave2 %s: %s\n", name, slave1, slave2, strerror(errno));
   457  	}
   458  }
   459  
   460  static void netlink_add_linked(struct nlmsg* nlmsg, int sock, const char* type, const char* name, const char* link)
   461  {
   462  	netlink_add_device_impl(nlmsg, type, name, false);
   463  	netlink_done(nlmsg);
   464  	int ifindex = if_nametoindex(link);
   465  	netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex));
   466  	int err = netlink_send(nlmsg, sock);
   467  	if (err < 0) {
   468  		debug("netlink: adding device %s type %s link %s: %s\n", name, type, link, strerror(errno));
   469  	}
   470  }
   471  
   472  static void netlink_add_vlan(struct nlmsg* nlmsg, int sock, const char* name, const char* link, uint16 id, uint16 proto)
   473  {
   474  	netlink_add_device_impl(nlmsg, "vlan", name, false);
   475  	netlink_nest(nlmsg, IFLA_INFO_DATA);
   476  	netlink_attr(nlmsg, IFLA_VLAN_ID, &id, sizeof(id));
   477  	netlink_attr(nlmsg, IFLA_VLAN_PROTOCOL, &proto, sizeof(proto));
   478  	netlink_done(nlmsg);
   479  	netlink_done(nlmsg);
   480  	int ifindex = if_nametoindex(link);
   481  	netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex));
   482  	int err = netlink_send(nlmsg, sock);
   483  	if (err < 0) {
   484  		debug("netlink: add %s type vlan link %s id %d: %s\n", name, link, id, strerror(errno));
   485  	}
   486  }
   487  
   488  static void netlink_add_macvlan(struct nlmsg* nlmsg, int sock, const char* name, const char* link)
   489  {
   490  	netlink_add_device_impl(nlmsg, "macvlan", name, false);
   491  	netlink_nest(nlmsg, IFLA_INFO_DATA);
   492  	uint32 mode = MACVLAN_MODE_BRIDGE;
   493  	netlink_attr(nlmsg, IFLA_MACVLAN_MODE, &mode, sizeof(mode));
   494  	netlink_done(nlmsg);
   495  	netlink_done(nlmsg);
   496  	int ifindex = if_nametoindex(link);
   497  	netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex));
   498  	int err = netlink_send(nlmsg, sock);
   499  	if (err < 0) {
   500  		debug("netlink: add %s type macvlan link %s mode %d: %s\n", name, link, mode, strerror(errno));
   501  	}
   502  }
   503  
   504  static void netlink_add_geneve(struct nlmsg* nlmsg, int sock, const char* name, uint32 vni, struct in_addr* addr4, struct in6_addr* addr6)
   505  {
   506  	netlink_add_device_impl(nlmsg, "geneve", name, false);
   507  	netlink_nest(nlmsg, IFLA_INFO_DATA);
   508  	netlink_attr(nlmsg, IFLA_GENEVE_ID, &vni, sizeof(vni));
   509  	if (addr4)
   510  		netlink_attr(nlmsg, IFLA_GENEVE_REMOTE, addr4, sizeof(*addr4));
   511  	if (addr6)
   512  		netlink_attr(nlmsg, IFLA_GENEVE_REMOTE6, addr6, sizeof(*addr6));
   513  	netlink_done(nlmsg);
   514  	netlink_done(nlmsg);
   515  	int err = netlink_send(nlmsg, sock);
   516  	if (err < 0) {
   517  		debug("netlink: add %s type geneve vni %u: %s\n", name, vni, strerror(errno));
   518  	}
   519  }
   520  
   521  #define IFLA_IPVLAN_FLAGS 2
   522  #define IPVLAN_MODE_L3S 2
   523  #undef IPVLAN_F_VEPA
   524  #define IPVLAN_F_VEPA 2
   525  
   526  static void netlink_add_ipvlan(struct nlmsg* nlmsg, int sock, const char* name, const char* link, uint16 mode, uint16 flags)
   527  {
   528  	netlink_add_device_impl(nlmsg, "ipvlan", name, false);
   529  	netlink_nest(nlmsg, IFLA_INFO_DATA);
   530  	netlink_attr(nlmsg, IFLA_IPVLAN_MODE, &mode, sizeof(mode));
   531  	netlink_attr(nlmsg, IFLA_IPVLAN_FLAGS, &flags, sizeof(flags));
   532  	netlink_done(nlmsg);
   533  	netlink_done(nlmsg);
   534  	int ifindex = if_nametoindex(link);
   535  	netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex));
   536  	int err = netlink_send(nlmsg, sock);
   537  	if (err < 0) {
   538  		debug("netlink: add %s type ipvlan link %s mode %d: %s\n", name, link, mode, strerror(errno));
   539  	}
   540  }
   541  #endif
   542  
   543  #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION || SYZ_DEVLINK_PCI || SYZ_802154
   544  static void netlink_device_change(struct nlmsg* nlmsg, int sock, const char* name, bool up,
   545  				  const char* master, const void* mac, int macsize,
   546  				  const char* new_name)
   547  {
   548  	struct ifinfomsg hdr;
   549  	memset(&hdr, 0, sizeof(hdr));
   550  	if (up)
   551  		hdr.ifi_flags = hdr.ifi_change = IFF_UP;
   552  	hdr.ifi_index = if_nametoindex(name);
   553  	netlink_init(nlmsg, RTM_NEWLINK, 0, &hdr, sizeof(hdr));
   554  	if (new_name)
   555  		netlink_attr(nlmsg, IFLA_IFNAME, new_name, strlen(new_name));
   556  	if (master) {
   557  		int ifindex = if_nametoindex(master);
   558  		netlink_attr(nlmsg, IFLA_MASTER, &ifindex, sizeof(ifindex));
   559  	}
   560  	if (macsize)
   561  		netlink_attr(nlmsg, IFLA_ADDRESS, mac, macsize);
   562  	int err = netlink_send(nlmsg, sock);
   563  	if (err < 0) {
   564  		debug("netlink: device %s up master %s: %s\n", name, master ? master : "NULL", strerror(errno));
   565  	}
   566  }
   567  #endif
   568  
   569  #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION
   570  static int netlink_add_addr(struct nlmsg* nlmsg, int sock, const char* dev,
   571  			    const void* addr, int addrsize)
   572  {
   573  	struct ifaddrmsg hdr;
   574  	memset(&hdr, 0, sizeof(hdr));
   575  	hdr.ifa_family = addrsize == 4 ? AF_INET : AF_INET6;
   576  	hdr.ifa_prefixlen = addrsize == 4 ? 24 : 120;
   577  	hdr.ifa_scope = RT_SCOPE_UNIVERSE;
   578  	hdr.ifa_index = if_nametoindex(dev);
   579  	netlink_init(nlmsg, RTM_NEWADDR, NLM_F_CREATE | NLM_F_REPLACE, &hdr, sizeof(hdr));
   580  	netlink_attr(nlmsg, IFA_LOCAL, addr, addrsize);
   581  	netlink_attr(nlmsg, IFA_ADDRESS, addr, addrsize);
   582  	return netlink_send(nlmsg, sock);
   583  }
   584  
   585  static void netlink_add_addr4(struct nlmsg* nlmsg, int sock,
   586  			      const char* dev, const char* addr)
   587  {
   588  	struct in_addr in_addr;
   589  	inet_pton(AF_INET, addr, &in_addr);
   590  	int err = netlink_add_addr(nlmsg, sock, dev, &in_addr, sizeof(in_addr));
   591  	if (err < 0) {
   592  		debug("netlink: add addr %s dev %s: %s\n", addr, dev, strerror(errno));
   593  	}
   594  }
   595  
   596  static void netlink_add_addr6(struct nlmsg* nlmsg, int sock,
   597  			      const char* dev, const char* addr)
   598  {
   599  	struct in6_addr in6_addr;
   600  	inet_pton(AF_INET6, addr, &in6_addr);
   601  	int err = netlink_add_addr(nlmsg, sock, dev, &in6_addr, sizeof(in6_addr));
   602  	if (err < 0) {
   603  		debug("netlink: add addr %s dev %s: %s\n", addr, dev, strerror(errno));
   604  	}
   605  }
   606  #endif
   607  
   608  #if SYZ_EXECUTOR || SYZ_NET_INJECTION
   609  static void netlink_add_neigh(struct nlmsg* nlmsg, int sock, const char* name,
   610  			      const void* addr, int addrsize, const void* mac, int macsize)
   611  {
   612  	struct ndmsg hdr;
   613  	memset(&hdr, 0, sizeof(hdr));
   614  	hdr.ndm_family = addrsize == 4 ? AF_INET : AF_INET6;
   615  	hdr.ndm_ifindex = if_nametoindex(name);
   616  	hdr.ndm_state = NUD_PERMANENT;
   617  	netlink_init(nlmsg, RTM_NEWNEIGH, NLM_F_EXCL | NLM_F_CREATE, &hdr, sizeof(hdr));
   618  	netlink_attr(nlmsg, NDA_DST, addr, addrsize);
   619  	netlink_attr(nlmsg, NDA_LLADDR, mac, macsize);
   620  	int err = netlink_send(nlmsg, sock);
   621  	if (err < 0) {
   622  		debug("netlink: add neigh %s addr %d lladdr %d: %s\n", name, addrsize, macsize, strerror(errno));
   623  	}
   624  }
   625  #endif
   626  #endif
   627  
   628  #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION || SYZ_DEVLINK_PCI || SYZ_WIFI || SYZ_802154
   629  static struct nlmsg nlmsg;
   630  #endif
   631  
   632  #if SYZ_EXECUTOR || SYZ_NET_INJECTION
   633  #include <arpa/inet.h>
   634  #include <errno.h>
   635  #include <fcntl.h>
   636  #include <net/if.h>
   637  #include <net/if_arp.h>
   638  #include <stdarg.h>
   639  #include <stdbool.h>
   640  #include <sys/ioctl.h>
   641  #include <sys/stat.h>
   642  
   643  #include <linux/if_ether.h>
   644  #include <linux/if_tun.h>
   645  #include <linux/ip.h>
   646  #include <linux/tcp.h>
   647  
   648  static int tunfd = -1;
   649  
   650  #define TUN_IFACE "syz_tun"
   651  #define LOCAL_MAC 0xaaaaaaaaaaaa
   652  #define REMOTE_MAC 0xaaaaaaaaaabb
   653  #define LOCAL_IPV4 "172.20.20.170"
   654  #define REMOTE_IPV4 "172.20.20.187"
   655  #define LOCAL_IPV6 "fe80::aa"
   656  #define REMOTE_IPV6 "fe80::bb"
   657  
   658  #ifndef IFF_NAPI
   659  #define IFF_NAPI 0x0010
   660  #endif
   661  #if ENABLE_NAPI_FRAGS
   662  static int tun_frags_enabled;
   663  #ifndef IFF_NAPI_FRAGS
   664  #define IFF_NAPI_FRAGS 0x0020
   665  #endif
   666  #endif
   667  
   668  static void initialize_tun(void)
   669  {
   670  #if SYZ_EXECUTOR
   671  	if (!flag_net_injection)
   672  		return;
   673  #endif
   674  	tunfd = open("/dev/net/tun", O_RDWR | O_NONBLOCK);
   675  	if (tunfd == -1) {
   676  #if SYZ_EXECUTOR
   677  		fail("tun: can't open /dev/net/tun");
   678  #else
   679  		printf("tun: can't open /dev/net/tun: please enable CONFIG_TUN=y\n");
   680  		printf("otherwise fuzzing or reproducing might not work as intended\n");
   681  		return;
   682  #endif
   683  	}
   684  	// Remap tun onto higher fd number to hide it from fuzzer and to keep
   685  	// fd numbers stable regardless of whether tun is opened or not (also see kMaxFd).
   686  	const int kTunFd = 200;
   687  	if (dup2(tunfd, kTunFd) < 0)
   688  		fail("dup2(tunfd, kTunFd) failed");
   689  	close(tunfd);
   690  	tunfd = kTunFd;
   691  
   692  	struct ifreq ifr;
   693  	memset(&ifr, 0, sizeof(ifr));
   694  	strncpy(ifr.ifr_name, TUN_IFACE, IFNAMSIZ);
   695  	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
   696  	// Note: SYZ_ENABLE_NAPI_FRAGS is never enabled. This is code is only for reference
   697  	// in case we figure out how IFF_NAPI_FRAGS works. With IFF_NAPI_FRAGS packets
   698  	// don't reach destinations and bail out in udp_gro_receive (see #1594).
   699  	// Also IFF_NAPI_FRAGS does not work with sandbox_namespace (see comment there).
   700  #if ENABLE_NAPI_FRAGS
   701  	ifr.ifr_flags |= IFF_NAPI | IFF_NAPI_FRAGS;
   702  #endif
   703  	if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0) {
   704  #if ENABLE_NAPI_FRAGS
   705  		// IFF_NAPI_FRAGS requires root, so try without it.
   706  		ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
   707  		if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0)
   708  #endif
   709  			fail("tun: ioctl(TUNSETIFF) failed");
   710  	}
   711  #if ENABLE_NAPI_FRAGS
   712  	// If IFF_NAPI_FRAGS is not supported it will be silently dropped,
   713  	// so query the effective flags.
   714  	if (ioctl(tunfd, TUNGETIFF, (void*)&ifr) < 0)
   715  		fail("tun: ioctl(TUNGETIFF) failed");
   716  	tun_frags_enabled = (ifr.ifr_flags & IFF_NAPI_FRAGS) != 0;
   717  	debug("tun_frags_enabled=%d\n", tun_frags_enabled);
   718  #endif
   719  
   720  	// Disable IPv6 DAD, otherwise the address remains unusable until DAD completes.
   721  	// Don't panic because this is an optional config.
   722  	char sysctl[64];
   723  	sprintf(sysctl, "/proc/sys/net/ipv6/conf/%s/accept_dad", TUN_IFACE);
   724  	write_file(sysctl, "0");
   725  	// Disable IPv6 router solicitation to prevent IPv6 spam.
   726  	// Don't panic because this is an optional config.
   727  	sprintf(sysctl, "/proc/sys/net/ipv6/conf/%s/router_solicitations", TUN_IFACE);
   728  	write_file(sysctl, "0");
   729  	// There seems to be no way to disable IPv6 MTD to prevent more IPv6 spam.
   730  
   731  	int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
   732  	if (sock == -1)
   733  		fail("socket(AF_NETLINK) failed");
   734  
   735  	netlink_add_addr4(&nlmsg, sock, TUN_IFACE, LOCAL_IPV4);
   736  	netlink_add_addr6(&nlmsg, sock, TUN_IFACE, LOCAL_IPV6);
   737  	uint64 macaddr = REMOTE_MAC;
   738  	struct in_addr in_addr;
   739  	inet_pton(AF_INET, REMOTE_IPV4, &in_addr);
   740  	netlink_add_neigh(&nlmsg, sock, TUN_IFACE, &in_addr, sizeof(in_addr), &macaddr, ETH_ALEN);
   741  	struct in6_addr in6_addr;
   742  	inet_pton(AF_INET6, REMOTE_IPV6, &in6_addr);
   743  	netlink_add_neigh(&nlmsg, sock, TUN_IFACE, &in6_addr, sizeof(in6_addr), &macaddr, ETH_ALEN);
   744  	macaddr = LOCAL_MAC;
   745  	netlink_device_change(&nlmsg, sock, TUN_IFACE, true, 0, &macaddr, ETH_ALEN, NULL);
   746  	close(sock);
   747  }
   748  #endif
   749  
   750  #if SYZ_EXECUTOR || __NR_syz_init_net_socket || SYZ_DEVLINK_PCI || __NR_syz_socket_connect_nvme_tcp
   751  const int kInitNetNsFd = 201; // see kMaxFd
   752  #endif
   753  
   754  #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI || SYZ_NET_DEVICES
   755  
   756  #include <linux/genetlink.h>
   757  #include <stdbool.h>
   758  
   759  #define DEVLINK_FAMILY_NAME "devlink"
   760  
   761  #define DEVLINK_CMD_PORT_GET 5
   762  #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
   763  #define DEVLINK_CMD_RELOAD 37
   764  #endif
   765  #define DEVLINK_ATTR_BUS_NAME 1
   766  #define DEVLINK_ATTR_DEV_NAME 2
   767  #define DEVLINK_ATTR_NETDEV_NAME 7
   768  #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
   769  #define DEVLINK_ATTR_NETNS_FD 138
   770  #endif
   771  
   772  #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
   773  static void netlink_devlink_netns_move(const char* bus_name, const char* dev_name, int netns_fd)
   774  {
   775  	struct genlmsghdr genlhdr;
   776  	int sock;
   777  	int id, err;
   778  
   779  	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
   780  	if (sock == -1)
   781  		fail("socket(AF_NETLINK) failed");
   782  
   783  	id = netlink_query_family_id(&nlmsg, sock, DEVLINK_FAMILY_NAME, true);
   784  	if (id == -1)
   785  		goto error;
   786  
   787  	memset(&genlhdr, 0, sizeof(genlhdr));
   788  	genlhdr.cmd = DEVLINK_CMD_RELOAD;
   789  	netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr));
   790  	netlink_attr(&nlmsg, DEVLINK_ATTR_BUS_NAME, bus_name, strlen(bus_name) + 1);
   791  	netlink_attr(&nlmsg, DEVLINK_ATTR_DEV_NAME, dev_name, strlen(dev_name) + 1);
   792  	netlink_attr(&nlmsg, DEVLINK_ATTR_NETNS_FD, &netns_fd, sizeof(netns_fd));
   793  	err = netlink_send(&nlmsg, sock);
   794  	if (err < 0) {
   795  		debug("netlink: failed to move devlink instance %s/%s into network namespace: %s\n",
   796  		      bus_name, dev_name, strerror(errno));
   797  	}
   798  error:
   799  	close(sock);
   800  }
   801  #endif
   802  
   803  static struct nlmsg nlmsg2;
   804  
   805  static void initialize_devlink_ports(const char* bus_name, const char* dev_name,
   806  				     const char* netdev_prefix)
   807  {
   808  	struct genlmsghdr genlhdr;
   809  	int len, total_len, id, err, offset;
   810  	uint16 netdev_index;
   811  
   812  	int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
   813  	if (sock == -1)
   814  		fail("socket(AF_NETLINK) failed");
   815  
   816  	int rtsock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
   817  	if (rtsock == -1)
   818  		fail("socket(AF_NETLINK) failed");
   819  
   820  	id = netlink_query_family_id(&nlmsg, sock, DEVLINK_FAMILY_NAME, true);
   821  	if (id == -1)
   822  		goto error;
   823  
   824  	memset(&genlhdr, 0, sizeof(genlhdr));
   825  	genlhdr.cmd = DEVLINK_CMD_PORT_GET;
   826  	netlink_init(&nlmsg, id, NLM_F_DUMP, &genlhdr, sizeof(genlhdr));
   827  	netlink_attr(&nlmsg, DEVLINK_ATTR_BUS_NAME, bus_name, strlen(bus_name) + 1);
   828  	netlink_attr(&nlmsg, DEVLINK_ATTR_DEV_NAME, dev_name, strlen(dev_name) + 1);
   829  
   830  	err = netlink_send_ext(&nlmsg, sock, id, &total_len, true);
   831  	if (err < 0) {
   832  		debug("netlink: failed to get port get reply: %s\n", strerror(errno));
   833  		goto error;
   834  	}
   835  
   836  	offset = 0;
   837  	netdev_index = 0;
   838  	while ((len = netlink_next_msg(&nlmsg, offset, total_len)) != -1) {
   839  		struct nlattr* attr = (struct nlattr*)(nlmsg.buf + offset + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(genlhdr)));
   840  		for (; (char*)attr < nlmsg.buf + offset + len; attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) {
   841  			if (attr->nla_type == DEVLINK_ATTR_NETDEV_NAME) {
   842  				char* port_name;
   843  				char netdev_name[IFNAMSIZ];
   844  				port_name = (char*)(attr + 1);
   845  				snprintf(netdev_name, sizeof(netdev_name), "%s%d", netdev_prefix, netdev_index);
   846  				netlink_device_change(&nlmsg2, rtsock, port_name, true, 0, 0, 0, netdev_name);
   847  				break;
   848  			}
   849  		}
   850  		offset += len;
   851  		netdev_index++;
   852  	}
   853  error:
   854  	close(rtsock);
   855  	close(sock);
   856  }
   857  
   858  #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
   859  #include <fcntl.h>
   860  #include <sched.h>
   861  
   862  static void initialize_devlink_pci(void)
   863  {
   864  #if SYZ_EXECUTOR
   865  	if (!flag_devlink_pci)
   866  		return;
   867  #endif
   868  	int netns = open("/proc/self/ns/net", O_RDONLY);
   869  	if (netns == -1)
   870  		fail("open(/proc/self/ns/net) failed");
   871  	int ret = setns(kInitNetNsFd, 0);
   872  	if (ret == -1)
   873  		fail("set_ns(init_netns_fd) failed");
   874  	netlink_devlink_netns_move("pci", "0000:00:10.0", netns);
   875  	ret = setns(netns, 0);
   876  	if (ret == -1)
   877  		fail("set_ns(this_netns_fd) failed");
   878  	close(netns);
   879  
   880  	initialize_devlink_ports("pci", "0000:00:10.0", "netpci");
   881  }
   882  #endif
   883  #endif
   884  
   885  #if SYZ_EXECUTOR || SYZ_WIFI || __NR_syz_80211_inject_frame || __NR_syz_80211_join_ibss
   886  
   887  #define WIFI_INITIAL_DEVICE_COUNT 2
   888  #define WIFI_MAC_BASE \
   889  	{             \
   890  	    0x08, 0x02, 0x11, 0x00, 0x00, 0x00}
   891  #define WIFI_IBSS_BSSID \
   892  	{               \
   893  	    0x50, 0x50, 0x50, 0x50, 0x50, 0x50}
   894  #define WIFI_IBSS_SSID \
   895  	{              \
   896  	    0x10, 0x10, 0x10, 0x10, 0x10, 0x10}
   897  #define WIFI_DEFAULT_FREQUENCY 2412
   898  #define WIFI_DEFAULT_SIGNAL 0
   899  #define WIFI_DEFAULT_RX_RATE 1
   900  
   901  // consts from drivers/net/wireless/mac80211_hwsim.h
   902  #define HWSIM_CMD_REGISTER 1
   903  #define HWSIM_CMD_FRAME 2
   904  #define HWSIM_CMD_NEW_RADIO 4
   905  #define HWSIM_ATTR_SUPPORT_P2P_DEVICE 14
   906  #define HWSIM_ATTR_PERM_ADDR 22
   907  
   908  #endif
   909  
   910  #if SYZ_EXECUTOR || SYZ_WIFI || __NR_syz_80211_join_ibss
   911  #include <linux/genetlink.h>
   912  #include <linux/if_ether.h>
   913  #include <linux/nl80211.h>
   914  #include <linux/rtnetlink.h>
   915  #include <net/if.h>
   916  #include <stdbool.h>
   917  #include <sys/ioctl.h>
   918  
   919  // From linux/if.h, but we cannot include the file as it conflicts with net/if.h
   920  #define IF_OPER_UP 6
   921  
// IBSS parameters for nl80211_join_ibss.
// Each field is copied into one attribute of the NL80211_CMD_JOIN_IBSS request.
struct join_ibss_props {
	// Sent as NL80211_ATTR_WIPHY_FREQ (presumably in MHz, e.g. 2412; confirm against nl80211.h).
	int wiphy_freq;
	// When true, the NL80211_ATTR_FREQ_FIXED flag attribute is added to the request.
	bool wiphy_freq_fixed;
	// Optional: when non-NULL, ETH_ALEN bytes are sent as NL80211_ATTR_MAC.
	uint8* mac;
	// Sent as NL80211_ATTR_SSID; must point to at least ssid_len bytes.
	uint8* ssid;
	// Number of bytes of ssid to send.
	int ssid_len;
};
   930  
   931  static int set_interface_state(const char* interface_name, int on)
   932  {
   933  	struct ifreq ifr;
   934  	int sock = socket(AF_INET, SOCK_DGRAM, 0);
   935  	if (sock < 0) {
   936  		debug("set_interface_state: failed to open socket, errno %d\n", errno);
   937  		return -1;
   938  	}
   939  
   940  	memset(&ifr, 0, sizeof(ifr));
   941  	strcpy(ifr.ifr_name, interface_name);
   942  	int ret = ioctl(sock, SIOCGIFFLAGS, &ifr);
   943  	if (ret < 0) {
   944  		debug("set_interface_state: failed to execute SIOCGIFFLAGS, ret %d\n", ret);
   945  		close(sock);
   946  		return -1;
   947  	}
   948  
   949  	if (on)
   950  		ifr.ifr_flags |= IFF_UP;
   951  	else
   952  		ifr.ifr_flags &= ~IFF_UP;
   953  
   954  	ret = ioctl(sock, SIOCSIFFLAGS, &ifr);
   955  	close(sock);
   956  	if (ret < 0) {
   957  		debug("set_interface_state: failed to execute SIOCSIFFLAGS, ret %d\n", ret);
   958  		return -1;
   959  	}
   960  	return 0;
   961  }
   962  
   963  static int nl80211_set_interface(struct nlmsg* nlmsg, int sock, int nl80211_family, uint32 ifindex,
   964  				 uint32 iftype, bool dofail)
   965  {
   966  	struct genlmsghdr genlhdr;
   967  
   968  	memset(&genlhdr, 0, sizeof(genlhdr));
   969  	genlhdr.cmd = NL80211_CMD_SET_INTERFACE;
   970  	netlink_init(nlmsg, nl80211_family, 0, &genlhdr, sizeof(genlhdr));
   971  	netlink_attr(nlmsg, NL80211_ATTR_IFINDEX, &ifindex, sizeof(ifindex));
   972  	netlink_attr(nlmsg, NL80211_ATTR_IFTYPE, &iftype, sizeof(iftype));
   973  	int err = netlink_send_ext(nlmsg, sock, 0, NULL, dofail);
   974  	if (err < 0) {
   975  		debug("nl80211_set_interface failed: %s\n", strerror(errno));
   976  	}
   977  	return err;
   978  }
   979  
   980  static int nl80211_join_ibss(struct nlmsg* nlmsg, int sock, int nl80211_family, uint32 ifindex,
   981  			     struct join_ibss_props* props, bool dofail)
   982  {
   983  	struct genlmsghdr genlhdr;
   984  
   985  	memset(&genlhdr, 0, sizeof(genlhdr));
   986  	genlhdr.cmd = NL80211_CMD_JOIN_IBSS;
   987  	netlink_init(nlmsg, nl80211_family, 0, &genlhdr, sizeof(genlhdr));
   988  	netlink_attr(nlmsg, NL80211_ATTR_IFINDEX, &ifindex, sizeof(ifindex));
   989  	netlink_attr(nlmsg, NL80211_ATTR_SSID, props->ssid, props->ssid_len);
   990  	netlink_attr(nlmsg, NL80211_ATTR_WIPHY_FREQ, &(props->wiphy_freq), sizeof(props->wiphy_freq));
   991  	if (props->mac)
   992  		netlink_attr(nlmsg, NL80211_ATTR_MAC, props->mac, ETH_ALEN);
   993  	if (props->wiphy_freq_fixed)
   994  		netlink_attr(nlmsg, NL80211_ATTR_FREQ_FIXED, NULL, 0);
   995  	int err = netlink_send_ext(nlmsg, sock, 0, NULL, dofail);
   996  	if (err < 0) {
   997  		debug("nl80211_join_ibss failed: %s\n", strerror(errno));
   998  	}
   999  	return err;
  1000  }
  1001  
  1002  static int get_ifla_operstate(struct nlmsg* nlmsg, int ifindex, bool dofail)
  1003  {
  1004  	struct ifinfomsg info;
  1005  	memset(&info, 0, sizeof(info));
  1006  	info.ifi_family = AF_UNSPEC;
  1007  	info.ifi_index = ifindex;
  1008  
  1009  	int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  1010  	if (sock == -1) {
  1011  		debug("get_ifla_operstate: socket failed: %d\n", errno);
  1012  		return -1;
  1013  	}
  1014  
  1015  	netlink_init(nlmsg, RTM_GETLINK, 0, &info, sizeof(info));
  1016  	int n;
  1017  	int err = netlink_send_ext(nlmsg, sock, RTM_NEWLINK, &n, dofail);
  1018  	close(sock);
  1019  
  1020  	if (err) {
  1021  		debug("get_ifla_operstate: failed to query: %s\n", strerror(errno));
  1022  		return -1;
  1023  	}
  1024  
  1025  	struct rtattr* attr = IFLA_RTA(NLMSG_DATA(nlmsg->buf));
  1026  	for (; RTA_OK(attr, n); attr = RTA_NEXT(attr, n)) {
  1027  		if (attr->rta_type == IFLA_OPERSTATE)
  1028  			return *((int32_t*)RTA_DATA(attr));
  1029  	}
  1030  
  1031  	return -1;
  1032  }
  1033  
  1034  static int await_ifla_operstate(struct nlmsg* nlmsg, char* interface, int operstate, bool dofail)
  1035  {
  1036  	int ifindex = if_nametoindex(interface);
  1037  	while (true) {
  1038  		usleep(1000); // 1 ms
  1039  		int ret = get_ifla_operstate(nlmsg, ifindex, dofail);
  1040  		if (ret < 0)
  1041  			return ret;
  1042  		if (ret == operstate)
  1043  			return 0;
  1044  	}
  1045  	return 0;
  1046  }
  1047  
  1048  static int nl80211_setup_ibss_interface(struct nlmsg* nlmsg, int sock, int nl80211_family_id, char* interface,
  1049  					struct join_ibss_props* ibss_props, bool dofail)
  1050  {
  1051  	int ifindex = if_nametoindex(interface);
  1052  	if (ifindex == 0) {
  1053  		debug("nl80211_setup_ibss_interface: if_nametoindex failed for %.32s, ret 0\n", interface);
  1054  		return -1;
  1055  	}
  1056  
  1057  	int ret = nl80211_set_interface(nlmsg, sock, nl80211_family_id, ifindex, NL80211_IFTYPE_ADHOC, dofail);
  1058  	if (ret < 0) {
  1059  		debug("nl80211_setup_ibss_interface: nl80211_set_interface failed for %.32s, ret %d\n", interface, ret);
  1060  		return -1;
  1061  	}
  1062  
  1063  	ret = set_interface_state(interface, 1);
  1064  	if (ret < 0) {
  1065  		debug("nl80211_setup_ibss_interface: set_interface_state failed for %.32s, ret %d\n", interface, ret);
  1066  		return -1;
  1067  	}
  1068  
  1069  	ret = nl80211_join_ibss(nlmsg, sock, nl80211_family_id, ifindex, ibss_props, dofail);
  1070  	if (ret < 0) {
  1071  		debug("nl80211_setup_ibss_interface: nl80211_join_ibss failed for %.32s, ret %d\n", interface, ret);
  1072  		return -1;
  1073  	}
  1074  
  1075  	return 0;
  1076  }
  1077  #endif
  1078  
  1079  #if SYZ_EXECUTOR || SYZ_WIFI
  1080  #include <fcntl.h>
  1081  #include <linux/rfkill.h>
  1082  #include <sys/stat.h>
  1083  #include <sys/types.h>
  1084  
  1085  static int hwsim80211_create_device(struct nlmsg* nlmsg, int sock, int hwsim_family, uint8 mac_addr[ETH_ALEN])
  1086  {
  1087  	struct genlmsghdr genlhdr;
  1088  	memset(&genlhdr, 0, sizeof(genlhdr));
  1089  	genlhdr.cmd = HWSIM_CMD_NEW_RADIO;
  1090  	netlink_init(nlmsg, hwsim_family, 0, &genlhdr, sizeof(genlhdr));
  1091  	netlink_attr(nlmsg, HWSIM_ATTR_SUPPORT_P2P_DEVICE, NULL, 0);
  1092  	netlink_attr(nlmsg, HWSIM_ATTR_PERM_ADDR, mac_addr, ETH_ALEN);
  1093  	int err = netlink_send(nlmsg, sock);
  1094  	if (err < 0) {
  1095  		debug("hwsim80211_create_device failed: %s\n", strerror(errno));
  1096  	}
  1097  	return err;
  1098  }
  1099  
  1100  static void initialize_wifi_devices(void)
  1101  {
  1102  	// Set up virtual wifi devices and join them into an IBSS network.
  1103  	// An IBSS network is created here in order to put these devices in an operable state right from
  1104  	// the beginning. It has the following positive effects.
  1105  	// 1. Frame injection becomes possible from the very start.
  1106  	// 2. A number of nl80211 commands expect their target wireless interface to be in an operable state.
  1107  	// 3. Simplification of reproducer generation - in many cases the reproducer will not have to spend time
  1108  	//    selecting system calls that set up the environment.
  1109  	//
  1110  	// IBSS network was chosen as the simplest network type to begin with.
  1111  
  1112  #if SYZ_EXECUTOR
  1113  	if (!flag_wifi)
  1114  		return;
  1115  #endif
  1116  	int rfkill = open("/dev/rfkill", O_RDWR);
  1117  	if (rfkill == -1)
  1118  		fail("open(/dev/rfkill) failed");
  1119  	struct rfkill_event event = {0};
  1120  	event.type = RFKILL_TYPE_ALL;
  1121  	event.op = RFKILL_OP_CHANGE_ALL;
  1122  	if (write(rfkill, &event, sizeof(event)) != (ssize_t)(sizeof(event)))
  1123  		fail("write(/dev/rfkill) failed");
  1124  	close(rfkill);
  1125  
  1126  	uint8 mac_addr[6] = WIFI_MAC_BASE;
  1127  	int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
  1128  	if (sock < 0)
  1129  		fail("initialize_wifi_devices: failed to create socket");
  1130  	int hwsim_family_id = netlink_query_family_id(&nlmsg, sock, "MAC80211_HWSIM", true);
  1131  	int nl80211_family_id = netlink_query_family_id(&nlmsg, sock, "nl80211", true);
  1132  	if (hwsim_family_id < 0 || nl80211_family_id < 0)
  1133  		fail("netlink_query_family_id failed");
  1134  	uint8 ssid[] = WIFI_IBSS_SSID;
  1135  	uint8 bssid[] = WIFI_IBSS_BSSID;
  1136  	struct join_ibss_props ibss_props = {
  1137  	    .wiphy_freq = WIFI_DEFAULT_FREQUENCY, .wiphy_freq_fixed = true, .mac = bssid, .ssid = ssid, .ssid_len = sizeof(ssid)};
  1138  
  1139  	for (int device_id = 0; device_id < WIFI_INITIAL_DEVICE_COUNT; device_id++) {
  1140  		// Virtual wifi devices will have consequtive mac addresses
  1141  		mac_addr[5] = device_id;
  1142  		int ret = hwsim80211_create_device(&nlmsg, sock, hwsim_family_id, mac_addr);
  1143  		if (ret < 0)
  1144  			failmsg("initialize_wifi_devices: failed to create device", "device=%d", device_id);
  1145  
  1146  		// For each device, unless HWSIM_ATTR_NO_VIF is passed, a network interface is created
  1147  		// automatically. Such interfaces are named "wlan0", "wlan1" and so on.
  1148  		char interface[6] = "wlan0";
  1149  		interface[4] += device_id;
  1150  
  1151  		if (nl80211_setup_ibss_interface(&nlmsg, sock, nl80211_family_id, interface, &ibss_props, true) < 0)
  1152  			failmsg("initialize_wifi_devices: failed set up IBSS network", "device=%d", device_id);
  1153  	}
  1154  
  1155  	// Wait for all devices to join the IBSS network
  1156  	for (int device_id = 0; device_id < WIFI_INITIAL_DEVICE_COUNT; device_id++) {
  1157  		char interface[6] = "wlan0";
  1158  		interface[4] += device_id;
  1159  		int ret = await_ifla_operstate(&nlmsg, interface, IF_OPER_UP, true);
  1160  		if (ret < 0)
  1161  			failmsg("initialize_wifi_devices: get_ifla_operstate failed",
  1162  				"device=%d, ret=%d", device_id, ret);
  1163  	}
  1164  
  1165  	close(sock);
  1166  }
  1167  #endif
  1168  
  1169  #if SYZ_EXECUTOR || (SYZ_NET_DEVICES && SYZ_NIC_VF) || SYZ_SWAP
  1170  static int runcmdline(char* cmdline)
  1171  {
  1172  	debug("%s\n", cmdline);
  1173  	int ret = system(cmdline);
  1174  	if (ret) {
  1175  		debug("FAIL: %s\n", cmdline);
  1176  	}
  1177  	return ret;
  1178  }
  1179  #endif
  1180  
  1181  #if SYZ_EXECUTOR || SYZ_NET_DEVICES
  1182  #include <arpa/inet.h>
  1183  #include <errno.h>
  1184  #include <fcntl.h>
  1185  #include <net/if.h>
  1186  #include <net/if_arp.h>
  1187  #include <stdarg.h>
  1188  #include <stdbool.h>
  1189  #include <sys/ioctl.h>
  1190  #include <sys/stat.h>
  1191  #include <sys/uio.h>
  1192  
  1193  #include <linux/if_ether.h>
  1194  #include <linux/if_tun.h>
  1195  #include <linux/ip.h>
  1196  #include <linux/tcp.h>
  1197  
  1198  // Addresses are chosen to be in the same subnet as tun addresses.
  1199  #define DEV_IPV4 "172.20.20.%d"
  1200  #define DEV_IPV6 "fe80::%02x"
  1201  #define DEV_MAC 0x00aaaaaaaaaa
  1202  
  1203  static void netdevsim_add(unsigned int addr, unsigned int port_count)
  1204  {
  1205  	// These devices are sticky and are not deleted on net namespace destruction.
  1206  	// So try to delete the previous version of the device.
  1207  	write_file("/sys/bus/netdevsim/del_device", "%u", addr);
  1208  	if (write_file("/sys/bus/netdevsim/new_device", "%u %u", addr, port_count)) {
  1209  		char buf[32];
  1210  		snprintf(buf, sizeof(buf), "netdevsim%d", addr);
  1211  		initialize_devlink_ports("netdevsim", buf, "netdevsim");
  1212  	}
  1213  }
  1214  
// Generic netlink family name that netlink_wireguard_setup() resolves to a family id.
#define WG_GENL_NAME "wireguard"
// The enums below rely on implicit numbering starting at 0, and the values are
// placed directly into netlink messages, so enumerators must not be reordered.
// NOTE(review): presumably these mirror the kernel's include/uapi/linux/wireguard.h; confirm.

// Generic netlink commands; WG_CMD_SET_DEVICE is the one sent by netlink_wireguard_setup().
enum wg_cmd {
	WG_CMD_GET_DEVICE,
	WG_CMD_SET_DEVICE,
};
// Top-level attributes of a device message.
enum wgdevice_attribute {
	WGDEVICE_A_UNSPEC,
	WGDEVICE_A_IFINDEX,
	WGDEVICE_A_IFNAME,
	WGDEVICE_A_PRIVATE_KEY,
	WGDEVICE_A_PUBLIC_KEY,
	WGDEVICE_A_FLAGS,
	WGDEVICE_A_LISTEN_PORT,
	WGDEVICE_A_FWMARK,
	WGDEVICE_A_PEERS,
};
// Attributes of one peer nested inside WGDEVICE_A_PEERS.
enum wgpeer_attribute {
	WGPEER_A_UNSPEC,
	WGPEER_A_PUBLIC_KEY,
	WGPEER_A_PRESHARED_KEY,
	WGPEER_A_FLAGS,
	WGPEER_A_ENDPOINT,
	WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL,
	WGPEER_A_LAST_HANDSHAKE_TIME,
	WGPEER_A_RX_BYTES,
	WGPEER_A_TX_BYTES,
	WGPEER_A_ALLOWEDIPS,
	WGPEER_A_PROTOCOL_VERSION,
};
// Attributes of one allowed-IP entry nested inside WGPEER_A_ALLOWEDIPS.
enum wgallowedip_attribute {
	WGALLOWEDIP_A_UNSPEC,
	WGALLOWEDIP_A_FAMILY,
	WGALLOWEDIP_A_IPADDR,
	WGALLOWEDIP_A_CIDR_MASK,
};
  1250  
  1251  static void netlink_wireguard_setup(void)
  1252  {
  1253  	const char ifname_a[] = "wg0";
  1254  	const char ifname_b[] = "wg1";
  1255  	const char ifname_c[] = "wg2";
  1256  	const char private_a[] = "\xa0\x5c\xa8\x4f\x6c\x9c\x8e\x38\x53\xe2\xfd\x7a\x70\xae\x0f\xb2\x0f\xa1\x52\x60\x0c\xb0\x08\x45\x17\x4f\x08\x07\x6f\x8d\x78\x43";
  1257  	const char private_b[] = "\xb0\x80\x73\xe8\xd4\x4e\x91\xe3\xda\x92\x2c\x22\x43\x82\x44\xbb\x88\x5c\x69\xe2\x69\xc8\xe9\xd8\x35\xb1\x14\x29\x3a\x4d\xdc\x6e";
  1258  	const char private_c[] = "\xa0\xcb\x87\x9a\x47\xf5\xbc\x64\x4c\x0e\x69\x3f\xa6\xd0\x31\xc7\x4a\x15\x53\xb6\xe9\x01\xb9\xff\x2f\x51\x8c\x78\x04\x2f\xb5\x42";
  1259  	const char public_a[] = "\x97\x5c\x9d\x81\xc9\x83\xc8\x20\x9e\xe7\x81\x25\x4b\x89\x9f\x8e\xd9\x25\xae\x9f\x09\x23\xc2\x3c\x62\xf5\x3c\x57\xcd\xbf\x69\x1c";
  1260  	const char public_b[] = "\xd1\x73\x28\x99\xf6\x11\xcd\x89\x94\x03\x4d\x7f\x41\x3d\xc9\x57\x63\x0e\x54\x93\xc2\x85\xac\xa4\x00\x65\xcb\x63\x11\xbe\x69\x6b";
  1261  	const char public_c[] = "\xf4\x4d\xa3\x67\xa8\x8e\xe6\x56\x4f\x02\x02\x11\x45\x67\x27\x08\x2f\x5c\xeb\xee\x8b\x1b\xf5\xeb\x73\x37\x34\x1b\x45\x9b\x39\x22";
  1262  	const uint16 listen_a = 20001;
  1263  	const uint16 listen_b = 20002;
  1264  	const uint16 listen_c = 20003;
  1265  	const uint16 af_inet = AF_INET;
  1266  	const uint16 af_inet6 = AF_INET6;
  1267  	// Unused, but useful in case we change this:
  1268  	// const struct sockaddr_in endpoint_a_v4 = {
  1269  	//     .sin_family = AF_INET,
  1270  	//     .sin_port = htons(listen_a),
  1271  	//     .sin_addr = {htonl(INADDR_LOOPBACK)}};
  1272  	const struct sockaddr_in endpoint_b_v4 = {
  1273  	    .sin_family = AF_INET,
  1274  	    .sin_port = htons(listen_b),
  1275  	    .sin_addr = {htonl(INADDR_LOOPBACK)}};
  1276  	const struct sockaddr_in endpoint_c_v4 = {
  1277  	    .sin_family = AF_INET,
  1278  	    .sin_port = htons(listen_c),
  1279  	    .sin_addr = {htonl(INADDR_LOOPBACK)}};
  1280  	struct sockaddr_in6 endpoint_a_v6 = {
  1281  	    .sin6_family = AF_INET6,
  1282  	    .sin6_port = htons(listen_a)};
  1283  	endpoint_a_v6.sin6_addr = in6addr_loopback;
  1284  	// Unused, but useful in case we change this:
  1285  	// const struct sockaddr_in6 endpoint_b_v6 = {
  1286  	//     .sin6_family = AF_INET6,
  1287  	//     .sin6_port = htons(listen_b)};
  1288  	// endpoint_b_v6.sin6_addr = in6addr_loopback;
  1289  	struct sockaddr_in6 endpoint_c_v6 = {
  1290  	    .sin6_family = AF_INET6,
  1291  	    .sin6_port = htons(listen_c)};
  1292  	endpoint_c_v6.sin6_addr = in6addr_loopback;
  1293  	const struct in_addr first_half_v4 = {0};
  1294  	const struct in_addr second_half_v4 = {(uint32)htonl(128 << 24)};
  1295  	const struct in6_addr first_half_v6 = {{{0}}};
  1296  	const struct in6_addr second_half_v6 = {{{0x80}}};
  1297  	const uint8 half_cidr = 1;
  1298  	const uint16 persistent_keepalives[] = {1, 3, 7, 9, 14, 19};
  1299  
  1300  	struct genlmsghdr genlhdr = {
  1301  	    .cmd = WG_CMD_SET_DEVICE,
  1302  	    .version = 1};
  1303  	int sock;
  1304  	int id, err;
  1305  
  1306  	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
  1307  	if (sock == -1) {
  1308  		debug("socket(AF_NETLINK) failed: %s\n", strerror(errno));
  1309  		return;
  1310  	}
  1311  
  1312  	id = netlink_query_family_id(&nlmsg, sock, WG_GENL_NAME, true);
  1313  	if (id == -1)
  1314  		goto error;
  1315  
  1316  	netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr));
  1317  	netlink_attr(&nlmsg, WGDEVICE_A_IFNAME, ifname_a, strlen(ifname_a) + 1);
  1318  	netlink_attr(&nlmsg, WGDEVICE_A_PRIVATE_KEY, private_a, 32);
  1319  	netlink_attr(&nlmsg, WGDEVICE_A_LISTEN_PORT, &listen_a, 2);
  1320  	netlink_nest(&nlmsg, NLA_F_NESTED | WGDEVICE_A_PEERS);
  1321  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1322  	netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_b, 32);
  1323  	netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_b_v4, sizeof(endpoint_b_v4));
  1324  	netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[0], 2);
  1325  	netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS);
  1326  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1327  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2);
  1328  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v4, sizeof(first_half_v4));
  1329  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1330  	netlink_done(&nlmsg);
  1331  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1332  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2);
  1333  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v6, sizeof(first_half_v6));
  1334  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1335  	netlink_done(&nlmsg);
  1336  	netlink_done(&nlmsg);
  1337  	netlink_done(&nlmsg);
  1338  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1339  	netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_c, 32);
  1340  	netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_c_v6, sizeof(endpoint_c_v6));
  1341  	netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[1], 2);
  1342  	netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS);
  1343  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1344  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2);
  1345  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v4, sizeof(second_half_v4));
  1346  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1347  	netlink_done(&nlmsg);
  1348  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1349  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2);
  1350  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v6, sizeof(second_half_v6));
  1351  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1352  	netlink_done(&nlmsg);
  1353  	netlink_done(&nlmsg);
  1354  	netlink_done(&nlmsg);
  1355  	netlink_done(&nlmsg);
  1356  	err = netlink_send(&nlmsg, sock);
  1357  	if (err < 0) {
  1358  		debug("netlink: failed to setup wireguard instance: %s\n", strerror(errno));
  1359  	}
  1360  
  1361  	netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr));
  1362  	netlink_attr(&nlmsg, WGDEVICE_A_IFNAME, ifname_b, strlen(ifname_b) + 1);
  1363  	netlink_attr(&nlmsg, WGDEVICE_A_PRIVATE_KEY, private_b, 32);
  1364  	netlink_attr(&nlmsg, WGDEVICE_A_LISTEN_PORT, &listen_b, 2);
  1365  	netlink_nest(&nlmsg, NLA_F_NESTED | WGDEVICE_A_PEERS);
  1366  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1367  	netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_a, 32);
  1368  	netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_a_v6, sizeof(endpoint_a_v6));
  1369  	netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[2], 2);
  1370  	netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS);
  1371  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1372  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2);
  1373  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v4, sizeof(first_half_v4));
  1374  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1375  	netlink_done(&nlmsg);
  1376  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1377  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2);
  1378  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v6, sizeof(first_half_v6));
  1379  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1380  	netlink_done(&nlmsg);
  1381  	netlink_done(&nlmsg);
  1382  	netlink_done(&nlmsg);
  1383  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1384  	netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_c, 32);
  1385  	netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_c_v4, sizeof(endpoint_c_v4));
  1386  	netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[3], 2);
  1387  	netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS);
  1388  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1389  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2);
  1390  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v4, sizeof(second_half_v4));
  1391  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1392  	netlink_done(&nlmsg);
  1393  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1394  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2);
  1395  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v6, sizeof(second_half_v6));
  1396  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1397  	netlink_done(&nlmsg);
  1398  	netlink_done(&nlmsg);
  1399  	netlink_done(&nlmsg);
  1400  	netlink_done(&nlmsg);
  1401  	err = netlink_send(&nlmsg, sock);
  1402  	if (err < 0) {
  1403  		debug("netlink: failed to setup wireguard instance: %s\n", strerror(errno));
  1404  	}
  1405  
  1406  	netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr));
  1407  	netlink_attr(&nlmsg, WGDEVICE_A_IFNAME, ifname_c, strlen(ifname_c) + 1);
  1408  	netlink_attr(&nlmsg, WGDEVICE_A_PRIVATE_KEY, private_c, 32);
  1409  	netlink_attr(&nlmsg, WGDEVICE_A_LISTEN_PORT, &listen_c, 2);
  1410  	netlink_nest(&nlmsg, NLA_F_NESTED | WGDEVICE_A_PEERS);
  1411  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1412  	netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_a, 32);
  1413  	netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_a_v6, sizeof(endpoint_a_v6));
  1414  	netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[4], 2);
  1415  	netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS);
  1416  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1417  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2);
  1418  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v4, sizeof(first_half_v4));
  1419  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1420  	netlink_done(&nlmsg);
  1421  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1422  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2);
  1423  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v6, sizeof(first_half_v6));
  1424  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1425  	netlink_done(&nlmsg);
  1426  	netlink_done(&nlmsg);
  1427  	netlink_done(&nlmsg);
  1428  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1429  	netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_b, 32);
  1430  	netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_b_v4, sizeof(endpoint_b_v4));
  1431  	netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[5], 2);
  1432  	netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS);
  1433  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1434  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2);
  1435  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v4, sizeof(second_half_v4));
  1436  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1437  	netlink_done(&nlmsg);
  1438  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1439  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2);
  1440  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v6, sizeof(second_half_v6));
  1441  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1442  	netlink_done(&nlmsg);
  1443  	netlink_done(&nlmsg);
  1444  	netlink_done(&nlmsg);
  1445  	netlink_done(&nlmsg);
  1446  	err = netlink_send(&nlmsg, sock);
  1447  	if (err < 0) {
  1448  		debug("netlink: failed to setup wireguard instance: %s\n", strerror(errno));
  1449  	}
  1450  
  1451  error:
  1452  	close(sock);
  1453  }
  1454  
  1455  #if SYZ_EXECUTOR || SYZ_NIC_VF
  1456  
  1457  static void netlink_nicvf_setup(void)
  1458  {
  1459  	char cmdline[256];
  1460  
  1461  #if SYZ_EXECUTOR
  1462  	if (!flag_nic_vf)
  1463  		return;
  1464  #endif
  1465  	if (!vf_intf.ppid)
  1466  		return;
  1467  
  1468  	debug("ppid = %d, vf_intf.pass_thru_intf: %s\n",
  1469  	      vf_intf.ppid, vf_intf.pass_thru_intf);
  1470  
  1471  	sprintf(cmdline, "nsenter -t 1 -n ip link set %s netns %d",
  1472  		vf_intf.pass_thru_intf, getpid());
  1473  	if (runcmdline(cmdline))
  1474  		failmsg("failed to run command", "%s", cmdline);
  1475  	sprintf(cmdline, "ip a s %s", vf_intf.pass_thru_intf);
  1476  	if (runcmdline(cmdline))
  1477  		failmsg("failed to run command", "%s", cmdline);
  1478  	sprintf(cmdline, "ip link set %s down", vf_intf.pass_thru_intf);
  1479  	if (runcmdline(cmdline))
  1480  		failmsg("failed to run command", "%s", cmdline);
  1481  	sprintf(cmdline, "ip link set %s name nicvf0", vf_intf.pass_thru_intf);
  1482  	if (runcmdline(cmdline))
  1483  		failmsg("failed to run command", "%s", cmdline);
  1484  	debug("nicvf0 VF pass-through setup complete.\n");
  1485  }
  1486  #endif // SYZ_NIC_VF
  1487  
  1488  // We test in a separate namespace, which does not have any network devices initially (even lo).
  1489  // Create/up as many as we can.
  1490  static void initialize_netdevices(void)
  1491  {
  1492  #if SYZ_EXECUTOR
  1493  	if (!flag_net_devices)
  1494  		return;
  1495  #endif
  1496  	// TODO: add the following devices:
  1497  	// - vxlan
  1498  	// - ipip
  1499  	// - lowpan (requires link to device of type IEEE802154, e.g. wpan0)
  1500  	// - ipoib (requires link to device of type ARPHRD_INFINIBAND)
  1501  	// - vrf
  1502  	// - rmnet
  1503  	// - openvswitch
  1504  	// Naive attempts to add devices of these types fail with various errors.
  1505  	// Also init namespace contains the following devices (which presumably can't be
  1506  	// created in non-init namespace), can we use them somehow?
  1507  	// - ifb0/1
  1508  	// - teql0
  1509  	// - eql
  1510  	// Note: netdevsim devices can't have the same name even in different namespaces.
  1511  	char netdevsim[16];
  1512  	sprintf(netdevsim, "netdevsim%d", (int)procid);
  1513  	struct {
  1514  		const char* type;
  1515  		const char* dev;
  1516  	} devtypes[] = {
  1517  	    // Note: ip6erspan device can't be added if ip6gretap exists in the same namespace.
  1518  	    {"ip6gretap", "ip6gretap0"},
  1519  	    {"bridge", "bridge0"},
  1520  	    {"vcan", "vcan0"},
  1521  	    {"bond", "bond0"},
  1522  	    {"team", "team0"},
  1523  	    {"dummy", "dummy0"},
  1524  #if SYZ_EXECUTOR || SYZ_NIC_VF
  1525  	    {"nicvf", "nicvf0"},
  1526  #endif
  1527  	    {"nlmon", "nlmon0"},
  1528  	    {"caif", "caif0"},
  1529  	    {"batadv", "batadv0"},
  1530  	    // Note: this adds vxcan0/vxcan1 pair, similar to veth (creating vxcan0 would fail).
  1531  	    {"vxcan", "vxcan1"},
  1532  	    // This adds connected veth0 and veth1 devices.
  1533  	    {"veth", 0},
  1534  	    {"wireguard", "wg0"},
  1535  	    {"wireguard", "wg1"},
  1536  	    {"wireguard", "wg2"},
  1537  	};
  1538  	const char* devmasters[] = {"bridge", "bond", "team", "batadv"};
  1539  	// If you extend this array, also update netdev_addr_id in vnet.txt
  1540  	// and devnames in socket.txt.
  1541  	struct {
  1542  		const char* name;
  1543  		int macsize;
  1544  		bool noipv6;
  1545  	} devices[] = {
  1546  	    {"lo", ETH_ALEN},
  1547  	    {"sit0", 0},
  1548  	    {"bridge0", ETH_ALEN},
  1549  	    {"vcan0", 0, true},
  1550  	    {"tunl0", 0},
  1551  	    {"gre0", 0},
  1552  	    {"gretap0", ETH_ALEN},
  1553  	    {"ip_vti0", 0},
  1554  	    {"ip6_vti0", 0},
  1555  	    {"ip6tnl0", 0},
  1556  	    {"ip6gre0", 0},
  1557  	    {"ip6gretap0", ETH_ALEN},
  1558  	    {"erspan0", ETH_ALEN},
  1559  	    {"bond0", ETH_ALEN},
  1560  	    {"veth0", ETH_ALEN},
  1561  	    {"veth1", ETH_ALEN},
  1562  	    {"team0", ETH_ALEN},
  1563  	    {"veth0_to_bridge", ETH_ALEN},
  1564  	    {"veth1_to_bridge", ETH_ALEN},
  1565  	    {"veth0_to_bond", ETH_ALEN},
  1566  	    {"veth1_to_bond", ETH_ALEN},
  1567  	    {"veth0_to_team", ETH_ALEN},
  1568  	    {"veth1_to_team", ETH_ALEN},
  1569  	    {"veth0_to_hsr", ETH_ALEN},
  1570  	    {"veth1_to_hsr", ETH_ALEN},
  1571  	    {"hsr0", 0},
  1572  	    {"dummy0", ETH_ALEN},
  1573  #if SYZ_EXECUTOR || SYZ_NIC_VF
  1574  	    {"nicvf0", 0, true},
  1575  #endif
  1576  	    {"nlmon0", 0},
  1577  	    {"vxcan0", 0, true},
  1578  	    {"vxcan1", 0, true},
  1579  	    {"caif0", ETH_ALEN}, // TODO: up'ing caif fails with ENODEV
  1580  	    {"batadv0", ETH_ALEN},
  1581  	    {netdevsim, ETH_ALEN},
  1582  	    {"xfrm0", ETH_ALEN},
  1583  	    {"veth0_virt_wifi", ETH_ALEN},
  1584  	    {"veth1_virt_wifi", ETH_ALEN},
  1585  	    {"virt_wifi0", ETH_ALEN},
  1586  	    {"veth0_vlan", ETH_ALEN},
  1587  	    {"veth1_vlan", ETH_ALEN},
  1588  	    {"vlan0", ETH_ALEN},
  1589  	    {"vlan1", ETH_ALEN},
  1590  	    {"macvlan0", ETH_ALEN},
  1591  	    {"macvlan1", ETH_ALEN},
  1592  	    {"ipvlan0", ETH_ALEN},
  1593  	    {"ipvlan1", ETH_ALEN},
  1594  	    {"veth0_macvtap", ETH_ALEN},
  1595  	    {"veth1_macvtap", ETH_ALEN},
  1596  	    {"macvtap0", ETH_ALEN},
  1597  	    {"macsec0", ETH_ALEN},
  1598  	    {"veth0_to_batadv", ETH_ALEN},
  1599  	    {"veth1_to_batadv", ETH_ALEN},
  1600  	    {"batadv_slave_0", ETH_ALEN},
  1601  	    {"batadv_slave_1", ETH_ALEN},
  1602  	    {"geneve0", ETH_ALEN},
  1603  	    {"geneve1", ETH_ALEN},
  1604  	    {"wg0", 0},
  1605  	    {"wg1", 0},
  1606  	    {"wg2", 0},
  1607  	};
  1608  	int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  1609  	if (sock == -1)
  1610  		fail("socket(AF_NETLINK) failed");
  1611  	unsigned i;
  1612  	for (i = 0; i < sizeof(devtypes) / sizeof(devtypes[0]); i++)
  1613  		netlink_add_device(&nlmsg, sock, devtypes[i].type, devtypes[i].dev);
  1614  	// This creates connected bridge/bond/team_slave devices of type veth,
  1615  	// and makes them slaves of bridge/bond/team devices, respectively.
  1616  	// Note: slave devices don't need MAC/IP addresses, only master devices.
  1617  	//       veth0_to_* is not slave devices, which still need ip addresses.
  1618  	for (i = 0; i < sizeof(devmasters) / (sizeof(devmasters[0])); i++) {
  1619  		char master[32], slave0[32], veth0[32], slave1[32], veth1[32];
  1620  		sprintf(slave0, "%s_slave_0", devmasters[i]);
  1621  		sprintf(veth0, "veth0_to_%s", devmasters[i]);
  1622  		netlink_add_veth(&nlmsg, sock, slave0, veth0);
  1623  		sprintf(slave1, "%s_slave_1", devmasters[i]);
  1624  		sprintf(veth1, "veth1_to_%s", devmasters[i]);
  1625  		netlink_add_veth(&nlmsg, sock, slave1, veth1);
  1626  		sprintf(master, "%s0", devmasters[i]);
  1627  		netlink_device_change(&nlmsg, sock, slave0, false, master, 0, 0, NULL);
  1628  		netlink_device_change(&nlmsg, sock, slave1, false, master, 0, 0, NULL);
  1629  	}
  1630  	netlink_add_xfrm(&nlmsg, sock, "xfrm0");
  1631  
  1632  	// bond/team_slave_* will set up automatically when set their master.
  1633  	// But bridge_slave_* need to set up manually.
  1634  	netlink_device_change(&nlmsg, sock, "bridge_slave_0", true, 0, 0, 0, NULL);
  1635  	netlink_device_change(&nlmsg, sock, "bridge_slave_1", true, 0, 0, 0, NULL);
  1636  
  1637  	// Setup hsr device (slightly different from what we do for devmasters).
  1638  	netlink_add_veth(&nlmsg, sock, "hsr_slave_0", "veth0_to_hsr");
  1639  	netlink_add_veth(&nlmsg, sock, "hsr_slave_1", "veth1_to_hsr");
  1640  	netlink_add_hsr(&nlmsg, sock, "hsr0", "hsr_slave_0", "hsr_slave_1");
  1641  	netlink_device_change(&nlmsg, sock, "hsr_slave_0", true, 0, 0, 0, NULL);
  1642  	netlink_device_change(&nlmsg, sock, "hsr_slave_1", true, 0, 0, 0, NULL);
  1643  
  1644  	netlink_add_veth(&nlmsg, sock, "veth0_virt_wifi", "veth1_virt_wifi");
  1645  	netlink_add_linked(&nlmsg, sock, "virt_wifi", "virt_wifi0", "veth1_virt_wifi");
  1646  
  1647  	netlink_add_veth(&nlmsg, sock, "veth0_vlan", "veth1_vlan");
  1648  	netlink_add_vlan(&nlmsg, sock, "vlan0", "veth0_vlan", 0, htons(ETH_P_8021Q));
  1649  	netlink_add_vlan(&nlmsg, sock, "vlan1", "veth0_vlan", 1, htons(ETH_P_8021AD));
  1650  	netlink_add_macvlan(&nlmsg, sock, "macvlan0", "veth1_vlan");
  1651  	netlink_add_macvlan(&nlmsg, sock, "macvlan1", "veth1_vlan");
  1652  	netlink_add_ipvlan(&nlmsg, sock, "ipvlan0", "veth0_vlan", IPVLAN_MODE_L2, 0);
  1653  	netlink_add_ipvlan(&nlmsg, sock, "ipvlan1", "veth0_vlan", IPVLAN_MODE_L3S, IPVLAN_F_VEPA);
  1654  
  1655  	netlink_add_veth(&nlmsg, sock, "veth0_macvtap", "veth1_macvtap");
  1656  	netlink_add_linked(&nlmsg, sock, "macvtap", "macvtap0", "veth0_macvtap");
  1657  	netlink_add_linked(&nlmsg, sock, "macsec", "macsec0", "veth1_macvtap");
  1658  
  1659  	char addr[32];
  1660  	sprintf(addr, DEV_IPV4, 14 + 10); // should point to veth0
  1661  	struct in_addr geneve_addr4;
  1662  	if (inet_pton(AF_INET, addr, &geneve_addr4) <= 0)
  1663  		fail("geneve0 inet_pton failed");
  1664  	struct in6_addr geneve_addr6;
  1665  	// Must not be link local (our device addresses are link local).
  1666  	if (inet_pton(AF_INET6, "fc00::01", &geneve_addr6) <= 0)
  1667  		fail("geneve1 inet_pton failed");
  1668  	netlink_add_geneve(&nlmsg, sock, "geneve0", 0, &geneve_addr4, 0);
  1669  	netlink_add_geneve(&nlmsg, sock, "geneve1", 1, 0, &geneve_addr6);
  1670  
  1671  	netdevsim_add((int)procid, 4); // Number of port is in sync with value in sys/linux/socket_netlink_generic_devlink.txt
  1672  
  1673  	netlink_wireguard_setup();
  1674  
  1675  #if SYZ_EXECUTOR || SYZ_NIC_VF
  1676  	netlink_nicvf_setup();
  1677  #endif
  1678  
  1679  	for (i = 0; i < sizeof(devices) / (sizeof(devices[0])); i++) {
  1680  		// Assign some unique address to devices. Some devices won't up without this.
  1681  		// Shift addresses by 10 because 0 subnet address can mean special things.
  1682  		char addr[32];
  1683  		sprintf(addr, DEV_IPV4, i + 10);
  1684  		netlink_add_addr4(&nlmsg, sock, devices[i].name, addr);
  1685  		if (!devices[i].noipv6) {
  1686  			sprintf(addr, DEV_IPV6, i + 10);
  1687  			netlink_add_addr6(&nlmsg, sock, devices[i].name, addr);
  1688  		}
  1689  		uint64 macaddr = DEV_MAC + ((i + 10ull) << 40);
  1690  		netlink_device_change(&nlmsg, sock, devices[i].name, true, 0, &macaddr, devices[i].macsize, NULL);
  1691  	}
  1692  	close(sock);
  1693  }
  1694  
  1695  // Same as initialize_netdevices, but called in init net namespace.
  1696  static void initialize_netdevices_init(void)
  1697  {
  1698  #if SYZ_EXECUTOR
  1699  	if (!flag_net_devices)
  1700  		return;
  1701  #endif
  1702  	int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  1703  	if (sock == -1)
  1704  		fail("socket(AF_NETLINK) failed");
  1705  	struct {
  1706  		const char* type;
  1707  		int macsize;
  1708  		bool noipv6;
  1709  		bool noup;
  1710  	} devtypes[] = {
  1711  	    // NETROM device, see net/netrom/{af_netrom,nr_dev}.c
  1712  	    {"nr", 7, true},
  1713  	    // ROSE device, see net/rose/{af_rose,rose_dev}.c
  1714  	    // We don't up it yet because it crashes kernel right away:
  1715  	    // https://groups.google.com/d/msg/syzkaller/v-4B3zoBC-4/02SCKEzJBwAJ
  1716  	    {"rose", 5, true, true},
  1717  	};
  1718  	unsigned i;
  1719  	for (i = 0; i < sizeof(devtypes) / sizeof(devtypes[0]); i++) {
  1720  		char dev[32], addr[32];
  1721  		sprintf(dev, "%s%d", devtypes[i].type, (int)procid);
  1722  		// Note: syscall descriptions know these addresses.
  1723  		sprintf(addr, "172.30.%d.%d", i, (int)procid + 1);
  1724  		netlink_add_addr4(&nlmsg, sock, dev, addr);
  1725  		if (!devtypes[i].noipv6) {
  1726  			sprintf(addr, "fe88::%02x:%02x", i, (int)procid + 1);
  1727  			netlink_add_addr6(&nlmsg, sock, dev, addr);
  1728  		}
  1729  		int macsize = devtypes[i].macsize;
  1730  		uint64 macaddr = 0xbbbbbb + ((unsigned long long)i << (8 * (macsize - 2))) +
  1731  				 (procid << (8 * (macsize - 1)));
  1732  		netlink_device_change(&nlmsg, sock, dev, !devtypes[i].noup, 0, &macaddr, macsize, NULL);
  1733  	}
  1734  	close(sock);
  1735  
  1736  #if SYZ_EXECUTOR || SYZ_NIC_VF
  1737  	find_vf_interface();
  1738  #endif
  1739  }
  1740  #endif
  1741  
  1742  #if SYZ_EXECUTOR || SYZ_NET_INJECTION && (__NR_syz_extract_tcp_res || SYZ_REPEAT)
  1743  #include <errno.h>
  1744  
  1745  static int read_tun(char* data, int size)
  1746  {
  1747  	if (tunfd < 0)
  1748  		return -1;
  1749  
  1750  	int rv = read(tunfd, data, size);
  1751  	if (rv < 0) {
  1752  		// EBADF can be returned if the test closes tunfd with close_range syscall.
  1753  		// Tun sometimes returns EBADFD, unclear if it's a kernel bug or not.
  1754  		if (errno == EAGAIN || errno == EBADF || errno == EBADFD)
  1755  			return -1;
  1756  		fail("tun read failed");
  1757  	}
  1758  	return rv;
  1759  }
  1760  #endif
  1761  
  1762  #if SYZ_EXECUTOR || __NR_syz_emit_ethernet && SYZ_NET_INJECTION
  1763  #include <stdbool.h>
  1764  #include <sys/uio.h>
  1765  
  1766  #if ENABLE_NAPI_FRAGS
// Maximum number of fragment sizes carried by struct vnet_fragmentation.
#define MAX_FRAGS 4
// Optional fragmentation description passed as the third argument of
// syz_emit_ethernet; it controls how the packet is split into iovecs.
struct vnet_fragmentation {
	// Non-zero: bytes left over after the listed fragments are sent as one extra fragment.
	uint32 full;
	// Number of entries of frags to use; syz_emit_ethernet clamps it to MAX_FRAGS.
	uint32 count;
	// Requested size of each fragment in bytes; each is capped to the remaining packet length.
	uint32 frags[MAX_FRAGS];
};
  1773  #endif
  1774  
  1775  static long syz_emit_ethernet(volatile long a0, volatile long a1, volatile long a2)
  1776  {
  1777  	// syz_emit_ethernet(len len[packet], packet ptr[in, eth_packet], frags ptr[in, vnet_fragmentation, opt])
  1778  	// vnet_fragmentation {
  1779  	// 	full	int32[0:1]
  1780  	// 	count	int32[1:4]
  1781  	// 	frags	array[int32[0:4096], 4]
  1782  	// }
  1783  	if (tunfd < 0)
  1784  		return (uintptr_t)-1;
  1785  
  1786  	uint32 length = a0;
  1787  	char* data = (char*)a1;
  1788  	debug_dump_data(data, length);
  1789  
  1790  #if ENABLE_NAPI_FRAGS
  1791  	struct vnet_fragmentation* frags = (struct vnet_fragmentation*)a2;
  1792  	struct iovec vecs[MAX_FRAGS + 1];
  1793  	uint32 nfrags = 0;
  1794  	if (!tun_frags_enabled || frags == NULL) {
  1795  		vecs[nfrags].iov_base = data;
  1796  		vecs[nfrags].iov_len = length;
  1797  		nfrags++;
  1798  	} else {
  1799  		bool full = frags->full;
  1800  		uint32 count = frags->count;
  1801  		if (count > MAX_FRAGS)
  1802  			count = MAX_FRAGS;
  1803  		uint32 i;
  1804  		for (i = 0; i < count && length != 0; i++) {
  1805  			uint32 size = frags->frags[i];
  1806  			if (size > length)
  1807  				size = length;
  1808  			vecs[nfrags].iov_base = data;
  1809  			vecs[nfrags].iov_len = size;
  1810  			nfrags++;
  1811  			data += size;
  1812  			length -= size;
  1813  		}
  1814  		if (length != 0 && (full || nfrags == 0)) {
  1815  			vecs[nfrags].iov_base = data;
  1816  			vecs[nfrags].iov_len = length;
  1817  			nfrags++;
  1818  		}
  1819  	}
  1820  	return writev(tunfd, vecs, nfrags);
  1821  #else
  1822  	return write(tunfd, data, length);
  1823  #endif
  1824  }
  1825  #endif
  1826  
  1827  #if SYZ_EXECUTOR || __NR_syz_io_uring_submit || __NR_syz_io_uring_complete || __NR_syz_io_uring_setup
  1828  
// Entry sizes in bytes, used as the stride when indexing the sqes area and
// the cqes array in the syz_io_uring_* helpers.
#define SIZEOF_IO_URING_SQE 64
#define SIZEOF_IO_URING_CQE 16

// Once an io_uring is set up by calling io_uring_setup, the offsets to the member fields
// to be used on the mmap'ed area are set in structs io_sqring_offsets and io_cqring_offsets.
// Except io_sqring_offsets.array, the offsets are static while all depend on how struct io_rings
// is organized in code. The offsets can be marked as resources in syzkaller descriptions but
// this makes it difficult to generate correct programs by the fuzzer. Thus, the offsets are
// hard-coded here (and in the descriptions), and array offset is later computed once the number
// of entries is available. Another way to obtain the offsets is to setup another io_uring here
// and use what it returns. It is slower but might be more maintainable.
// All values below are byte offsets from the start of the mapped ring.
#define SQ_HEAD_OFFSET 0
#define SQ_TAIL_OFFSET 64
#define SQ_RING_MASK_OFFSET 256
#define SQ_RING_ENTRIES_OFFSET 264
#define SQ_FLAGS_OFFSET 276
#define SQ_DROPPED_OFFSET 272
#define CQ_HEAD_OFFSET 128
#define CQ_TAIL_OFFSET 192
#define CQ_RING_MASK_OFFSET 260
#define CQ_RING_ENTRIES_OFFSET 268
#define CQ_RING_OVERFLOW_OFFSET 284
#define CQ_FLAGS_OFFSET 280
#define CQ_CQES_OFFSET 320
  1853  
  1854  #if SYZ_EXECUTOR || __NR_syz_io_uring_complete
  1855  
// From linux/io_uring.h
// Completion queue entry as copied out of the ring by syz_io_uring_complete.
struct io_uring_cqe {
	// Compared against the marker values 0x12345 / 0x23456 to recognize fd-producing operations.
	uint64 user_data;
	// Result of the operation; returned as the fd when user_data matches a marker.
	uint32 res;
	uint32 flags;
};
  1862  
  1863  static long syz_io_uring_complete(volatile long a0)
  1864  {
  1865  	// syzlang: syz_io_uring_complete(ring_ptr ring_ptr)
  1866  	// C:       syz_io_uring_complete(char* ring_ptr)
  1867  
  1868  	// It is not checked if the ring is empty
  1869  
  1870  	// Cast to original
  1871  	char* ring_ptr = (char*)a0;
  1872  
  1873  	// Compute the head index and the next head value
  1874  	uint32 cq_ring_mask = *(uint32*)(ring_ptr + CQ_RING_MASK_OFFSET);
  1875  	uint32* cq_head_ptr = (uint32*)(ring_ptr + CQ_HEAD_OFFSET);
  1876  	uint32 cq_head = *cq_head_ptr & cq_ring_mask;
  1877  	uint32 cq_head_next = *cq_head_ptr + 1;
  1878  
  1879  	// Compute the ptr to the src cq entry on the ring
  1880  	char* cqe_src = ring_ptr + CQ_CQES_OFFSET + cq_head * SIZEOF_IO_URING_CQE;
  1881  
  1882  	// Get the cq entry from the ring
  1883  	struct io_uring_cqe cqe;
  1884  	memcpy(&cqe, cqe_src, sizeof(cqe));
  1885  
  1886  	// Advance the head. Head is a free-flowing integer and relies on natural wrapping.
  1887  	// Ensure that the kernel will never see a head update without the preceeding CQE
  1888  	// stores being done.
  1889  	__atomic_store_n(cq_head_ptr, cq_head_next, __ATOMIC_RELEASE);
  1890  
  1891  	// In the descriptions (sys/linux/io_uring.txt), openat and openat2 are passed
  1892  	// with a unique range of sqe.user_data (0x12345 and 0x23456) to identify the operations
  1893  	// which produces an fd instance. Check cqe.user_data, which should be the same
  1894  	// as sqe.user_data for that operation. If it falls in that unique range, return
  1895  	// cqe.res as fd. Otherwise, just return an invalid fd.
  1896  	return (cqe.user_data == 0x12345 || cqe.user_data == 0x23456) ? (long)cqe.res : (long)-1;
  1897  }
  1898  
  1899  #endif
  1900  
  1901  #if SYZ_EXECUTOR || __NR_syz_io_uring_setup
  1902  
// Submission ring field offsets that are set when an io_uring is set up
// (part of struct io_uring_params). Only the array member is read in this file;
// the other offsets are hard-coded as SQ_*_OFFSET.
struct io_sqring_offsets {
	uint32 head;
	uint32 tail;
	uint32 ring_mask;
	uint32 ring_entries;
	uint32 flags;
	uint32 dropped;
	// Offset of the SQ index array inside the mapped ring.
	uint32 array;
	uint32 resv1;
	uint64 resv2;
};
  1914  
// Completion ring field offsets that are set when an io_uring is set up
// (part of struct io_uring_params). Only the cqes member is read in this file,
// to compute the size of the ring mapping.
struct io_cqring_offsets {
	uint32 head;
	uint32 tail;
	uint32 ring_mask;
	uint32 ring_entries;
	uint32 overflow;
	// Offset of the cqe array inside the mapped ring.
	uint32 cqes;
	uint64 resv[2];
};
  1924  
// Parameter block passed to the io_uring_setup syscall by syz_io_uring_setup
// and read back afterwards to size the mappings.
struct io_uring_params {
	// Entry counts, read after setup to compute the ring and sqes mapping sizes.
	uint32 sq_entries;
	uint32 cq_entries;
	// Setup flags; syz_io_uring_setup clears IORING_SETUP_CQE32 and IORING_SETUP_SQE128 before the syscall.
	uint32 flags;
	uint32 sq_thread_cpu;
	uint32 sq_thread_idle;
	uint32 features;
	uint32 resv[4];
	struct io_sqring_offsets sq_off;
	struct io_cqring_offsets cq_off;
};
  1936  
// mmap offsets used by syz_io_uring_setup to map the ring and the sqes area of an io_uring fd.
#define IORING_OFF_SQ_RING 0
#define IORING_OFF_SQES 0x10000000ULL
// Setup flags that syz_io_uring_setup masks out because they may change the sqe/cqe sizes.
#define IORING_SETUP_SQE128 (1U << 10)
#define IORING_SETUP_CQE32 (1U << 11)
  1941  
  1942  #include <sys/mman.h>
  1943  #include <unistd.h>
  1944  
  1945  // Wrapper for io_uring_setup and the subsequent mmap calls that map the ring and the sqes
  1946  static long syz_io_uring_setup(volatile long a0, volatile long a1, volatile long a2, volatile long a3)
  1947  {
  1948  	// syzlang: syz_io_uring_setup(entries int32[1:IORING_MAX_ENTRIES], params ptr[inout, io_uring_params], ring_ptr ptr[out, ring_ptr], sqes_ptr ptr[out, sqes_ptr]) fd_io_uring
  1949  	// C:       syz_io_uring_setup(uint32 entries, struct io_uring_params* params, void** ring_ptr_out, void** sqes_ptr_out) // returns uint32 fd_io_uring
  1950  
  1951  	// Cast to original
  1952  	uint32 entries = (uint32)a0;
  1953  	struct io_uring_params* setup_params = (struct io_uring_params*)a1;
  1954  	void** ring_ptr_out = (void**)a2;
  1955  	void** sqes_ptr_out = (void**)a3;
  1956  	// Temporarily disable IORING_SETUP_CQE32 and IORING_SETUP_SQE128 that may change SIZEOF_IO_URING_CQE and SIZEOF_IO_URING_SQE.
  1957  	// Tracking bug: https://github.com/google/syzkaller/issues/4531.
  1958  	setup_params->flags &= ~(IORING_SETUP_CQE32 | IORING_SETUP_SQE128);
  1959  	uint32 fd_io_uring = syscall(__NR_io_uring_setup, entries, setup_params);
  1960  
  1961  	// Compute the ring sizes
  1962  	uint32 sq_ring_sz = setup_params->sq_off.array + setup_params->sq_entries * sizeof(uint32);
  1963  	uint32 cq_ring_sz = setup_params->cq_off.cqes + setup_params->cq_entries * SIZEOF_IO_URING_CQE;
  1964  
  1965  	// Asssumed IORING_FEAT_SINGLE_MMAP, which is always the case with the current implementation
  1966  	// The implication is that the sq_ring_ptr and the cq_ring_ptr are the same but the
  1967  	// difference is in the offsets to access the fields of these rings.
  1968  	uint32 ring_sz = sq_ring_sz > cq_ring_sz ? sq_ring_sz : cq_ring_sz;
  1969  	*ring_ptr_out = mmap(0, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd_io_uring, IORING_OFF_SQ_RING);
  1970  
  1971  	uint32 sqes_sz = setup_params->sq_entries * SIZEOF_IO_URING_SQE;
  1972  	*sqes_ptr_out = mmap(0, sqes_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd_io_uring, IORING_OFF_SQES);
  1973  
  1974  	uint32* array = (uint32*)((uintptr_t)*ring_ptr_out + setup_params->sq_off.array);
  1975  	for (uint32 index = 0; index < entries; index++)
  1976  		array[index] = index;
  1977  
  1978  	return fd_io_uring;
  1979  }
  1980  
  1981  #endif
  1982  
  1983  #if SYZ_EXECUTOR || __NR_syz_io_uring_submit
  1984  
  1985  static long syz_io_uring_submit(volatile long a0, volatile long a1, volatile long a2)
  1986  {
  1987  	// syzlang: syz_io_uring_submit(ring_ptr ring_ptr, sqes_ptr sqes_ptr, 		sqe ptr[in, io_uring_sqe])
  1988  	// C:       syz_io_uring_submit(char* ring_ptr,       io_uring_sqe* sqes_ptr,    io_uring_sqe* sqe)
  1989  
  1990  	// It is not checked if the ring is full
  1991  
  1992  	// Cast to original
  1993  	char* ring_ptr = (char*)a0; // This will be exposed to offsets in bytes
  1994  	char* sqes_ptr = (char*)a1;
  1995  
  1996  	char* sqe = (char*)a2;
  1997  
  1998  	uint32 sq_ring_mask = *(uint32*)(ring_ptr + SQ_RING_MASK_OFFSET);
  1999  	uint32* sq_tail_ptr = (uint32*)(ring_ptr + SQ_TAIL_OFFSET);
  2000  	uint32 sq_tail = *sq_tail_ptr & sq_ring_mask;
  2001  
  2002  	// Get the ptr to the destination for the sqe
  2003  	char* sqe_dest = sqes_ptr + sq_tail * SIZEOF_IO_URING_SQE;
  2004  
  2005  	// Write the sqe entry to its destination in sqes
  2006  	memcpy(sqe_dest, sqe, SIZEOF_IO_URING_SQE);
  2007  
  2008  	// Write the index to the sqe array
  2009  	uint32 sq_tail_next = *sq_tail_ptr + 1;
  2010  
  2011  	// Advance the tail. Tail is a free-flowing integer and relies on natural wrapping.
  2012  	// Ensure that the kernel will never see a tail update without the preceeding SQE
  2013  	// stores being done.
  2014  	__atomic_store_n(sq_tail_ptr, sq_tail_next, __ATOMIC_RELEASE);
  2015  
  2016  	// Now the application is free to call io_uring_enter() to submit the sqe
  2017  	return 0;
  2018  }
  2019  
  2020  #endif
  2021  
  2022  #endif
  2023  
  2024  #if SYZ_EXECUTOR || __NR_syz_usbip_server_init
  2025  
  2026  #include <errno.h>
  2027  #include <fcntl.h>
  2028  #include <linux/usb/ch9.h>
  2029  #include <stdbool.h>
  2030  #include <stdio.h>
  2031  #include <stdlib.h>
  2032  #include <string.h>
  2033  #include <sys/socket.h>
  2034  #include <unistd.h>
  2035  
// This should be coherent with CONFIG_USBIP_VHCI_HC_PORTS.
// Number of ports per speed class (usb2 or usb3) in one vhci_hcd.
#define VHCI_HC_PORTS 8
// Total ports of one vhci_hcd: the usb2 ports followed by the usb3 ports.
#define VHCI_PORTS (VHCI_HC_PORTS * 2)
  2039  
  2040  static long syz_usbip_server_init(volatile long a0)
  2041  {
  2042  	// port_alloc[0] corresponds to ports which can be used by usb2 and
  2043  	// port_alloc[1] corresponds to ports which can be used by usb3.
  2044  	static int port_alloc[2];
  2045  
  2046  	int speed = (int)a0;
  2047  	bool usb3 = (speed == USB_SPEED_SUPER);
  2048  
  2049  	int socket_pair[2];
  2050  	if (socketpair(AF_UNIX, SOCK_STREAM, 0, socket_pair)) {
  2051  		// This can happen if the test calls prlimit(RLIMIT_AS).
  2052  		debug("syz_usbip_server_init: socketpair failed (%d)\n", errno);
  2053  		return -1;
  2054  	}
  2055  
  2056  	int client_fd = socket_pair[0];
  2057  	int server_fd = socket_pair[1];
  2058  
  2059  	int available_port_num = __atomic_fetch_add(&port_alloc[usb3], 1, __ATOMIC_RELAXED);
  2060  	if (available_port_num > VHCI_HC_PORTS) {
  2061  		debug("syz_usbip_server_init : no more available port for : %d\n", available_port_num);
  2062  		return -1;
  2063  	}
  2064  
  2065  	// Each port number corresponds to a particular vhci_hcd (USB/IP Virtual Host Controller) and it is used by either
  2066  	// an usb2 device or usb3 device. There are 16 ports available in each vhci_hcd.
  2067  	// (VHCI_PORTS = 16 in our case.) When they are occupied, the following vhci_hcd's ports are used.
  2068  	// First 16 ports correspond to vhci_hcd0, next 16 ports correspond to
  2069  	// vhci_hcd1 etc. In a vhci_hcd, first 8 ports are used by usb2 devices and last 8 are used by usb3 devices.
  2070  	int port_num = procid * VHCI_PORTS + usb3 * VHCI_HC_PORTS + available_port_num;
  2071  
  2072  	// Under normal USB/IP usage, devid represents the device ID on the server.
  2073  	// When fuzzing with syzkaller we don't have an actual server or an actual device, so use 0 for devid.
  2074  	char buffer[100];
  2075  	sprintf(buffer, "%d %d %s %d", port_num, client_fd, "0", speed);
  2076  
  2077  	write_file("/sys/devices/platform/vhci_hcd.0/attach", buffer);
  2078  	return server_fd;
  2079  }
  2080  
  2081  #endif
  2082  
  2083  #if SYZ_EXECUTOR || __NR_syz_btf_id_by_name
  2084  
  2085  #include <errno.h>
  2086  #include <fcntl.h>
  2087  #include <stdbool.h>
  2088  #include <stddef.h>
  2089  #include <stdio.h>
  2090  #include <stdlib.h>
  2091  #include <string.h>
  2092  #include <sys/stat.h>
  2093  #include <unistd.h>
  2094  
// Some items in linux/btf.h are relatively new, so we copy them here for
// backward compatibility.
// Value expected in btf_header.magic; syz_btf_id_by_name rejects data that does not match.
#define BTF_MAGIC 0xeB9F

// Header found at the start of the BTF data.
struct btf_header {
	__u16 magic;
	__u8 version;
	__u8 flags;
	// Length of this header in bytes; section offsets are relative to its end.
	__u32 hdr_len;
	// Offset and length in bytes of the type section.
	__u32 type_off;
	__u32 type_len;
	// Offset and length in bytes of the string section.
	__u32 str_off;
	__u32 str_len;
};
  2109  
// Accessors for btf_type.info: the kind is extracted from the bits starting at bit 24
// (with a 4-bit mask here) and vlen from the low 16 bits.
// NOTE(review): current linux/btf.h defines the kind field as 5 bits wide (mask 0x1f) and
// has kinds above 15 — confirm whether the 0x0f mask is still intended.
#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
#define BTF_INFO_VLEN(info) ((info) & 0xffff)

// Kind values for the types that are followed by extra data after struct btf_type
// (see the switch in syz_btf_id_by_name).
#define BTF_KIND_INT 1
#define BTF_KIND_ARRAY 3
#define BTF_KIND_STRUCT 4
#define BTF_KIND_UNION 5
#define BTF_KIND_ENUM 6
#define BTF_KIND_FUNC_PROTO 13
#define BTF_KIND_VAR 14
#define BTF_KIND_DATASEC 15
  2121  
// Fixed-size descriptor that starts every entry of the BTF type section.
struct btf_type {
	// Offset of the type name inside the string section.
	__u32 name_off;
	// Packed kind and vlen, decoded with BTF_INFO_KIND / BTF_INFO_VLEN.
	__u32 info;
	// Not read by the code in this file.
	union {
		__u32 size;
		__u32 type;
	};
};
  2130  
// The structures below describe the extra data that follows struct btf_type for
// particular kinds. syz_btf_id_by_name only uses their sizes to skip over that data.

// BTF_KIND_ENUM: vlen of these follow the btf_type.
struct btf_enum {
	__u32 name_off;
	__s32 val;
};

// BTF_KIND_ARRAY: a single one follows the btf_type.
struct btf_array {
	__u32 type;
	__u32 index_type;
	__u32 nelems;
};

// BTF_KIND_STRUCT / BTF_KIND_UNION: vlen of these follow the btf_type.
struct btf_member {
	__u32 name_off;
	__u32 type;
	__u32 offset;
};

// BTF_KIND_FUNC_PROTO: vlen of these follow the btf_type.
struct btf_param {
	__u32 name_off;
	__u32 type;
};

// BTF_KIND_VAR: a single one follows the btf_type.
struct btf_var {
	__u32 linkage;
};

// BTF_KIND_DATASEC: vlen of these follow the btf_type.
struct btf_var_secinfo {
	__u32 type;
	__u32 offset;
	__u32 size;
};
  2162  
  2163  // Set the limit on the maximum size of btf/vmlinux to be 10 MiB.
  2164  #define VMLINUX_MAX_SUPPORT_SIZE (10 * 1024 * 1024)
  2165  
  2166  // Read out all the content of /sys/kernel/btf/vmlinux to the fixed address
  2167  // buffer and return it. Return NULL if failed.
  2168  static char* read_btf_vmlinux()
  2169  {
  2170  	static bool is_read = false;
  2171  	static char buf[VMLINUX_MAX_SUPPORT_SIZE];
  2172  
  2173  	// There could be a race condition here, but it should not be harmful.
  2174  	if (is_read)
  2175  		return buf;
  2176  
  2177  	int fd = open("/sys/kernel/btf/vmlinux", O_RDONLY);
  2178  	if (fd < 0)
  2179  		return NULL;
  2180  
  2181  	unsigned long bytes_read = 0;
  2182  	for (;;) {
  2183  		ssize_t ret = read(fd, buf + bytes_read,
  2184  				   VMLINUX_MAX_SUPPORT_SIZE - bytes_read);
  2185  
  2186  		if (ret < 0 || bytes_read + ret == VMLINUX_MAX_SUPPORT_SIZE)
  2187  			return NULL;
  2188  
  2189  		if (ret == 0)
  2190  			break;
  2191  
  2192  		bytes_read += ret;
  2193  	}
  2194  
  2195  	is_read = true;
  2196  	return buf;
  2197  }
  2198  
  2199  // Given a pointer to a C-string as the only argument a0, return the
  2200  // corresponding btf ID for this name. Return -1 if there is an error when
  2201  // opening the vmlinux file or the name is not found in vmlinux.
  2202  static long syz_btf_id_by_name(volatile long a0)
  2203  {
  2204  	// syzlang: syz_btf_id_by_name(name ptr[in, string]) btf_id
  2205  	// C:		syz_btf_id_by_name(char* name)
  2206  	char* target = (char*)a0;
  2207  
  2208  	char* vmlinux = read_btf_vmlinux();
  2209  	if (vmlinux == NULL)
  2210  		return -1;
  2211  
  2212  	struct btf_header* btf_header = (struct btf_header*)vmlinux;
  2213  	if (btf_header->magic != BTF_MAGIC)
  2214  		return -1;
  2215  	// These offsets are bytes relative to the end of the header.
  2216  	char* btf_type_sec = vmlinux + btf_header->hdr_len + btf_header->type_off;
  2217  	char* btf_str_sec = vmlinux + btf_header->hdr_len + btf_header->str_off;
  2218  	// Scan through the btf type section, and find a type description that
  2219  	// matches the provided name.
  2220  	unsigned int bytes_parsed = 0;
  2221  	// BTF index starts at 1.
  2222  	long idx = 1;
  2223  	while (bytes_parsed < btf_header->type_len) {
  2224  		struct btf_type* btf_type = (struct btf_type*)(btf_type_sec + bytes_parsed);
  2225  		uint32 kind = BTF_INFO_KIND(btf_type->info);
  2226  		uint32 vlen = BTF_INFO_VLEN(btf_type->info);
  2227  		char* name = btf_str_sec + btf_type->name_off;
  2228  
  2229  		if (strcmp(name, target) == 0)
  2230  			return idx;
  2231  
  2232  		// From /include/uapi/linux/btf.h, some kinds of types are
  2233  		// followed by extra data.
  2234  		size_t skip;
  2235  		switch (kind) {
  2236  		case BTF_KIND_INT:
  2237  			skip = sizeof(uint32);
  2238  			break;
  2239  		case BTF_KIND_ENUM:
  2240  			skip = sizeof(struct btf_enum) * vlen;
  2241  			break;
  2242  		case BTF_KIND_ARRAY:
  2243  			skip = sizeof(struct btf_array);
  2244  			break;
  2245  		case BTF_KIND_STRUCT:
  2246  		case BTF_KIND_UNION:
  2247  			skip = sizeof(struct btf_member) * vlen;
  2248  			break;
  2249  		case BTF_KIND_FUNC_PROTO:
  2250  			skip = sizeof(struct btf_param) * vlen;
  2251  			break;
  2252  		case BTF_KIND_VAR:
  2253  			skip = sizeof(struct btf_var);
  2254  			break;
  2255  		case BTF_KIND_DATASEC:
  2256  			skip = sizeof(struct btf_var_secinfo) * vlen;
  2257  			break;
  2258  		default:
  2259  			skip = 0;
  2260  		}
  2261  
  2262  		bytes_parsed += sizeof(struct btf_type) + skip;
  2263  		idx++;
  2264  	}
  2265  
  2266  	return -1;
  2267  }
  2268  
  2269  #endif // SYZ_EXECUTOR || __NR_syz_btf_id_by_name
  2270  
  2271  // Same as memcpy except that it accepts offset to dest and src.
  2272  #if SYZ_EXECUTOR || __NR_syz_memcpy_off
  2273  static long syz_memcpy_off(volatile long a0, volatile long a1, volatile long a2, volatile long a3, volatile long a4)
  2274  {
  2275  	// C:       syz_memcpy_off(void* dest, uint32 dest_off, void* src, uint32 src_off, size_t n)
  2276  
  2277  	// Cast to original
  2278  	char* dest = (char*)a0;
  2279  	uint32 dest_off = (uint32)a1;
  2280  	char* src = (char*)a2;
  2281  	uint32 src_off = (uint32)a3;
  2282  	size_t n = (size_t)a4;
  2283  
  2284  	return (long)memcpy(dest + dest_off, src + src_off, n);
  2285  }
  2286  #endif
  2287  
  2288  #if SYZ_EXECUTOR || __NR_syz_create_resource
  2289  // syz_create_resource(val intptr) intptr
  2290  // Variants of this pseudo-syscall are used to create resources from arbitrary values.
  2291  // For example:
  2292  //   syz_create_resource$foo(x int32) resource_foo
  2293  // allows the fuzzer to use the same random int32 value in multiple syscalls,
  2294  // and should increase probability of generation of syscalls related to foo.
  2295  static long syz_create_resource(volatile long val)
  2296  {
  2297  	return val;
  2298  }
  2299  #endif
  2300  
  2301  #if (SYZ_EXECUTOR || SYZ_REPEAT && SYZ_NET_INJECTION) && SYZ_EXECUTOR_USES_FORK_SERVER
  2302  static void flush_tun()
  2303  {
  2304  #if SYZ_EXECUTOR
  2305  	if (!flag_net_injection)
  2306  		return;
  2307  #endif
  2308  	char data[1000];
  2309  	while (read_tun(&data[0], sizeof(data)) != -1) {
  2310  	}
  2311  }
  2312  #endif
  2313  
  2314  #if SYZ_EXECUTOR || __NR_syz_extract_tcp_res && SYZ_NET_INJECTION
  2315  #ifndef __ANDROID__
// Can't include <linux/ipv6.h>, since it causes
// conflicts due to some structs redefinition.
// Local copy of the IPv6 header layout. syz_extract_tcp_res uses its size to
// locate the TCP header and reads only the nexthdr field.
// NOTE(review): the priority/version bit-field order looks like the little-endian
// variant of the kernel definition — confirm for big-endian targets.
struct ipv6hdr {
	__u8 priority : 4,
	    version : 4;
	__u8 flow_lbl[3];

	__be16 payload_len;
	// Protocol of the header that follows; compared against IPPROTO_TCP.
	__u8 nexthdr;
	__u8 hop_limit;

	struct in6_addr saddr;
	struct in6_addr daddr;
};
  2330  #endif
  2331  
// Output of syz_extract_tcp_res: sequence and acknowledgement numbers taken from
// a TCP header, with the caller-provided increments added. Both values are stored
// after conversion with htonl.
struct tcp_resources {
	uint32 seq;
	uint32 ack;
};
  2336  
  2337  static long syz_extract_tcp_res(volatile long a0, volatile long a1, volatile long a2)
  2338  {
  2339  	// syz_extract_tcp_res(res ptr[out, tcp_resources], seq_inc int32, ack_inc int32)
  2340  
  2341  	if (tunfd < 0)
  2342  		return (uintptr_t)-1;
  2343  
  2344  	// We just need this to be large enough to hold headers that we parse (ethernet/ip/tcp).
  2345  	// Rest of the packet (if any) will be silently truncated which is fine.
  2346  	char data[1000];
  2347  	int rv = read_tun(&data[0], sizeof(data));
  2348  	if (rv == -1)
  2349  		return (uintptr_t)-1;
  2350  	size_t length = rv;
  2351  	debug_dump_data(data, length);
  2352  
  2353  	if (length < sizeof(struct ethhdr))
  2354  		return (uintptr_t)-1;
  2355  	struct ethhdr* ethhdr = (struct ethhdr*)&data[0];
  2356  
  2357  	struct tcphdr* tcphdr = 0;
  2358  	if (ethhdr->h_proto == htons(ETH_P_IP)) {
  2359  		if (length < sizeof(struct ethhdr) + sizeof(struct iphdr))
  2360  			return (uintptr_t)-1;
  2361  		struct iphdr* iphdr = (struct iphdr*)&data[sizeof(struct ethhdr)];
  2362  		if (iphdr->protocol != IPPROTO_TCP)
  2363  			return (uintptr_t)-1;
  2364  		if (length < sizeof(struct ethhdr) + iphdr->ihl * 4 + sizeof(struct tcphdr))
  2365  			return (uintptr_t)-1;
  2366  		tcphdr = (struct tcphdr*)&data[sizeof(struct ethhdr) + iphdr->ihl * 4];
  2367  	} else {
  2368  		if (length < sizeof(struct ethhdr) + sizeof(struct ipv6hdr))
  2369  			return (uintptr_t)-1;
  2370  		struct ipv6hdr* ipv6hdr = (struct ipv6hdr*)&data[sizeof(struct ethhdr)];
  2371  		// TODO: parse and skip extension headers.
  2372  		if (ipv6hdr->nexthdr != IPPROTO_TCP)
  2373  			return (uintptr_t)-1;
  2374  		if (length < sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
  2375  			return (uintptr_t)-1;
  2376  		tcphdr = (struct tcphdr*)&data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr)];
  2377  	}
  2378  
  2379  	struct tcp_resources* res = (struct tcp_resources*)a0;
  2380  	res->seq = htonl((ntohl(tcphdr->seq) + (uint32)a1));
  2381  	res->ack = htonl((ntohl(tcphdr->ack_seq) + (uint32)a2));
  2382  
  2383  	debug("extracted seq: %08x\n", res->seq);
  2384  	debug("extracted ack: %08x\n", res->ack);
  2385  
  2386  	return 0;
  2387  }
  2388  #endif
  2389  
  2390  #if SYZ_EXECUTOR || SYZ_CLOSE_FDS || __NR_syz_usb_connect || __NR_syz_usb_connect_ath9k
  2391  #define MAX_FDS 30
  2392  #endif
  2393  
  2394  #if SYZ_EXECUTOR || __NR_syz_usb_connect || __NR_syz_usb_connect_ath9k ||       \
  2395      __NR_syz_usb_ep_write || __NR_syz_usb_ep_read || __NR_syz_usb_control_io || \
  2396      __NR_syz_usb_disconnect
  2397  #include <errno.h>
  2398  #include <fcntl.h>
  2399  #include <linux/usb/ch9.h>
  2400  #include <stdarg.h>
  2401  #include <stdbool.h>
  2402  #include <stddef.h>
  2403  #include <stdio.h>
  2404  #include <sys/mount.h>
  2405  #include <sys/stat.h>
  2406  #include <sys/types.h>
  2407  
  2408  #include "common_usb_linux.h"
  2409  #endif
  2410  
  2411  #if SYZ_EXECUTOR || __NR_syz_open_dev
  2412  #include <fcntl.h>
  2413  #include <string.h>
  2414  #include <sys/stat.h>
  2415  #include <sys/types.h>
  2416  
  2417  static long syz_open_dev(volatile long a0, volatile long a1, volatile long a2)
  2418  {
  2419  	if (a0 == 0xc || a0 == 0xb) {
  2420  		// syz_open_dev$char(dev const[0xc], major intptr, minor intptr) fd
  2421  		// syz_open_dev$block(dev const[0xb], major intptr, minor intptr) fd
  2422  		char buf[128];
  2423  		sprintf(buf, "/dev/%s/%d:%d", a0 == 0xc ? "char" : "block", (uint8)a1, (uint8)a2);
  2424  		return open(buf, O_RDWR, 0);
  2425  	} else {
  2426  		// syz_open_dev(dev ptr[in, string["/dev/foo#"]], id intptr, flags flags[open_flags]) fd
  2427  		unsigned long nb = a1;
  2428  		char buf[1024];
  2429  		char* hash;
  2430  		strncpy(buf, (char*)a0, sizeof(buf) - 1);
  2431  		buf[sizeof(buf) - 1] = 0;
  2432  		while ((hash = strchr(buf, '#'))) {
  2433  			*hash = '0' + (char)(nb % 10); // 10 devices should be enough for everyone.
  2434  			nb /= 10;
  2435  		}
  2436  		return open(buf, a2 & ~O_CREAT, 0);
  2437  	}
  2438  }
  2439  #endif
  2440  
  2441  #if SYZ_EXECUTOR || __NR_syz_open_procfs
  2442  #include <fcntl.h>
  2443  #include <string.h>
  2444  #include <sys/stat.h>
  2445  #include <sys/types.h>
  2446  
  2447  static long syz_open_procfs(volatile long a0, volatile long a1)
  2448  {
  2449  	// syz_open_procfs(pid pid, file ptr[in, string[procfs_file]]) fd
  2450  
  2451  	char buf[128];
  2452  	memset(buf, 0, sizeof(buf));
  2453  	if (a0 == 0) {
  2454  		snprintf(buf, sizeof(buf), "/proc/self/%s", (char*)a1);
  2455  	} else if (a0 == -1) {
  2456  		snprintf(buf, sizeof(buf), "/proc/thread-self/%s", (char*)a1);
  2457  	} else {
  2458  		snprintf(buf, sizeof(buf), "/proc/self/task/%d/%s", (int)a0, (char*)a1);
  2459  	}
  2460  	int fd = open(buf, O_RDWR);
  2461  	if (fd == -1)
  2462  		fd = open(buf, O_RDONLY);
  2463  	return fd;
  2464  }
  2465  #endif
  2466  
  2467  #if SYZ_EXECUTOR || __NR_syz_open_pts
  2468  #include <fcntl.h>
  2469  #include <sys/ioctl.h>
  2470  #include <sys/stat.h>
  2471  #include <sys/types.h>
  2472  
  2473  static long syz_open_pts(volatile long a0, volatile long a1)
  2474  {
  2475  	// syz_openpts(fd fd[tty], flags flags[open_flags]) fd[tty]
  2476  	int ptyno = 0;
  2477  	if (ioctl(a0, TIOCGPTN, &ptyno))
  2478  		return -1;
  2479  	char buf[128];
  2480  	sprintf(buf, "/dev/pts/%d", ptyno);
  2481  	return open(buf, a1, 0);
  2482  }
  2483  #endif
  2484  
  2485  #if SYZ_EXECUTOR || __NR_syz_init_net_socket
  2486  #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID
  2487  #include <fcntl.h>
  2488  #include <sched.h>
  2489  #include <sys/stat.h>
  2490  #include <sys/types.h>
  2491  #include <unistd.h>
  2492  
  2493  // syz_init_net_socket opens a socket in init net namespace.
  2494  // Used for families that can only be created in init net namespace.
  2495  static long syz_init_net_socket(volatile long domain, volatile long type, volatile long proto)
  2496  {
  2497  	int netns = open("/proc/self/ns/net", O_RDONLY);
  2498  	if (netns == -1)
  2499  		return netns;
  2500  	if (setns(kInitNetNsFd, 0))
  2501  		return -1;
  2502  	int sock = syscall(__NR_socket, domain, type, proto);
  2503  	int err = errno;
  2504  	if (setns(netns, 0)) {
  2505  		// The operation may fail if the fd is closed by
  2506  		// a syscall from another thread.
  2507  		exitf("setns(netns) failed");
  2508  	}
  2509  	close(netns);
  2510  	errno = err;
  2511  	return sock;
  2512  }
  2513  #else
  2514  static long syz_init_net_socket(volatile long domain, volatile long type, volatile long proto)
  2515  {
  2516  	return syscall(__NR_socket, domain, type, proto);
  2517  }
  2518  #endif
  2519  #endif
  2520  
  2521  #if SYZ_EXECUTOR || __NR_syz_socket_connect_nvme_tcp
  2522  #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE
  2523  #include <arpa/inet.h>
  2524  #include <fcntl.h>
  2525  #include <netinet/in.h>
  2526  #include <sched.h>
  2527  #include <sys/socket.h>
  2528  #include <sys/stat.h>
  2529  #include <sys/types.h>
  2530  #include <unistd.h>
  2531  
  2532  static long syz_socket_connect_nvme_tcp()
  2533  {
  2534  	struct sockaddr_in nvme_local_address;
  2535  	int netns = open("/proc/self/ns/net", O_RDONLY);
  2536  	if (netns == -1)
  2537  		return netns;
  2538  	if (setns(kInitNetNsFd, 0))
  2539  		return -1;
  2540  	int sock = syscall(__NR_socket, AF_INET, SOCK_STREAM, 0x0);
  2541  	int err = errno;
  2542  	if (setns(netns, 0)) {
  2543  		// The operation may fail if the fd is closed by
  2544  		// a syscall from another thread.
  2545  		exitf("setns(netns) failed");
  2546  	}
  2547  	close(netns);
  2548  	errno = err;
  2549  	// We only connect to an NVMe-oF/TCP server on 127.0.0.1:4420
  2550  	nvme_local_address.sin_family = AF_INET;
  2551  	nvme_local_address.sin_port = htobe16(4420);
  2552  	nvme_local_address.sin_addr.s_addr = htobe32(0x7f000001);
  2553  	err = syscall(__NR_connect, sock, &nvme_local_address, sizeof(nvme_local_address));
  2554  	if (err != 0) {
  2555  		close(sock);
  2556  		return -1;
  2557  	}
  2558  	return sock;
  2559  }
  2560  #else
  2561  static long syz_socket_connect_nvme_tcp()
  2562  {
  2563  	return syscall(__NR_socket, -1, 0, 0);
  2564  }
  2565  #endif
  2566  #endif
  2567  
  2568  #if SYZ_EXECUTOR || SYZ_VHCI_INJECTION
  2569  #include <errno.h>
  2570  #include <fcntl.h>
  2571  #include <linux/rfkill.h>
  2572  #include <pthread.h>
  2573  #include <sys/epoll.h>
  2574  #include <sys/ioctl.h>
  2575  #include <sys/socket.h>
  2576  #include <sys/uio.h>
  2577  
// Locally defined Bluetooth constants.
// NOTE(review): presumably these mirror the kernel's Bluetooth headers - confirm.
// BTPROTO_HCI is the protocol passed to socket(AF_BLUETOOTH, SOCK_RAW, ...),
// ACL_LINK is the link_type of the faked connection events,
// SCAN_PAGE is the dev_opt value sent with HCISETSCAN.
#define BTPROTO_HCI 1
#define ACL_LINK 1
#define SCAN_PAGE 2

// 6-byte Bluetooth device address, packed so it can be embedded in wire structs.
typedef struct {
	uint8 b[6];
} __attribute__((packed)) bdaddr_t;
  2585  
// Values of the leading packet-type byte on the vhci fd: the code below reads
// HCI_COMMAND_PKT packets and writes HCI_EVENT_PKT / HCI_VENDOR_PKT packets.
#define HCI_COMMAND_PKT 1
#define HCI_EVENT_PKT 4
#define HCI_VENDOR_PKT 0xff

// Header of a command packet (follows the packet-type byte).
// plen is the number of parameter bytes after this header.
struct hci_command_hdr {
	uint16 opcode;
	uint8 plen;
} __attribute__((packed));

// Header of an event packet (follows the packet-type byte).
// plen is the number of payload bytes after this header.
struct hci_event_hdr {
	uint8 evt;
	uint8 plen;
} __attribute__((packed));

// Payload sent with event code HCI_EV_CONN_COMPLETE.
#define HCI_EV_CONN_COMPLETE 0x03
struct hci_ev_conn_complete {
	uint8 status;
	uint16 handle;
	bdaddr_t bdaddr;
	uint8 link_type;
	uint8 encr_mode;
} __attribute__((packed));

// Payload sent with event code HCI_EV_CONN_REQUEST.
#define HCI_EV_CONN_REQUEST 0x04
struct hci_ev_conn_request {
	bdaddr_t bdaddr;
	uint8 dev_class[3];
	uint8 link_type;
} __attribute__((packed));

// Payload sent with event code HCI_EV_REMOTE_FEATURES.
#define HCI_EV_REMOTE_FEATURES 0x0b
struct hci_ev_remote_features {
	uint8 status;
	uint16 handle;
	uint8 features[8];
} __attribute__((packed));

// Fixed prefix of an HCI_EV_CMD_COMPLETE event; opcode echoes the command
// being answered and the command-specific reply bytes follow it.
#define HCI_EV_CMD_COMPLETE 0x0e
struct hci_ev_cmd_complete {
	uint8 ncmd;
	uint16 opcode;
} __attribute__((packed));

// Command opcodes that process_command_pkt() answers with dedicated replies.
#define HCI_OP_WRITE_SCAN_ENABLE 0x0c1a

// Reply payload for HCI_OP_READ_BUFFER_SIZE.
#define HCI_OP_READ_BUFFER_SIZE 0x1005
struct hci_rp_read_buffer_size {
	uint8 status;
	uint16 acl_mtu;
	uint8 sco_mtu;
	uint16 acl_max_pkt;
	uint16 sco_max_pkt;
} __attribute__((packed));

// Reply payload for HCI_OP_READ_BD_ADDR.
#define HCI_OP_READ_BD_ADDR 0x1009
struct hci_rp_read_bd_addr {
	uint8 status;
	bdaddr_t bdaddr;
} __attribute__((packed));

// Prefix of an HCI_EV_LE_META event; subevent selects the payload that follows.
#define HCI_EV_LE_META 0x3e
struct hci_ev_le_meta {
	uint8 subevent;
} __attribute__((packed));

// LE meta subevent payload used to fake a low-energy connection.
#define HCI_EV_LE_CONN_COMPLETE 0x01
struct hci_ev_le_conn_complete {
	uint8 status;
	uint16 handle;
	uint8 role;
	uint8 bdaddr_type;
	bdaddr_t bdaddr;
	uint16 interval;
	uint16 latency;
	uint16 supervision_timeout;
	uint8 clk_accurancy;
} __attribute__((packed));
  2663  
// Argument of the HCISETSCAN ioctl: device id plus the option value to apply.
struct hci_dev_req {
	uint16 dev_id;
	uint32 dev_opt;
};

// Two-byte request written to /dev/vhci by initialize_vhci():
// type is HCI_VENDOR_PKT and opcode is HCI_PRIMARY.
struct vhci_vendor_pkt_request {
	uint8 type;
	uint8 opcode;
} __attribute__((packed));

// Packet read back from /dev/vhci. The type byte decides which union member
// is valid: vendor_pkt for HCI_VENDOR_PKT (id is the hci device id),
// command_hdr for HCI_COMMAND_PKT.
struct vhci_pkt {
	uint8 type;
	union {
		struct {
			uint8 opcode;
			uint16 id;
		} __attribute__((packed)) vendor_pkt;
		struct hci_command_hdr command_hdr;
	};
} __attribute__((packed));

// ioctls issued on the raw HCI socket: bring the device up / set the scan mode.
#define HCIDEVUP _IOW('H', 201, int)
#define HCISETSCAN _IOW('H', 221, int)

// fd of the opened /dev/vhci device; stays -1 until initialize_vhci() succeeds.
static int vhci_fd = -1;
  2689  
  2690  static void rfkill_unblock_all()
  2691  {
  2692  	int fd = open("/dev/rfkill", O_WRONLY);
  2693  	if (fd < 0)
  2694  		fail("open /dev/rfkill failed");
  2695  	struct rfkill_event event = {0};
  2696  	event.idx = 0;
  2697  	event.type = RFKILL_TYPE_ALL;
  2698  	event.op = RFKILL_OP_CHANGE_ALL;
  2699  	event.soft = 0;
  2700  	event.hard = 0;
  2701  	if (write(fd, &event, sizeof(event)) < 0)
  2702  		fail("write rfkill event failed");
  2703  	close(fd);
  2704  }
  2705  
  2706  static void hci_send_event_packet(int fd, uint8 evt, void* data, size_t data_len)
  2707  {
  2708  	struct iovec iv[3];
  2709  
  2710  	struct hci_event_hdr hdr;
  2711  	hdr.evt = evt;
  2712  	hdr.plen = data_len;
  2713  
  2714  	uint8 type = HCI_EVENT_PKT;
  2715  
  2716  	iv[0].iov_base = &type;
  2717  	iv[0].iov_len = sizeof(type);
  2718  	iv[1].iov_base = &hdr;
  2719  	iv[1].iov_len = sizeof(hdr);
  2720  	iv[2].iov_base = data;
  2721  	iv[2].iov_len = data_len;
  2722  
  2723  	if (writev(fd, iv, sizeof(iv) / sizeof(struct iovec)) < 0)
  2724  		fail("writev failed");
  2725  }
  2726  
  2727  static void hci_send_event_cmd_complete(int fd, uint16 opcode, void* data, size_t data_len)
  2728  {
  2729  	struct iovec iv[4];
  2730  
  2731  	struct hci_event_hdr hdr;
  2732  	hdr.evt = HCI_EV_CMD_COMPLETE;
  2733  	hdr.plen = sizeof(struct hci_ev_cmd_complete) + data_len;
  2734  
  2735  	struct hci_ev_cmd_complete evt_hdr;
  2736  	evt_hdr.ncmd = 1;
  2737  	evt_hdr.opcode = opcode;
  2738  
  2739  	uint8 type = HCI_EVENT_PKT;
  2740  
  2741  	iv[0].iov_base = &type;
  2742  	iv[0].iov_len = sizeof(type);
  2743  	iv[1].iov_base = &hdr;
  2744  	iv[1].iov_len = sizeof(hdr);
  2745  	iv[2].iov_base = &evt_hdr;
  2746  	iv[2].iov_len = sizeof(evt_hdr);
  2747  	iv[3].iov_base = data;
  2748  	iv[3].iov_len = data_len;
  2749  
  2750  	if (writev(fd, iv, sizeof(iv) / sizeof(struct iovec)) < 0)
  2751  		fail("writev failed");
  2752  }
  2753  
  2754  static bool process_command_pkt(int fd, char* buf, ssize_t buf_size)
  2755  {
  2756  	struct hci_command_hdr* hdr = (struct hci_command_hdr*)buf;
  2757  	if (buf_size < (ssize_t)sizeof(struct hci_command_hdr) ||
  2758  	    hdr->plen != buf_size - sizeof(struct hci_command_hdr))
  2759  		failmsg("process_command_pkt: invalid size", "suze=%zx", buf_size);
  2760  
  2761  	switch (hdr->opcode) {
  2762  	case HCI_OP_WRITE_SCAN_ENABLE: {
  2763  		uint8 status = 0;
  2764  		hci_send_event_cmd_complete(fd, hdr->opcode, &status, sizeof(status));
  2765  		return true;
  2766  	}
  2767  	case HCI_OP_READ_BD_ADDR: {
  2768  		struct hci_rp_read_bd_addr rp = {0};
  2769  		rp.status = 0;
  2770  		memset(&rp.bdaddr, 0xaa, 6);
  2771  		hci_send_event_cmd_complete(fd, hdr->opcode, &rp, sizeof(rp));
  2772  		return false;
  2773  	}
  2774  	case HCI_OP_READ_BUFFER_SIZE: {
  2775  		struct hci_rp_read_buffer_size rp = {0};
  2776  		rp.status = 0;
  2777  		rp.acl_mtu = 1021;
  2778  		rp.sco_mtu = 96;
  2779  		rp.acl_max_pkt = 4;
  2780  		rp.sco_max_pkt = 6;
  2781  		hci_send_event_cmd_complete(fd, hdr->opcode, &rp, sizeof(rp));
  2782  		return false;
  2783  	}
  2784  	}
  2785  
  2786  	char dummy[0xf9] = {0};
  2787  	hci_send_event_cmd_complete(fd, hdr->opcode, dummy, sizeof(dummy));
  2788  	return false;
  2789  }
  2790  
  2791  static void* event_thread(void* arg)
  2792  {
  2793  	while (1) {
  2794  		char buf[1024] = {0};
  2795  		ssize_t buf_size = read(vhci_fd, buf, sizeof(buf));
  2796  		if (buf_size < 0)
  2797  			fail("read failed");
  2798  		debug_dump_data(buf, buf_size);
  2799  		if (buf_size > 0 && buf[0] == HCI_COMMAND_PKT) {
  2800  			if (process_command_pkt(vhci_fd, buf + 1, buf_size - 1))
  2801  				break;
  2802  		}
  2803  	}
  2804  	return NULL;
  2805  }
  2806  
// Matches hci_handles in sys/linux/dev_vhci.txt.
// HCI_HANDLE_1 is the handle of the faked ACL connection and HCI_HANDLE_2 the
// handle of the faked low-energy connection created in initialize_vhci().
#define HCI_HANDLE_1 200
#define HCI_HANDLE_2 201

// HCI_PRIMARY is the opcode byte of the vendor packet written to /dev/vhci;
// HCI_OP_RESET is the command opcode initialize_vhci() may have to answer first.
#define HCI_PRIMARY 0
#define HCI_OP_RESET 0x0c03
  2813  
// Creates a virtual HCI device through /dev/vhci, brings it up and injects
// events that fake one ACL connection (HCI_HANDLE_1) and one low-energy
// connection (HCI_HANDLE_2). Every failure along the way is fatal via fail().
// On success vhci_fd refers to the device at fd number 202.
static void initialize_vhci()
{
#if SYZ_EXECUTOR
	if (!flag_vhci_injection)
		return;
#endif

	// Raw HCI socket, used only for the HCIDEVUP/HCISETSCAN ioctls below.
	int hci_sock = socket(AF_BLUETOOTH, SOCK_RAW, BTPROTO_HCI);
	if (hci_sock < 0)
		fail("socket(AF_BLUETOOTH, SOCK_RAW, BTPROTO_HCI) failed");

	vhci_fd = open("/dev/vhci", O_RDWR);
	if (vhci_fd == -1)
		fail("open /dev/vhci failed");

	// Remap vhci onto higher fd number to hide it from fuzzer and to keep
	// fd numbers stable regardless of whether vhci is opened or not (also see kMaxFd).
	const int kVhciFd = 202;
	if (dup2(vhci_fd, kVhciFd) < 0)
		fail("dup2(vhci_fd, kVhciFd) failed");
	close(vhci_fd);
	vhci_fd = kVhciFd;

	// Send the vendor packet request and read back the device's answer.
	struct vhci_vendor_pkt_request vendor_pkt_req = {HCI_VENDOR_PKT, HCI_PRIMARY};
	if (write(vhci_fd, &vendor_pkt_req, sizeof(vendor_pkt_req)) != sizeof(vendor_pkt_req))
		fail("vendor_pkt_req write failed");

	struct vhci_pkt vhci_pkt;
	if (read(vhci_fd, &vhci_pkt, sizeof(vhci_pkt)) != sizeof(vhci_pkt))
		fail("vhci_pkt read failed");

	// The first packet may be an HCI_OP_RESET command instead of the vendor
	// response; acknowledge it and read the next packet.
	if (vhci_pkt.type == HCI_COMMAND_PKT && vhci_pkt.command_hdr.opcode == HCI_OP_RESET) {
		char response[1] = {0};
		hci_send_event_cmd_complete(vhci_fd, HCI_OP_RESET, response, sizeof(response));

		if (read(vhci_fd, &vhci_pkt, sizeof(vhci_pkt)) != sizeof(vhci_pkt))
			fail("vhci_pkt read failed");
	}

	if (vhci_pkt.type != HCI_VENDOR_PKT)
		fail("wrong response packet");

	int dev_id = vhci_pkt.vendor_pkt.id;
	debug("hci dev id: %x\n", dev_id);

	// The thread answers the HCI commands arriving on vhci_fd while the ioctls
	// below are in progress.
	pthread_t th;
	if (pthread_create(&th, NULL, event_thread, NULL))
		fail("pthread_create failed");

	// Bring hci device up
	int ret = ioctl(hci_sock, HCIDEVUP, dev_id);
	if (ret) {
		// Blocked by rfkill: unblock everything and retry once.
		if (errno == ERFKILL) {
			rfkill_unblock_all();
			ret = ioctl(hci_sock, HCIDEVUP, dev_id);
		}

		// EALREADY is tolerated; any other error is fatal.
		if (ret && errno != EALREADY)
			fail("ioctl(HCIDEVUP) failed");
	}

	// Activate page scanning mode which is required to fake a connection.
	struct hci_dev_req dr = {0};
	dr.dev_id = dev_id;
	dr.dev_opt = SCAN_PAGE;
	if (ioctl(hci_sock, HCISETSCAN, &dr))
		fail("ioctl(HCISETSCAN) failed");

	// Fake a connection with bd address 10:aa:aa:aa:aa:aa.
	// This is a fixed address used in sys/linux/socket_bluetooth.txt.
	struct hci_ev_conn_request request;
	memset(&request, 0, sizeof(request));
	memset(&request.bdaddr, 0xaa, 6);
	*(uint8*)&request.bdaddr.b[5] = 0x10;
	request.link_type = ACL_LINK;
	hci_send_event_packet(vhci_fd, HCI_EV_CONN_REQUEST, &request, sizeof(request));

	// Complete the connection requested above, using handle HCI_HANDLE_1.
	struct hci_ev_conn_complete complete;
	memset(&complete, 0, sizeof(complete));
	complete.status = 0;
	complete.handle = HCI_HANDLE_1;
	memset(&complete.bdaddr, 0xaa, 6);
	*(uint8*)&complete.bdaddr.b[5] = 0x10;
	complete.link_type = ACL_LINK;
	complete.encr_mode = 0;
	hci_send_event_packet(vhci_fd, HCI_EV_CONN_COMPLETE, &complete, sizeof(complete));

	// Report an all-zero remote feature set for that connection.
	struct hci_ev_remote_features features;
	memset(&features, 0, sizeof(features));
	features.status = 0;
	features.handle = HCI_HANDLE_1;
	hci_send_event_packet(vhci_fd, HCI_EV_REMOTE_FEATURES, &features, sizeof(features));

	// Fake a low-energy connection with bd address 11:aa:aa:aa:aa:aa.
	// This is a fixed address used in sys/linux/socket_bluetooth.txt.
	struct {
		struct hci_ev_le_meta le_meta;
		struct hci_ev_le_conn_complete le_conn;
	} le_conn;
	memset(&le_conn, 0, sizeof(le_conn));
	le_conn.le_meta.subevent = HCI_EV_LE_CONN_COMPLETE;
	memset(&le_conn.le_conn.bdaddr, 0xaa, 6);
	*(uint8*)&le_conn.le_conn.bdaddr.b[5] = 0x11;
	le_conn.le_conn.role = 1;
	le_conn.le_conn.handle = HCI_HANDLE_2;
	hci_send_event_packet(vhci_fd, HCI_EV_LE_META, &le_conn, sizeof(le_conn));

	// event_thread returns once it has answered HCI_OP_WRITE_SCAN_ENABLE
	// (see process_command_pkt); wait for that before dropping the socket.
	pthread_join(th, NULL);
	close(hci_sock);
}
  2924  #endif
  2925  
  2926  #if SYZ_EXECUTOR || __NR_syz_emit_vhci && SYZ_VHCI_INJECTION
  2927  static long syz_emit_vhci(volatile long a0, volatile long a1)
  2928  {
  2929  	if (vhci_fd < 0)
  2930  		return (uintptr_t)-1;
  2931  
  2932  	char* data = (char*)a0;
  2933  	uint32 length = a1;
  2934  
  2935  	return write(vhci_fd, data, length);
  2936  }
  2937  #endif
  2938  
  2939  #if SYZ_EXECUTOR || __NR_syz_genetlink_get_family_id
  2940  #include <errno.h>
  2941  #include <sys/socket.h>
  2942  
  2943  static long syz_genetlink_get_family_id(volatile long name, volatile long sock_arg)
  2944  {
  2945  	debug("syz_genetlink_get_family_id(%s, %d)\n", (char*)name, (int)sock_arg);
  2946  	int fd = sock_arg;
  2947  	if (fd < 0) {
  2948  		fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
  2949  		if (fd == -1) {
  2950  			debug("syz_genetlink_get_family_id: socket failed: %d\n", errno);
  2951  			return -1;
  2952  		}
  2953  	}
  2954  	struct nlmsg nlmsg_tmp;
  2955  	int ret = netlink_query_family_id(&nlmsg_tmp, fd, (char*)name, false);
  2956  	if ((int)sock_arg < 0)
  2957  		close(fd);
  2958  	if (ret < 0) {
  2959  		debug("syz_genetlink_get_family_id: netlink_query_family_id failed: %d\n", ret);
  2960  		return -1;
  2961  	}
  2962  
  2963  	return ret;
  2964  }
  2965  #endif
  2966  
  2967  #if SYZ_EXECUTOR || __NR_syz_mount_image || __NR_syz_read_part_table
  2968  #include "common_zlib.h"
  2969  #include <errno.h>
  2970  #include <fcntl.h>
  2971  #include <linux/loop.h>
  2972  #include <stdbool.h>
  2973  #include <sys/ioctl.h>
  2974  #include <sys/stat.h>
  2975  #include <sys/types.h>
  2976  
  2977  // Setup the loop device needed for mounting a filesystem image. Takes care of
  2978  // creating and initializing the underlying file backing the loop device and
  2979  // returns the fds to the file and device.
  2980  // Returns 0 on success, -1 otherwise.
  2981  static int setup_loop_device(unsigned char* data, unsigned long size, const char* loopname, int* loopfd_p)
  2982  {
  2983  	int err = 0, loopfd = -1;
  2984  	int memfd = syscall(__NR_memfd_create, "syzkaller", 0);
  2985  	if (memfd == -1) {
  2986  		err = errno;
  2987  		goto error;
  2988  	}
  2989  	if (puff_zlib_to_file(data, size, memfd)) {
  2990  		err = errno;
  2991  		debug("setup_loop_device: could not decompress data: %d\n", errno);
  2992  		goto error_close_memfd;
  2993  	}
  2994  
  2995  	loopfd = open(loopname, O_RDWR);
  2996  	if (loopfd == -1) {
  2997  		err = errno;
  2998  		debug("setup_loop_device: open failed: %d\n", errno);
  2999  		goto error_close_memfd;
  3000  	}
  3001  	if (ioctl(loopfd, LOOP_SET_FD, memfd)) {
  3002  		if (errno != EBUSY) {
  3003  			err = errno;
  3004  			goto error_close_loop;
  3005  		}
  3006  		ioctl(loopfd, LOOP_CLR_FD, 0);
  3007  		usleep(1000);
  3008  		if (ioctl(loopfd, LOOP_SET_FD, memfd)) {
  3009  			err = errno;
  3010  			goto error_close_loop;
  3011  		}
  3012  	}
  3013  
  3014  	close(memfd);
  3015  	*loopfd_p = loopfd;
  3016  	return 0;
  3017  
  3018  error_close_loop:
  3019  	close(loopfd);
  3020  error_close_memfd:
  3021  	close(memfd);
  3022  error:
  3023  	errno = err;
  3024  	return -1;
  3025  }
  3026  
  3027  #if SYZ_EXECUTOR || __NR_syz_mount_image
  3028  
  3029  static void reset_loop_device(const char* loopname)
  3030  {
  3031  	int loopfd = open(loopname, O_RDWR);
  3032  	if (loopfd == -1) {
  3033  		debug("reset_loop_device: open failed: %d\n", errno);
  3034  		return;
  3035  	}
  3036  	if (ioctl(loopfd, LOOP_CLR_FD, 0)) {
  3037  		debug("reset_loop_device: LOOP_CLR_FD failed: %d\n", errno);
  3038  	}
  3039  	close(loopfd);
  3040  }
  3041  
  3042  #endif
  3043  
  3044  #endif
  3045  
  3046  #if SYZ_EXECUTOR || __NR_syz_read_part_table
  3047  // syz_read_part_table(size len[img], img ptr[in, compressed_image])
  3048  static long syz_read_part_table(volatile unsigned long size, volatile long image)
  3049  {
  3050  	unsigned char* data = (unsigned char*)image;
  3051  	int err = 0, res = -1, loopfd = -1;
  3052  	char loopname[64];
  3053  
  3054  	snprintf(loopname, sizeof(loopname), "/dev/loop%llu", procid);
  3055  	if (setup_loop_device(data, size, loopname, &loopfd) == -1)
  3056  		return -1;
  3057  
  3058  	struct loop_info64 info;
  3059  	if (ioctl(loopfd, LOOP_GET_STATUS64, &info)) {
  3060  		err = errno;
  3061  		goto error_clear_loop;
  3062  	}
  3063  #if SYZ_EXECUTOR
  3064  	cover_reset(0);
  3065  #endif
  3066  	info.lo_flags |= LO_FLAGS_PARTSCAN;
  3067  	if (ioctl(loopfd, LOOP_SET_STATUS64, &info)) {
  3068  		err = errno;
  3069  		goto error_clear_loop;
  3070  	}
  3071  	res = 0;
  3072  	// If we managed to parse some partitions, symlink them into our work dir.
  3073  	for (unsigned long i = 1, j = 0; i < 8; i++) {
  3074  		snprintf(loopname, sizeof(loopname), "/dev/loop%llup%d", procid, (int)i);
  3075  		struct stat statbuf;
  3076  		if (stat(loopname, &statbuf) == 0) {
  3077  			char linkname[64];
  3078  			snprintf(linkname, sizeof(linkname), "./file%d", (int)j++);
  3079  			if (symlink(loopname, linkname)) {
  3080  				debug("syz_read_part_table: symlink(%s, %s) failed: %d\n", loopname, linkname, errno);
  3081  			}
  3082  		}
  3083  	}
  3084  error_clear_loop:
  3085  	if (res)
  3086  		ioctl(loopfd, LOOP_CLR_FD, 0);
  3087  	close(loopfd);
  3088  	errno = err;
  3089  	return res;
  3090  }
  3091  #endif
  3092  
  3093  #if SYZ_EXECUTOR || __NR_syz_mount_image
  3094  #include <stddef.h>
  3095  #include <string.h>
  3096  #include <sys/mount.h>
  3097  
  3098  // syz_mount_image(
  3099  // 	fs ptr[in, string[fs]],
  3100  // 	dir ptr[in, filename],
  3101  // 	flags flags[mount_flags],
  3102  // 	opts ptr[in, fs_options],
  3103  // 	chdir bool8,
  3104  // 	size len[img],
  3105  // 	img ptr[in, compressed_image]
  3106  // ) fd_dir
  3107  static long syz_mount_image(
  3108      volatile long fsarg,
  3109      volatile long dir,
  3110      volatile long flags,
  3111      volatile long optsarg,
  3112      volatile long change_dir,
  3113      volatile unsigned long size,
  3114      volatile long image)
  3115  {
  3116  	unsigned char* data = (unsigned char*)image;
  3117  	int res = -1, err = 0, need_loop_device = !!size;
  3118  	char* mount_opts = (char*)optsarg;
  3119  	char* target = (char*)dir;
  3120  	char* fs = (char*)fsarg;
  3121  	char* source = NULL;
  3122  	char loopname[64];
  3123  
  3124  	if (need_loop_device) {
  3125  		int loopfd;
  3126  		// Some filesystems (e.g. FUSE) do not need a backing device or
  3127  		// filesystem image.
  3128  		memset(loopname, 0, sizeof(loopname));
  3129  		snprintf(loopname, sizeof(loopname), "/dev/loop%llu", procid);
  3130  		if (setup_loop_device(data, size, loopname, &loopfd) == -1)
  3131  			return -1;
  3132  		// If BLK_DEV_WRITE_MOUNTED is set, we won't be able to mount()
  3133  		// while holding the loop device fd.
  3134  		close(loopfd);
  3135  		source = loopname;
  3136  	}
  3137  
  3138  	mkdir(target, 0777);
  3139  	char opts[256];
  3140  	memset(opts, 0, sizeof(opts));
  3141  	// Leave some space for the additional options we append below.
  3142  	if (strlen(mount_opts) > (sizeof(opts) - 32)) {
  3143  		debug("ERROR: syz_mount_image parameter optsarg bigger than internal opts\n");
  3144  	}
  3145  	strncpy(opts, mount_opts, sizeof(opts) - 32);
  3146  	if (strcmp(fs, "iso9660") == 0) {
  3147  		flags |= MS_RDONLY;
  3148  	} else if (strncmp(fs, "ext", 3) == 0) {
  3149  		// For ext2/3/4 we have to have errors=continue because the image
  3150  		// can contain errors=panic flag and can legally crash kernel.
  3151  		bool has_remount_ro = false;
  3152  		char* remount_ro_start = strstr(opts, "errors=remount-ro");
  3153  		if (remount_ro_start != NULL) {
  3154  			// syzkaller can sometimes break the options format, so we have to make sure this option can really be parsed.
  3155  			char after = *(remount_ro_start + strlen("errors=remount-ro"));
  3156  			char before = remount_ro_start == opts ? '\0' : *(remount_ro_start - 1);
  3157  			has_remount_ro = ((before == '\0' || before == ',') && (after == '\0' || after == ','));
  3158  		}
  3159  		if (strstr(opts, "errors=panic") || !has_remount_ro)
  3160  			strcat(opts, ",errors=continue");
  3161  	} else if (strcmp(fs, "xfs") == 0) {
  3162  		// For xfs we need nouuid because xfs has a global uuids table
  3163  		// and if two parallel executors mounts fs with the same uuid, second mount fails.
  3164  		strcat(opts, ",nouuid");
  3165  	} else if (strncmp(fs, "gfs2", 4) == 0 && (strstr(opts, "errors=panic") || strstr(opts, "debug"))) {
  3166  		// Otherwise ordinary withdrawals turn into kernel panics, see #6189.
  3167  		strcat(opts, ",errors=withdraw");
  3168  	}
  3169  	debug("syz_mount_image: size=%llu loop='%s' dir='%s' fs='%s' flags=%llu opts='%s'\n", (uint64)size, loopname, target, fs, (uint64)flags, opts);
  3170  #if SYZ_EXECUTOR
  3171  	cover_reset(0);
  3172  #endif
  3173  	res = mount(source, target, fs, flags, opts);
  3174  	if (res == -1) {
  3175  		debug("syz_mount_image > mount error: %d\n", errno);
  3176  		err = errno;
  3177  		goto error_clear_loop;
  3178  	}
  3179  	res = open(target, O_RDONLY | O_DIRECTORY);
  3180  	if (res == -1) {
  3181  		debug("syz_mount_image > open error: %d\n", errno);
  3182  		err = errno;
  3183  		goto error_clear_loop;
  3184  	}
  3185  	if (change_dir) {
  3186  		res = chdir(target);
  3187  		if (res == -1) {
  3188  			debug("syz_mount_image > chdir error: %d\n", errno);
  3189  			err = errno;
  3190  		}
  3191  	}
  3192  
  3193  error_clear_loop:
  3194  	if (need_loop_device)
  3195  		reset_loop_device(loopname);
  3196  	errno = err;
  3197  	return res;
  3198  }
  3199  #endif
  3200  
  3201  #if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu || __NR_syz_kvm_vgic_v3_setup || __NR_syz_kvm_setup_syzos_vm || __NR_syz_kvm_add_vcpu || __NR_syz_kvm_assert_syzos_uexit || __NR_syz_kvm_assert_reg || __NR_syz_kvm_assert_syzos_kvm_exit
  3202  // KVM is not yet supported on RISC-V
  3203  #if !GOARCH_riscv64 && !GOARCH_arm
  3204  #include <errno.h>
  3205  #include <fcntl.h>
  3206  #include <linux/kvm.h>
  3207  #include <stdarg.h>
  3208  #include <stddef.h>
  3209  #include <sys/ioctl.h>
  3210  #include <sys/stat.h>
  3211  
  3212  #if GOARCH_amd64
  3213  #include "common_kvm_amd64.h"
  3214  #elif GOARCH_386
  3215  #include "common_kvm_386.h"
  3216  #elif GOARCH_arm64
  3217  #include "common_kvm_arm64.h"
  3218  #elif GOARCH_ppc64 || GOARCH_ppc64le
  3219  #include "common_kvm_ppc64.h"
  3220  #elif !GOARCH_arm && (SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu)
  3221  static volatile long syz_kvm_setup_cpu(volatile long a0, volatile long a1, volatile long a2, volatile long a3, volatile long a4, volatile long a5, volatile long a6, volatile long a7)
  3222  {
  3223  	return 0;
  3224  }
  3225  #endif
  3226  #endif
  3227  #endif
  3228  
  3229  #if (SYZ_EXECUTOR || SYZ_NET_RESET) && SYZ_EXECUTOR_USES_FORK_SERVER
  3230  #include <errno.h>
  3231  #include <net/if.h>
  3232  #include <netinet/in.h>
  3233  #include <string.h>
  3234  #include <sys/socket.h>
  3235  
  3236  #include <linux/net.h>
  3237  
  3238  // checkpoint/reset_net_namespace partially resets net namespace to initial state
  3239  // after each test. Currently it resets only ipv4 netfilter state.
  3240  // Ideally, we just create a new net namespace for each test,
  3241  // however it's too slow (1-1.5 seconds per namespace, not parallelizable).
  3242  
// Linux headers do not compile for C++, so we have to define the structs manually.
// XT_TABLE_SIZE is the size in bytes of the entry-table buffers declared below.
// NOTE(review): XT_MAX_ENTRIES is not used in this part of the file; presumably
// it bounds num_entries in the checkpoint/reset code - confirm.
#define XT_TABLE_SIZE 1536
#define XT_MAX_ENTRIES 10

// Packet and byte counter pair.
struct xt_counters {
	uint64 pcnt, bcnt;
};

// Table metadata for the table called name (5 hook slots).
struct ipt_getinfo {
	char name[32];
	unsigned int valid_hooks;
	unsigned int hook_entry[5];
	unsigned int underflow[5];
	unsigned int num_entries;
	unsigned int size;
};

// Request/response buffer for reading a table's entries;
// entrytable provides XT_TABLE_SIZE bytes of storage.
struct ipt_get_entries {
	char name[32];
	unsigned int size;
	uint64 entrytable[XT_TABLE_SIZE / sizeof(uint64)];
};

// Description of a complete replacement table: the metadata fields of
// ipt_getinfo plus a counters pointer and XT_TABLE_SIZE bytes of entries.
struct ipt_replace {
	char name[32];
	unsigned int valid_hooks;
	unsigned int num_entries;
	unsigned int size;
	unsigned int hook_entry[5];
	unsigned int underflow[5];
	unsigned int num_counters;
	struct xt_counters* counters;
	uint64 entrytable[XT_TABLE_SIZE / sizeof(uint64)];
};

// Per-table state: the table name plus storage for its info and replace
// structures.
struct ipt_table_desc {
	const char* name;
	struct ipt_getinfo info;
	struct ipt_replace replace;
};

// Tables tracked for IPv4. Only the name is set here; info and replace start
// out zero-initialized.
static struct ipt_table_desc ipv4_tables[] = {
    {.name = "filter"},
    {.name = "nat"},
    {.name = "mangle"},
    {.name = "raw"},
    {.name = "security"},
};

// Tables tracked for IPv6; same set of names and same initial state as above.
static struct ipt_table_desc ipv6_tables[] = {
    {.name = "filter"},
    {.name = "nat"},
    {.name = "mangle"},
    {.name = "raw"},
    {.name = "security"},
};
  3299  
  3300  #define IPT_BASE_CTL 64
  3301  #define IPT_SO_SET_REPLACE (IPT_BASE_CTL)
  3302  #define IPT_SO_GET_INFO (IPT_BASE_CTL)
  3303  #define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1)
  3304  
// Table header exchanged with the kernel via getsockopt(ARPT_SO_GET_INFO).
// Same layout as ipt_getinfo except that the hook arrays have 3 slots.
struct arpt_getinfo {
	char name[32];
	unsigned int valid_hooks; // 0 is treated as "table not checkpointed" by the reset code
	unsigned int hook_entry[3];
	unsigned int underflow[3];
	unsigned int num_entries; // checked against XT_MAX_ENTRIES at checkpoint time
	unsigned int size; // size in bytes of the entry blob; checked against XT_TABLE_SIZE
};
  3313  
// Argument of getsockopt(ARPT_SO_GET_ENTRIES): the caller sets `name` and
// `size`, and the entry blob is read back from `entrytable`.
struct arpt_get_entries {
	char name[32];
	unsigned int size;
	// Declared as uint64 elements; the total capacity is XT_TABLE_SIZE bytes.
	uint64 entrytable[XT_TABLE_SIZE / sizeof(uint64)];
};
  3319  
// Argument of setsockopt(ARPT_SO_SET_REPLACE). The checkpoint code fills in
// everything except the counters fields, which are set right before the
// table is replaced.
struct arpt_replace {
	char name[32];
	unsigned int valid_hooks;
	unsigned int num_entries;
	unsigned int size; // number of bytes of `entrytable` that are in use
	unsigned int hook_entry[3];
	unsigned int underflow[3];
	unsigned int num_counters; // set to the entry count of the currently installed table
	struct xt_counters* counters; // user buffer of num_counters elements
	uint64 entrytable[XT_TABLE_SIZE / sizeof(uint64)];
};
  3331  
// Checkpoint state of a single arp table.
struct arpt_table_desc {
	const char* name; // table name, copied into info.name / replace.name
	struct arpt_getinfo info; // header captured at checkpoint time
	struct arpt_replace replace; // ready-to-send request that restores the checkpoint
};
  3337  
// Tables handled by checkpoint_arptables()/reset_arptables().
static struct arpt_table_desc arpt_tables[] = {
    {.name = "filter"},
};
  3341  
// Socket option numbers for the arp tables interface.
// ARPT_SO_SET_REPLACE (used with setsockopt) and ARPT_SO_GET_INFO (used with
// getsockopt) share the same number.
#define ARPT_BASE_CTL 96
#define ARPT_SO_SET_REPLACE (ARPT_BASE_CTL)
#define ARPT_SO_GET_INFO (ARPT_BASE_CTL)
#define ARPT_SO_GET_ENTRIES (ARPT_BASE_CTL + 1)
  3346  
  3347  static void checkpoint_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level)
  3348  {
  3349  	int fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
  3350  	if (fd == -1) {
  3351  		switch (errno) {
  3352  		case EAFNOSUPPORT:
  3353  		case ENOPROTOOPT:
  3354  		// ENOENT can be returned if smack lsm is used. Smack tried to aplly netlbl to created sockets,
  3355  		// but the fuzzer can manage to remove netlbl entry for SOCK_STREAM/IPPROTO_TCP using
  3356  		// NLBL_MGMT_C_REMOVE, which is unfortunately global (not part of net namespace). In this state
  3357  		// creation of such sockets will fail all the time in all processes (so in some sense the machine
  3358  		// is indeed broken), but ignoring the error is still probably the best option given we allow
  3359  		// the fuzzer to invoke NLBL_MGMT_C_REMOVE in the first place.
  3360  		case ENOENT:
  3361  			return;
  3362  		}
  3363  		failmsg("iptable checkpoint: socket(SOCK_STREAM, IPPROTO_TCP) failed", "family=%d", family);
  3364  	}
  3365  	for (int i = 0; i < num_tables; i++) {
  3366  		struct ipt_table_desc* table = &tables[i];
  3367  		strcpy(table->info.name, table->name);
  3368  		strcpy(table->replace.name, table->name);
  3369  		socklen_t optlen = sizeof(table->info);
  3370  		if (getsockopt(fd, level, IPT_SO_GET_INFO, &table->info, &optlen)) {
  3371  			switch (errno) {
  3372  			case EPERM:
  3373  			case ENOENT:
  3374  			case ENOPROTOOPT:
  3375  				continue;
  3376  			}
  3377  			failmsg("iptable checkpoint: getsockopt(IPT_SO_GET_INFO) failed",
  3378  				"table=%s, family=%d", table->name, family);
  3379  		}
  3380  		debug("iptable checkpoint %s/%d: checkpoint entries=%d hooks=%x size=%d\n",
  3381  		      table->name, family, table->info.num_entries,
  3382  		      table->info.valid_hooks, table->info.size);
  3383  		if (table->info.size > sizeof(table->replace.entrytable))
  3384  			failmsg("iptable checkpoint: table size is too large", "table=%s, family=%d, size=%u",
  3385  				table->name, family, table->info.size);
  3386  		if (table->info.num_entries > XT_MAX_ENTRIES)
  3387  			failmsg("iptable checkpoint: too many counters", "table=%s, family=%d, counters=%d",
  3388  				table->name, family, table->info.num_entries);
  3389  		struct ipt_get_entries entries;
  3390  		memset(&entries, 0, sizeof(entries));
  3391  		strcpy(entries.name, table->name);
  3392  		entries.size = table->info.size;
  3393  		optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size;
  3394  		if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
  3395  			failmsg("iptable checkpoint: getsockopt(IPT_SO_GET_ENTRIES) failed",
  3396  				"table=%s, family=%d", table->name, family);
  3397  		table->replace.valid_hooks = table->info.valid_hooks;
  3398  		table->replace.num_entries = table->info.num_entries;
  3399  		table->replace.size = table->info.size;
  3400  		memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry));
  3401  		memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow));
  3402  		memcpy(table->replace.entrytable, entries.entrytable, table->info.size);
  3403  	}
  3404  	close(fd);
  3405  }
  3406  
  3407  static void reset_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level)
  3408  {
  3409  	int fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
  3410  	if (fd == -1) {
  3411  		switch (errno) {
  3412  		case EAFNOSUPPORT:
  3413  		case ENOPROTOOPT:
  3414  		case ENOENT:
  3415  			return;
  3416  		}
  3417  		failmsg("iptable: socket(SOCK_STREAM, IPPROTO_TCP) failed", "family=%d", family);
  3418  	}
  3419  	for (int i = 0; i < num_tables; i++) {
  3420  		struct ipt_table_desc* table = &tables[i];
  3421  		if (table->info.valid_hooks == 0)
  3422  			continue;
  3423  		struct ipt_getinfo info;
  3424  		memset(&info, 0, sizeof(info));
  3425  		strcpy(info.name, table->name);
  3426  		socklen_t optlen = sizeof(info);
  3427  		if (getsockopt(fd, level, IPT_SO_GET_INFO, &info, &optlen))
  3428  			failmsg("iptable: getsockopt(IPT_SO_GET_INFO) failed",
  3429  				"table=%s, family=%d", table->name, family);
  3430  		if (memcmp(&table->info, &info, sizeof(table->info)) == 0) {
  3431  			struct ipt_get_entries entries;
  3432  			memset(&entries, 0, sizeof(entries));
  3433  			strcpy(entries.name, table->name);
  3434  			entries.size = table->info.size;
  3435  			optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size;
  3436  			if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
  3437  				failmsg("iptable: getsockopt(IPT_SO_GET_ENTRIES) failed",
  3438  					"table=%s, family=%d", table->name, family);
  3439  			if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0)
  3440  				continue;
  3441  		}
  3442  		debug("iptable %s/%d: resetting\n", table->name, family);
  3443  		struct xt_counters counters[XT_MAX_ENTRIES];
  3444  		table->replace.num_counters = info.num_entries;
  3445  		table->replace.counters = counters;
  3446  		optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size;
  3447  		if (setsockopt(fd, level, IPT_SO_SET_REPLACE, &table->replace, optlen))
  3448  			failmsg("iptable: setsockopt(IPT_SO_SET_REPLACE) failed",
  3449  				"table=%s, family=%d", table->name, family);
  3450  	}
  3451  	close(fd);
  3452  }
  3453  
  3454  static void checkpoint_arptables(void)
  3455  {
  3456  	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
  3457  	if (fd == -1) {
  3458  		switch (errno) {
  3459  		case EAFNOSUPPORT:
  3460  		case ENOPROTOOPT:
  3461  		case ENOENT:
  3462  			return;
  3463  		}
  3464  		fail("arptable checkpoint: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP) failed");
  3465  	}
  3466  	for (unsigned i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) {
  3467  		struct arpt_table_desc* table = &arpt_tables[i];
  3468  		strcpy(table->info.name, table->name);
  3469  		strcpy(table->replace.name, table->name);
  3470  		socklen_t optlen = sizeof(table->info);
  3471  		if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &table->info, &optlen)) {
  3472  			switch (errno) {
  3473  			case EPERM:
  3474  			case ENOENT:
  3475  			case ENOPROTOOPT:
  3476  				continue;
  3477  			}
  3478  			failmsg("arptable checkpoint: getsockopt(ARPT_SO_GET_INFO) failed", "table=%s", table->name);
  3479  		}
  3480  		debug("arptable checkpoint %s: entries=%d hooks=%x size=%d\n",
  3481  		      table->name, table->info.num_entries, table->info.valid_hooks, table->info.size);
  3482  		if (table->info.size > sizeof(table->replace.entrytable))
  3483  			failmsg("arptable checkpoint: table size is too large",
  3484  				"table=%s, size=%u", table->name, table->info.size);
  3485  		if (table->info.num_entries > XT_MAX_ENTRIES)
  3486  			failmsg("arptable checkpoint: too many counters",
  3487  				"table=%s, counters=%u", table->name, table->info.num_entries);
  3488  		struct arpt_get_entries entries;
  3489  		memset(&entries, 0, sizeof(entries));
  3490  		strcpy(entries.name, table->name);
  3491  		entries.size = table->info.size;
  3492  		optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size;
  3493  		if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen))
  3494  			failmsg("arptable checkpoint: getsockopt(ARPT_SO_GET_ENTRIES) failed", "table=%s", table->name);
  3495  		table->replace.valid_hooks = table->info.valid_hooks;
  3496  		table->replace.num_entries = table->info.num_entries;
  3497  		table->replace.size = table->info.size;
  3498  		memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry));
  3499  		memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow));
  3500  		memcpy(table->replace.entrytable, entries.entrytable, table->info.size);
  3501  	}
  3502  	close(fd);
  3503  }
  3504  
  3505  static void reset_arptables()
  3506  {
  3507  	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
  3508  	if (fd == -1) {
  3509  		switch (errno) {
  3510  		case EAFNOSUPPORT:
  3511  		case ENOPROTOOPT:
  3512  		case ENOENT:
  3513  			return;
  3514  		}
  3515  		fail("arptable: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
  3516  	}
  3517  	for (unsigned i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) {
  3518  		struct arpt_table_desc* table = &arpt_tables[i];
  3519  		if (table->info.valid_hooks == 0)
  3520  			continue;
  3521  		struct arpt_getinfo info;
  3522  		memset(&info, 0, sizeof(info));
  3523  		strcpy(info.name, table->name);
  3524  		socklen_t optlen = sizeof(info);
  3525  		if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &info, &optlen))
  3526  			failmsg("arptable: getsockopt(ARPT_SO_GET_INFO) failed", "table=%s", table->name);
  3527  		if (memcmp(&table->info, &info, sizeof(table->info)) == 0) {
  3528  			struct arpt_get_entries entries;
  3529  			memset(&entries, 0, sizeof(entries));
  3530  			strcpy(entries.name, table->name);
  3531  			entries.size = table->info.size;
  3532  			optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size;
  3533  			if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen))
  3534  				failmsg("arptable: getsockopt(ARPT_SO_GET_ENTRIES) failed", "table=%s", table->name);
  3535  			if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0)
  3536  				continue;
  3537  			debug("arptable %s: data changed\n", table->name);
  3538  		} else {
  3539  			debug("arptable %s: header changed\n", table->name);
  3540  		}
  3541  		debug("arptable %s: resetting\n", table->name);
  3542  		struct xt_counters counters[XT_MAX_ENTRIES];
  3543  		table->replace.num_counters = info.num_entries;
  3544  		table->replace.counters = counters;
  3545  		optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size;
  3546  		if (setsockopt(fd, SOL_IP, ARPT_SO_SET_REPLACE, &table->replace, optlen))
  3547  			failmsg("arptable: setsockopt(ARPT_SO_SET_REPLACE) failed",
  3548  				"table=%s", table->name);
  3549  	}
  3550  	close(fd);
  3551  }
  3552  
  3553  // ebtables.h is broken too:
  3554  // ebtables.h: In function ‘ebt_entry_target* ebt_get_target(ebt_entry*)’:
  3555  // ebtables.h:197:19: error: invalid conversion from ‘void*’ to ‘ebt_entry_target*’
  3556  
// Number of hook slots in ebt_replace::hook_entry.
#define NF_BR_NUMHOOKS 6
// Sizes of the table and chain name buffers in the structs below.
#define EBT_TABLE_MAXNAMELEN 32
#define EBT_CHAIN_MAXNAMELEN 32
// Socket option numbers for the ebtables interface.
// EBT_SO_SET_ENTRIES (used with setsockopt) and EBT_SO_GET_INFO (used with
// getsockopt) share the same number; the remaining getsockopt options follow
// consecutively. The *_INIT_* options are the ones used for checkpointing.
#define EBT_BASE_CTL 128
#define EBT_SO_SET_ENTRIES (EBT_BASE_CTL)
#define EBT_SO_GET_INFO (EBT_BASE_CTL)
#define EBT_SO_GET_ENTRIES (EBT_SO_GET_INFO + 1)
#define EBT_SO_GET_INIT_INFO (EBT_SO_GET_ENTRIES + 1)
#define EBT_SO_GET_INIT_ENTRIES (EBT_SO_GET_INIT_INFO + 1)
  3566  
// Request/response structure shared by all EBT_SO_* socket options used here:
// it is both read back via getsockopt and sent via setsockopt.
struct ebt_replace {
	char name[EBT_TABLE_MAXNAMELEN];
	unsigned int valid_hooks; // bit h set => hook_entry[h] is in use; 0 is treated as "not checkpointed"
	unsigned int nentries;
	unsigned int entries_size; // size in bytes of the blob that `entries` points to
	struct ebt_entries* hook_entry[NF_BR_NUMHOOKS];
	unsigned int num_counters; // always set to 0 by the code in this file
	struct ebt_counter* counters;
	char* entries; // user buffer holding the entry blob
};
  3577  
// Header of one chain inside an ebtables entry blob. The reset code only uses
// this type for pointer arithmetic when recomputing ebt_replace::hook_entry.
struct ebt_entries {
	unsigned int distinguisher;
	char name[EBT_CHAIN_MAXNAMELEN];
	unsigned int counter_offset;
	int policy;
	unsigned int nentries;
	// Zero-length trailing array, aligned like struct ebt_replace.
	char data[0] __attribute__((aligned(__alignof__(struct ebt_replace))));
};
  3586  
// Checkpoint state of a single ebtables table.
struct ebt_table_desc {
	const char* name; // table name, copied into replace.name
	struct ebt_replace replace; // header captured at checkpoint time, reused as the restore request
	char entrytable[XT_TABLE_SIZE]; // entry blob captured at checkpoint time
};
  3592  
// Tables handled by checkpoint_ebtables()/reset_ebtables().
static struct ebt_table_desc ebt_tables[] = {
    {.name = "filter"},
    {.name = "nat"},
    {.name = "broute"},
};
  3598  
  3599  static void checkpoint_ebtables(void)
  3600  {
  3601  	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
  3602  	if (fd == -1) {
  3603  		switch (errno) {
  3604  		case EAFNOSUPPORT:
  3605  		case ENOPROTOOPT:
  3606  		case ENOENT:
  3607  			return;
  3608  		}
  3609  		fail("ebtable checkpoint: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
  3610  	}
  3611  	for (size_t i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) {
  3612  		struct ebt_table_desc* table = &ebt_tables[i];
  3613  		strcpy(table->replace.name, table->name);
  3614  		socklen_t optlen = sizeof(table->replace);
  3615  		if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_INFO, &table->replace, &optlen)) {
  3616  			switch (errno) {
  3617  			case EPERM:
  3618  			case ENOENT:
  3619  			case ENOPROTOOPT:
  3620  				continue;
  3621  			}
  3622  			failmsg("ebtable checkpoint: getsockopt(EBT_SO_GET_INIT_INFO) failed",
  3623  				"table=%s", table->name);
  3624  		}
  3625  		debug("ebtable checkpoint %s: entries=%d hooks=%x size=%d\n",
  3626  		      table->name, table->replace.nentries, table->replace.valid_hooks,
  3627  		      table->replace.entries_size);
  3628  		if (table->replace.entries_size > sizeof(table->entrytable))
  3629  			failmsg("ebtable checkpoint: table size is too large", "table=%s, size=%u",
  3630  				table->name, table->replace.entries_size);
  3631  		table->replace.num_counters = 0;
  3632  		table->replace.entries = table->entrytable;
  3633  		optlen = sizeof(table->replace) + table->replace.entries_size;
  3634  		if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_ENTRIES, &table->replace, &optlen))
  3635  			failmsg("ebtable checkpoint: getsockopt(EBT_SO_GET_INIT_ENTRIES) failed",
  3636  				"table=%s", table->name);
  3637  	}
  3638  	close(fd);
  3639  }
  3640  
  3641  static void reset_ebtables()
  3642  {
  3643  	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
  3644  	if (fd == -1) {
  3645  		switch (errno) {
  3646  		case EAFNOSUPPORT:
  3647  		case ENOPROTOOPT:
  3648  		case ENOENT:
  3649  			return;
  3650  		}
  3651  		fail("ebtable: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
  3652  	}
  3653  	for (unsigned i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) {
  3654  		struct ebt_table_desc* table = &ebt_tables[i];
  3655  		if (table->replace.valid_hooks == 0)
  3656  			continue;
  3657  		struct ebt_replace replace;
  3658  		memset(&replace, 0, sizeof(replace));
  3659  		strcpy(replace.name, table->name);
  3660  		socklen_t optlen = sizeof(replace);
  3661  		if (getsockopt(fd, SOL_IP, EBT_SO_GET_INFO, &replace, &optlen))
  3662  			failmsg("ebtable: getsockopt(EBT_SO_GET_INFO)", "table=%s", table->name);
  3663  		replace.num_counters = 0;
  3664  		table->replace.entries = 0;
  3665  		for (unsigned h = 0; h < NF_BR_NUMHOOKS; h++)
  3666  			table->replace.hook_entry[h] = 0;
  3667  		if (memcmp(&table->replace, &replace, sizeof(table->replace)) == 0) {
  3668  			char entrytable[XT_TABLE_SIZE];
  3669  			memset(&entrytable, 0, sizeof(entrytable));
  3670  			replace.entries = entrytable;
  3671  			optlen = sizeof(replace) + replace.entries_size;
  3672  			if (getsockopt(fd, SOL_IP, EBT_SO_GET_ENTRIES, &replace, &optlen))
  3673  				failmsg("ebtable: getsockopt(EBT_SO_GET_ENTRIES) failed", "table=%s", table->name);
  3674  			if (memcmp(table->entrytable, entrytable, replace.entries_size) == 0)
  3675  				continue;
  3676  		}
  3677  		debug("ebtable %s: resetting\n", table->name);
  3678  		// Kernel does not seem to return actual entry points (wat?).
  3679  		for (unsigned j = 0, h = 0; h < NF_BR_NUMHOOKS; h++) {
  3680  			if (table->replace.valid_hooks & (1 << h)) {
  3681  				table->replace.hook_entry[h] = (struct ebt_entries*)table->entrytable + j;
  3682  				j++;
  3683  			}
  3684  		}
  3685  		table->replace.entries = table->entrytable;
  3686  		optlen = sizeof(table->replace) + table->replace.entries_size;
  3687  		if (setsockopt(fd, SOL_IP, EBT_SO_SET_ENTRIES, &table->replace, optlen))
  3688  			failmsg("ebtable: setsockopt(EBT_SO_SET_ENTRIES) failed", "table=%s", table->name);
  3689  	}
  3690  	close(fd);
  3691  }
  3692  
  3693  static void checkpoint_net_namespace(void)
  3694  {
  3695  #if SYZ_EXECUTOR
  3696  	if (!flag_net_reset || flag_sandbox_setuid)
  3697  		return;
  3698  #endif
  3699  	checkpoint_ebtables();
  3700  	checkpoint_arptables();
  3701  	checkpoint_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP);
  3702  	checkpoint_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6);
  3703  }
  3704  
  3705  static void reset_net_namespace(void)
  3706  {
  3707  #if SYZ_EXECUTOR
  3708  	if (!flag_net_reset || flag_sandbox_setuid)
  3709  		return;
  3710  #endif
  3711  	reset_ebtables();
  3712  	reset_arptables();
  3713  	reset_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP);
  3714  	reset_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6);
  3715  }
  3716  #endif
  3717  
  3718  #if SYZ_EXECUTOR || (SYZ_CGROUPS && (SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID))
  3719  #include <fcntl.h>
  3720  #include <string.h>
  3721  #include <sys/mount.h>
  3722  #include <sys/stat.h>
  3723  #include <sys/types.h>
  3724  
  3725  static void mount_cgroups(const char* dir, const char** controllers, int count)
  3726  {
  3727  	if (mkdir(dir, 0777)) {
  3728  		debug("mkdir(%s) failed: %d\n", dir, errno);
  3729  		return;
  3730  	}
  3731  	// First, probe one-by-one to understand what controllers are present.
  3732  	char enabled[128] = {0};
  3733  	int i = 0;
  3734  	for (; i < count; i++) {
  3735  		if (mount("none", dir, "cgroup", 0, controllers[i])) {
  3736  			debug("mount(%s, %s) failed: %d\n", dir, controllers[i], errno);
  3737  			continue;
  3738  		}
  3739  		umount(dir);
  3740  		strcat(enabled, ",");
  3741  		strcat(enabled, controllers[i]);
  3742  	}
  3743  	if (enabled[0] == 0) {
  3744  		if (rmdir(dir) && errno != EBUSY)
  3745  			failmsg("rmdir failed", "dir=%s", dir);
  3746  		return;
  3747  	}
  3748  	// Now mount all at once.
  3749  	if (mount("none", dir, "cgroup", 0, enabled + 1)) {
  3750  		// In systemd/stretch images this is failing with EBUSY
  3751  		// (systemd starts messing with these mounts?),
  3752  		// so we don't fail, but just log the error.
  3753  		debug("mount(%s, %s) failed: %d\n", dir, enabled + 1, errno);
  3754  		if (rmdir(dir) && errno != EBUSY)
  3755  			failmsg("rmdir failed", "dir=%s enabled=%s", dir, enabled);
  3756  	}
  3757  	if (chmod(dir, 0777)) {
  3758  		debug("chmod(%s) failed: %d\n", dir, errno);
  3759  	}
  3760  }
  3761  
  3762  static void mount_cgroups2(const char** controllers, int count)
  3763  {
  3764  	if (mkdir("/syzcgroup/unified", 0777)) {
  3765  		debug("mkdir(/syzcgroup/unified) failed: %d\n", errno);
  3766  		return;
  3767  	}
  3768  	if (mount("none", "/syzcgroup/unified", "cgroup2", 0, NULL)) {
  3769  		debug("mount(cgroup2) failed: %d\n", errno);
  3770  		// For all cases when we don't end up mounting cgroup/cgroup2
  3771  		// in /syzcgroup/{unified,net,cpu}, we need to remove the dir.
  3772  		// Otherwise these will end up as normal dirs and the fuzzer may
  3773  		// create huge files there. These files won't be cleaned up
  3774  		// after tests and may easily consume all disk space.
  3775  		// EBUSY usually means that cgroup is already mounted there
  3776  		// by a previous run of e.g. syz-execprog.
  3777  		if (rmdir("/syzcgroup/unified") && errno != EBUSY)
  3778  			fail("rmdir(/syzcgroup/unified) failed");
  3779  		return;
  3780  	}
  3781  	if (chmod("/syzcgroup/unified", 0777)) {
  3782  		debug("chmod(/syzcgroup/unified) failed: %d\n", errno);
  3783  	}
  3784  	int control = open("/syzcgroup/unified/cgroup.subtree_control", O_WRONLY);
  3785  	if (control == -1)
  3786  		return;
  3787  	int i;
  3788  	for (i = 0; i < count; i++)
  3789  		if (write(control, controllers[i], strlen(controllers[i])) < 0) {
  3790  			debug("write(cgroup.subtree_control, %s) failed: %d\n", controllers[i], errno);
  3791  		}
  3792  	close(control);
  3793  }
  3794  
  3795  static void setup_cgroups()
  3796  {
  3797  	// We want to cover both cgroup and cgroup2.
  3798  	// Each resource controller can be bound to only one of them,
  3799  	// so to cover both we divide all controllers into 3 arbitrary groups.
  3800  	// One group is then bound to cgroup2/unified, and 2 other groups
  3801  	// are bound to 2 cgroup hierarchies.
  3802  	// Note: we need to enable controllers one-by-one for both cgroup and cgroup2.
  3803  	// If we enable all at the same time and one of them fails (b/c of older kernel
  3804  	// or not enabled configs), then all will fail.
  3805  	const char* unified_controllers[] = {"+cpu", "+io", "+pids"};
  3806  	const char* net_controllers[] = {"net", "net_prio", "devices", "blkio", "freezer"};
  3807  	const char* cpu_controllers[] = {"cpuset", "cpuacct", "hugetlb", "rlimit", "memory"};
  3808  	if (mkdir("/syzcgroup", 0777)) {
  3809  		// Can happen due to e.g. read-only file system (EROFS).
  3810  		debug("mkdir(/syzcgroup) failed: %d\n", errno);
  3811  		return;
  3812  	}
  3813  	mount_cgroups2(unified_controllers, sizeof(unified_controllers) / sizeof(unified_controllers[0]));
  3814  	mount_cgroups("/syzcgroup/net", net_controllers, sizeof(net_controllers) / sizeof(net_controllers[0]));
  3815  	mount_cgroups("/syzcgroup/cpu", cpu_controllers, sizeof(cpu_controllers) / sizeof(cpu_controllers[0]));
  3816  	write_file("/syzcgroup/cpu/cgroup.clone_children", "1");
  3817  	write_file("/syzcgroup/cpu/cpuset.memory_pressure_enabled", "1");
  3818  }
  3819  
  3820  #if (SYZ_EXECUTOR || SYZ_REPEAT) && SYZ_EXECUTOR_USES_FORK_SERVER
  3821  static void setup_cgroups_loop()
  3822  {
  3823  #if SYZ_EXECUTOR
  3824  	if (!flag_cgroups)
  3825  		return;
  3826  #endif
  3827  	int pid = getpid();
  3828  	char file[128];
  3829  	char cgroupdir[64];
  3830  	snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid);
  3831  	if (mkdir(cgroupdir, 0777)) {
  3832  		debug("mkdir(%s) failed: %d\n", cgroupdir, errno);
  3833  	}
  3834  	// Restrict number of pids per test process to prevent fork bombs.
  3835  	// We have up to 16 threads + main process + loop.
  3836  	// 32 pids should be enough for everyone.
  3837  	snprintf(file, sizeof(file), "%s/pids.max", cgroupdir);
  3838  	write_file(file, "32");
  3839  	// Setup some v1 groups to make things more interesting.
  3840  	snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
  3841  	write_file(file, "%d", pid);
  3842  	snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid);
  3843  	if (mkdir(cgroupdir, 0777)) {
  3844  		debug("mkdir(%s) failed: %d\n", cgroupdir, errno);
  3845  	}
  3846  	snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
  3847  	write_file(file, "%d", pid);
  3848  	// Restrict memory consumption.
  3849  	// We have some syscalls that inherently consume lots of memory,
  3850  	// e.g. mounting some filesystem images requires at least 128MB
  3851  	// image in memory. We restrict RLIMIT_AS to 200MB. Here we gradually
  3852  	// increase memory limits to make things more interesting.
  3853  	// Also this takes into account KASAN quarantine size.
  3854  	// If the limit is lower than KASAN quarantine size, then it can happen
  3855  	// so that we kill the process, but all of its memory is in quarantine
  3856  	// and is still accounted against memcg. As the result memcg won't
  3857  	// allow to allocate any memory in the parent and in the new test process.
  3858  	// The current limit of 300MB supports up to 9.6GB RAM (quarantine is 1/32).
  3859  	snprintf(file, sizeof(file), "%s/memory.soft_limit_in_bytes", cgroupdir);
  3860  	write_file(file, "%d", 299 << 20);
  3861  	snprintf(file, sizeof(file), "%s/memory.limit_in_bytes", cgroupdir);
  3862  	write_file(file, "%d", 300 << 20);
  3863  	snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid);
  3864  	if (mkdir(cgroupdir, 0777)) {
  3865  		debug("mkdir(%s) failed: %d\n", cgroupdir, errno);
  3866  	}
  3867  	snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
  3868  	write_file(file, "%d", pid);
  3869  }
  3870  
  3871  static void setup_cgroups_test()
  3872  {
  3873  #if SYZ_EXECUTOR
  3874  	if (!flag_cgroups)
  3875  		return;
  3876  #endif
  3877  	char cgroupdir[64];
  3878  	snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid);
  3879  	if (symlink(cgroupdir, "./cgroup")) {
  3880  		debug("symlink(%s, ./cgroup) failed: %d\n", cgroupdir, errno);
  3881  	}
  3882  	snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid);
  3883  	if (symlink(cgroupdir, "./cgroup.cpu")) {
  3884  		debug("symlink(%s, ./cgroup.cpu) failed: %d\n", cgroupdir, errno);
  3885  	}
  3886  	snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid);
  3887  	if (symlink(cgroupdir, "./cgroup.net")) {
  3888  		debug("symlink(%s, ./cgroup.net) failed: %d\n", cgroupdir, errno);
  3889  	}
  3890  }
  3891  #endif
  3892  
  3893  #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_NAMESPACE
  3894  static void initialize_cgroups()
  3895  {
  3896  #if SYZ_EXECUTOR
  3897  	if (!flag_cgroups)
  3898  		return;
  3899  #endif
  3900  	if (mkdir("./syz-tmp/newroot/syzcgroup", 0700))
  3901  		fail("mkdir failed");
  3902  	if (mkdir("./syz-tmp/newroot/syzcgroup/unified", 0700))
  3903  		fail("mkdir failed");
  3904  	if (mkdir("./syz-tmp/newroot/syzcgroup/cpu", 0700))
  3905  		fail("mkdir failed");
  3906  	if (mkdir("./syz-tmp/newroot/syzcgroup/net", 0700))
  3907  		fail("mkdir failed");
  3908  	unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE;
  3909  	if (mount("/syzcgroup/unified", "./syz-tmp/newroot/syzcgroup/unified", NULL, bind_mount_flags, NULL)) {
  3910  		debug("mount(cgroup2, MS_BIND) failed: %d\n", errno);
  3911  	}
  3912  	if (mount("/syzcgroup/cpu", "./syz-tmp/newroot/syzcgroup/cpu", NULL, bind_mount_flags, NULL)) {
  3913  		debug("mount(cgroup/cpu, MS_BIND) failed: %d\n", errno);
  3914  	}
  3915  	if (mount("/syzcgroup/net", "./syz-tmp/newroot/syzcgroup/net", NULL, bind_mount_flags, NULL)) {
  3916  		debug("mount(cgroup/net, MS_BIND) failed: %d\n", errno);
  3917  	}
  3918  }
  3919  #endif
  3920  #endif
  3921  
  3922  #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_NAMESPACE
// Forward declarations: these helpers are defined further down in the file
// but are called at the end of sandbox_common_mount_tmpfs().
static void setup_gadgetfs();
static void setup_binderfs();
static void setup_fusectl();
  3926  // Mount tmpfs and chroot into it in sandbox=none and sandbox=namespace.
  3927  // This is to prevent persistent changes to the root file system (e.g. setting attributes) that may
  3928  // hinder fuzzing.
  3929  // See https://github.com/google/syzkaller/issues/4939 for more details.
  3930  static void sandbox_common_mount_tmpfs(void)
  3931  {
  3932  	// Android systems set fs.mount-max to a very low value, causing ENOSPC when doing the mounts below
  3933  	// (see https://github.com/google/syzkaller/issues/4972). 100K mounts should be enough for everyone.
  3934  	write_file("/proc/sys/fs/mount-max", "100000");
  3935  	if (mkdir("./syz-tmp", 0777))
  3936  		fail("mkdir(syz-tmp) failed");
  3937  	if (mount("", "./syz-tmp", "tmpfs", 0, NULL))
  3938  		fail("mount(tmpfs) failed");
  3939  	if (mkdir("./syz-tmp/newroot", 0777))
  3940  		fail("mkdir failed");
  3941  	if (mkdir("./syz-tmp/newroot/dev", 0700))
  3942  		fail("mkdir failed");
  3943  	unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE;
  3944  	if (mount("/dev", "./syz-tmp/newroot/dev", NULL, bind_mount_flags, NULL))
  3945  		fail("mount(dev) failed");
  3946  	if (mkdir("./syz-tmp/newroot/proc", 0700))
  3947  		fail("mkdir failed");
  3948  	if (mount("syz-proc", "./syz-tmp/newroot/proc", "proc", 0, NULL))
  3949  		fail("mount(proc) failed");
  3950  	if (mkdir("./syz-tmp/newroot/selinux", 0700))
  3951  		fail("mkdir failed");
  3952  	// selinux mount used to be at /selinux, but then moved to /sys/fs/selinux.
  3953  	const char* selinux_path = "./syz-tmp/newroot/selinux";
  3954  	if (mount("/selinux", selinux_path, NULL, bind_mount_flags, NULL)) {
  3955  		if (errno != ENOENT)
  3956  			fail("mount(/selinux) failed");
  3957  		if (mount("/sys/fs/selinux", selinux_path, NULL, bind_mount_flags, NULL) && errno != ENOENT)
  3958  			fail("mount(/sys/fs/selinux) failed");
  3959  	}
  3960  	if (mkdir("./syz-tmp/newroot/sys", 0700))
  3961  		fail("mkdir(/sys) failed");
  3962  	if (mount("/sys", "./syz-tmp/newroot/sys", 0, bind_mount_flags, NULL))
  3963  		fail("mount(sysfs) failed");
  3964  	if (mount("/sys/kernel/debug", "./syz-tmp/newroot/sys/kernel/debug", NULL, bind_mount_flags, NULL) && errno != ENOENT)
  3965  		fail("mount(debug) failed");
  3966  	if (mount("/sys/fs/smackfs", "./syz-tmp/newroot/sys/fs/smackfs", NULL, bind_mount_flags, NULL) && errno != ENOENT)
  3967  		fail("mount(smackfs) failed");
  3968  	if (mount("/proc/sys/fs/binfmt_misc", "./syz-tmp/newroot/proc/sys/fs/binfmt_misc", NULL, bind_mount_flags, NULL) && errno != ENOENT)
  3969  		fail("mount(binfmt_misc) failed");
  3970  
  3971  	// If user wants to supply custom inputs, those can be placed to /syz-inputs
  3972  	// That folder will be mounted to fuzzer sandbox
  3973  	// https://groups.google.com/g/syzkaller/c/U-DISFjKLzg
  3974  	if (mkdir("./syz-tmp/newroot/syz-inputs", 0700))
  3975  		fail("mkdir(/syz-inputs) failed");
  3976  
  3977  	if (mount("/syz-inputs", "./syz-tmp/newroot/syz-inputs", NULL, bind_mount_flags | MS_RDONLY, NULL) && errno != ENOENT)
  3978  		fail("mount(syz-inputs) failed");
  3979  
  3980  #if SYZ_EXECUTOR || SYZ_CGROUPS
  3981  	initialize_cgroups();
  3982  #endif
  3983  	if (mkdir("./syz-tmp/pivot", 0777))
  3984  		fail("mkdir failed");
  3985  	if (syscall(SYS_pivot_root, "./syz-tmp", "./syz-tmp/pivot")) {
  3986  		debug("pivot_root failed\n");
  3987  		if (chdir("./syz-tmp"))
  3988  			fail("chdir failed");
  3989  	} else {
  3990  		debug("pivot_root OK\n");
  3991  		if (chdir("/"))
  3992  			fail("chdir failed");
  3993  		if (umount2("./pivot", MNT_DETACH))
  3994  			fail("umount failed");
  3995  	}
  3996  	if (chroot("./newroot"))
  3997  		fail("chroot failed");
  3998  	if (chdir("/"))
  3999  		fail("chdir failed");
  4000  	setup_gadgetfs();
  4001  	setup_binderfs();
  4002  	setup_fusectl();
  4003  }
  4004  #endif
  4005  
  4006  #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_NAMESPACE
  4007  #include <sys/mount.h>
  4008  #include <sys/stat.h>
  4009  
  4010  static void setup_gadgetfs()
  4011  {
  4012  	if (mkdir("/dev/gadgetfs", 0777)) {
  4013  		debug("mkdir(/dev/gadgetfs) failed: %d\n", errno);
  4014  	}
  4015  	if (mount("gadgetfs", "/dev/gadgetfs", "gadgetfs", 0, NULL)) {
  4016  		debug("mount of gadgetfs at /dev/gadgetfs failed: %d\n", errno);
  4017  	}
  4018  }
  4019  #endif
  4020  
  4021  #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID
  4022  #include <errno.h>
  4023  #include <sys/mount.h>
  4024  #include <sys/stat.h>
  4025  #include <unistd.h>
  4026  
  4027  static void setup_fusectl()
  4028  {
  4029  	if (mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0)) {
  4030  		debug("mount(fusectl) failed: %d\n", errno);
  4031  	}
  4032  }
  4033  
  4034  static void setup_binderfs()
  4035  {
  4036  	// NOTE: this function must be called after chroot.
  4037  	// Bind an instance of binderfs specific just to this executor - it will
  4038  	// only be visible in its mount namespace and will help isolate binder
  4039  	// devices during fuzzing.
  4040  	// These commands will just silently fail if binderfs is not supported.
  4041  	// Ideally it should have been added as a separate feature (with lots of
  4042  	// minor changes throughout the code base), but it seems to be an overkill
  4043  	// for just 2 simple lines of code.
  4044  	if (mkdir("/dev/binderfs", 0777)) {
  4045  		debug("mkdir(/dev/binderfs) failed: %d\n", errno);
  4046  	}
  4047  
  4048  	if (mount("binder", "/dev/binderfs", "binder", 0, NULL)) {
  4049  		debug("mount of binder at /dev/binderfs failed: %d\n", errno);
  4050  	}
  4051  #if !SYZ_EXECUTOR && !SYZ_USE_TMP_DIR
  4052  	// Do a local symlink right away.
  4053  	if (symlink("/dev/binderfs", "./binderfs")) {
  4054  		debug("symlink(/dev/binderfs, ./binderfs) failed: %d\n", errno);
  4055  	}
  4056  #endif
  4057  }
  4058  
  4059  #include <sched.h>
  4060  #include <sys/prctl.h>
  4061  #include <sys/resource.h>
  4062  #include <sys/time.h>
  4063  #include <sys/wait.h>
  4064  
  4065  static void loop();
  4066  
  4067  static void sandbox_common()
  4068  {
  4069  	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  4070  	if (getppid() == 1)
  4071  		exitf("the sandbox parent process was killed");
  4072  
  4073  #if SYZ_EXECUTOR || __NR_syz_init_net_socket || SYZ_DEVLINK_PCI || __NR_syz_socket_connect_nvme_tcp
  4074  	int netns = open("/proc/self/ns/net", O_RDONLY);
  4075  	if (netns == -1)
  4076  		fail("open(/proc/self/ns/net) failed");
  4077  	if (dup2(netns, kInitNetNsFd) < 0)
  4078  		fail("dup2(netns, kInitNetNsFd) failed");
  4079  	close(netns);
  4080  #endif
  4081  
  4082  	struct rlimit rlim;
  4083  #if SYZ_EXECUTOR
  4084  	rlim.rlim_cur = rlim.rlim_max = (200 << 20) +
  4085  					(kMaxThreads * kCoverSize + kExtraCoverSize) * sizeof(void*);
  4086  #else
  4087  	rlim.rlim_cur = rlim.rlim_max = (200 << 20);
  4088  #endif
  4089  	setrlimit(RLIMIT_AS, &rlim);
  4090  	rlim.rlim_cur = rlim.rlim_max = 32 << 20;
  4091  	setrlimit(RLIMIT_MEMLOCK, &rlim);
  4092  	rlim.rlim_cur = rlim.rlim_max = 136 << 20;
  4093  	setrlimit(RLIMIT_FSIZE, &rlim);
  4094  	rlim.rlim_cur = rlim.rlim_max = 1 << 20;
  4095  	setrlimit(RLIMIT_STACK, &rlim);
  4096  	// Note: core size is also restricted by RLIMIT_FSIZE.
  4097  	rlim.rlim_cur = rlim.rlim_max = 128 << 20;
  4098  	setrlimit(RLIMIT_CORE, &rlim);
  4099  	rlim.rlim_cur = rlim.rlim_max = 256; // see kMaxFd
  4100  	setrlimit(RLIMIT_NOFILE, &rlim);
  4101  
  4102  	// CLONE_NEWNS/NEWCGROUP cause EINVAL on some systems,
  4103  	// so we do them separately of clone in do_sandbox_namespace.
  4104  	if (unshare(CLONE_NEWNS)) {
  4105  		debug("unshare(CLONE_NEWNS): %d\n", errno);
  4106  	}
  4107  	if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
  4108  		debug("mount(\"/\", MS_REC | MS_PRIVATE): %d\n", errno);
  4109  	}
  4110  	if (unshare(CLONE_NEWIPC)) {
  4111  		debug("unshare(CLONE_NEWIPC): %d\n", errno);
  4112  	}
  4113  	if (unshare(0x02000000)) {
  4114  		debug("unshare(CLONE_NEWCGROUP): %d\n", errno);
  4115  	}
  4116  	if (unshare(CLONE_NEWUTS)) {
  4117  		debug("unshare(CLONE_NEWUTS): %d\n", errno);
  4118  	}
  4119  	if (unshare(CLONE_SYSVSEM)) {
  4120  		debug("unshare(CLONE_SYSVSEM): %d\n", errno);
  4121  	}
  4122  	// These sysctl's restrict ipc resource usage (by default it's possible
  4123  	// to eat all system memory by creating e.g. lots of large sem sets).
  4124  	// These sysctl's are per-namespace, so we need to set them inside
  4125  	// of the test ipc namespace (after CLONE_NEWIPC).
  4126  	typedef struct {
  4127  		const char* name;
  4128  		const char* value;
  4129  	} sysctl_t;
  4130  	static const sysctl_t sysctls[] = {
  4131  	    {"/proc/sys/kernel/shmmax", "16777216"},
  4132  	    {"/proc/sys/kernel/shmall", "536870912"},
  4133  	    {"/proc/sys/kernel/shmmni", "1024"},
  4134  	    {"/proc/sys/kernel/msgmax", "8192"},
  4135  	    {"/proc/sys/kernel/msgmni", "1024"},
  4136  	    {"/proc/sys/kernel/msgmnb", "1024"},
  4137  	    {"/proc/sys/kernel/sem", "1024 1048576 500 1024"},
  4138  	};
  4139  	unsigned i;
  4140  	for (i = 0; i < sizeof(sysctls) / sizeof(sysctls[0]); i++)
  4141  		write_file(sysctls[i].name, sysctls[i].value);
  4142  }
  4143  #endif
  4144  
  4145  #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE
  4146  static int wait_for_loop(int pid)
  4147  {
  4148  	if (pid < 0)
  4149  		fail("sandbox fork failed");
  4150  	debug("spawned loop pid %d\n", pid);
  4151  	int status = 0;
  4152  	while (waitpid(-1, &status, __WALL) != pid) {
  4153  	}
  4154  	return WEXITSTATUS(status);
  4155  }
  4156  #endif
  4157  
  4158  #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID
  4159  #include <linux/capability.h>
  4160  
  4161  static void drop_caps(void)
  4162  {
  4163  	struct __user_cap_header_struct cap_hdr = {};
  4164  	struct __user_cap_data_struct cap_data[2] = {};
  4165  	cap_hdr.version = _LINUX_CAPABILITY_VERSION_3;
  4166  	cap_hdr.pid = getpid();
  4167  	if (syscall(SYS_capget, &cap_hdr, &cap_data))
  4168  		fail("capget failed");
  4169  	// Drop CAP_SYS_PTRACE so that test processes can't attach to parent processes.
  4170  	// Previously it lead to hangs because the loop process stopped due to SIGSTOP.
  4171  	// Note that a process can always ptrace its direct children, which is enough for testing purposes.
  4172  	//
  4173  	// A process with CAP_SYS_NICE can bring kernel down by asking for too high SCHED_DEADLINE priority,
  4174  	// as the result rcu and other system services that use kernel threads will stop functioning.
  4175  	// Some parameters for SCHED_DEADLINE should be OK, but we don't have means to enforce
  4176  	// values of indirect syscall arguments. Peter Zijlstra proposed sysctl_deadline_period_{min,max}
  4177  	// which could be used to enfore safe limits without droppping CAP_SYS_NICE, but we don't have it yet.
  4178  	// See the following bug for details:
  4179  	// https://groups.google.com/forum/#!topic/syzkaller-bugs/G6Wl_PKPIWI
  4180  	const int drop = (1 << CAP_SYS_PTRACE) | (1 << CAP_SYS_NICE);
  4181  	cap_data[0].effective &= ~drop;
  4182  	cap_data[0].permitted &= ~drop;
  4183  	cap_data[0].inheritable &= ~drop;
  4184  	if (syscall(SYS_capset, &cap_hdr, &cap_data))
  4185  		fail("capset failed");
  4186  }
  4187  #endif
  4188  
  4189  #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE
  4190  #include <sched.h>
  4191  #include <sys/types.h>
  4192  
  4193  static int do_sandbox_none(void)
  4194  {
  4195  	// CLONE_NEWPID takes effect for the first child of the current process,
  4196  	// so we do it before fork to make the loop "init" process of the namespace.
  4197  	// We ought to do fail here, but sandbox=none is used in pkg/ipc tests
  4198  	// and they are usually run under non-root.
  4199  	// Also since debug is stripped by pkg/csource, we need to do {}
  4200  	// even though we generally don't do {} around single statements.
  4201  	if (unshare(CLONE_NEWPID)) {
  4202  		debug("unshare(CLONE_NEWPID): %d\n", errno);
  4203  	}
  4204  	int pid = fork();
  4205  	if (pid != 0)
  4206  		return wait_for_loop(pid);
  4207  
  4208  #if SYZ_EXECUTOR || SYZ_VHCI_INJECTION
  4209  	initialize_vhci();
  4210  #endif
  4211  	sandbox_common();
  4212  	drop_caps();
  4213  #if SYZ_EXECUTOR || SYZ_NET_DEVICES
  4214  	initialize_netdevices_init();
  4215  #endif
  4216  	if (unshare(CLONE_NEWNET)) {
  4217  		debug("unshare(CLONE_NEWNET): %d\n", errno);
  4218  	}
  4219  	// Enable access to IPPROTO_ICMP sockets, must be done after CLONE_NEWNET.
  4220  	write_file("/proc/sys/net/ipv4/ping_group_range", "0 65535");
  4221  #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
  4222  	initialize_devlink_pci();
  4223  #endif
  4224  #if SYZ_EXECUTOR || SYZ_NET_INJECTION
  4225  	initialize_tun();
  4226  #endif
  4227  #if SYZ_EXECUTOR || SYZ_NET_DEVICES
  4228  	initialize_netdevices();
  4229  #endif
  4230  #if SYZ_EXECUTOR || SYZ_WIFI
  4231  	initialize_wifi_devices();
  4232  #endif
  4233  	sandbox_common_mount_tmpfs();
  4234  	loop();
  4235  	doexit(1);
  4236  }
  4237  #endif
  4238  
  4239  #if SYZ_EXECUTOR || SYZ_SANDBOX_SETUID
  4240  #include <grp.h>
  4241  #include <sched.h>
  4242  #include <sys/prctl.h>
  4243  
  4244  #define SYZ_HAVE_SANDBOX_SETUID 1
  4245  static int do_sandbox_setuid(void)
  4246  {
  4247  	if (unshare(CLONE_NEWPID)) {
  4248  		debug("unshare(CLONE_NEWPID): %d\n", errno);
  4249  	}
  4250  	int pid = fork();
  4251  	if (pid != 0)
  4252  		return wait_for_loop(pid);
  4253  
  4254  #if SYZ_EXECUTOR || SYZ_VHCI_INJECTION
  4255  	initialize_vhci();
  4256  #endif
  4257  	sandbox_common();
  4258  #if SYZ_EXECUTOR || SYZ_NET_DEVICES
  4259  	initialize_netdevices_init();
  4260  #endif
  4261  	if (unshare(CLONE_NEWNET)) {
  4262  		debug("unshare(CLONE_NEWNET): %d\n", errno);
  4263  	}
  4264  #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
  4265  	initialize_devlink_pci();
  4266  #endif
  4267  #if SYZ_EXECUTOR || SYZ_NET_INJECTION
  4268  	initialize_tun();
  4269  #endif
  4270  #if SYZ_EXECUTOR || SYZ_NET_DEVICES
  4271  	initialize_netdevices();
  4272  #endif
  4273  #if SYZ_EXECUTOR || SYZ_WIFI
  4274  	initialize_wifi_devices();
  4275  #endif
  4276  	setup_binderfs();
  4277  	setup_fusectl();
  4278  
  4279  	const int nobody = 65534;
  4280  	if (setgroups(0, NULL))
  4281  		fail("failed to setgroups");
  4282  	if (syscall(SYS_setresgid, nobody, nobody, nobody))
  4283  		fail("failed to setresgid");
  4284  	if (syscall(SYS_setresuid, nobody, nobody, nobody))
  4285  		fail("failed to setresuid");
  4286  
  4287  	// setresuid and setresgid clear the parent-death signal.
  4288  	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  4289  	// This is required to open /proc/self/ files.
  4290  	// Otherwise they are owned by root and we can't open them after setuid.
  4291  	// See task_dump_owner function in kernel.
  4292  	prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
  4293  
  4294  	loop();
  4295  	doexit(1);
  4296  }
  4297  #endif
  4298  
  4299  #if SYZ_EXECUTOR || SYZ_SANDBOX_NAMESPACE
  4300  #include <sched.h>
  4301  #include <sys/mman.h>
  4302  #include <sys/mount.h>
  4303  
// uid/gid of the process before the user namespace is created. Set by
// do_sandbox_namespace() and written into /proc/self/{uid,gid}_map by
// namespace_sandbox_proc().
static int real_uid;
static int real_gid;
// Stack for the process created with clone() in do_sandbox_namespace():
// 1MB, aligned to 64KB.
__attribute__((aligned(64 << 10))) static char sandbox_stack[1 << 20];
  4307  
  4308  static int namespace_sandbox_proc(void* arg)
  4309  {
  4310  	sandbox_common();
  4311  
  4312  	// /proc/self/setgroups is not present on some systems, ignore error.
  4313  	write_file("/proc/self/setgroups", "deny");
  4314  	if (!write_file("/proc/self/uid_map", "0 %d 1\n", real_uid))
  4315  		fail("write of /proc/self/uid_map failed");
  4316  	if (!write_file("/proc/self/gid_map", "0 %d 1\n", real_gid))
  4317  		fail("write of /proc/self/gid_map failed");
  4318  
  4319  #if SYZ_EXECUTOR || SYZ_NET_DEVICES
  4320  	initialize_netdevices_init();
  4321  #endif
  4322  	// CLONE_NEWNET must always happen before tun setup,
  4323  	// because we want the tun device in the test namespace.
  4324  	if (unshare(CLONE_NEWNET))
  4325  		fail("unshare(CLONE_NEWNET)");
  4326  	// Enable access to IPPROTO_ICMP sockets, must be done after CLONE_NEWNET.
  4327  	write_file("/proc/sys/net/ipv4/ping_group_range", "0 65535");
  4328  #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
  4329  	initialize_devlink_pci();
  4330  #endif
  4331  #if SYZ_EXECUTOR || SYZ_NET_INJECTION
  4332  	// We setup tun here as it needs to be in the test net namespace,
  4333  	// which in turn needs to be in the test user namespace.
  4334  	// However, IFF_NAPI_FRAGS will fail as we are not root already.
  4335  	// TODO: we should create tun in the init net namespace and use setns
  4336  	// to move it to the target namespace.
  4337  	initialize_tun();
  4338  #endif
  4339  #if SYZ_EXECUTOR || SYZ_NET_DEVICES
  4340  	initialize_netdevices();
  4341  #endif
  4342  #if SYZ_EXECUTOR || SYZ_WIFI
  4343  	initialize_wifi_devices();
  4344  #endif
  4345  
  4346  	sandbox_common_mount_tmpfs();
  4347  	drop_caps();
  4348  
  4349  	loop();
  4350  	doexit(1);
  4351  }
  4352  
  4353  #define SYZ_HAVE_SANDBOX_NAMESPACE 1
  4354  static int do_sandbox_namespace(void)
  4355  {
  4356  #if SYZ_EXECUTOR || SYZ_VHCI_INJECTION
  4357  	// HCIDEVUP requires CAP_ADMIN, so this needs to happen early.
  4358  	initialize_vhci();
  4359  #endif
  4360  	real_uid = getuid();
  4361  	real_gid = getgid();
  4362  	mprotect(sandbox_stack, 4096, PROT_NONE); // to catch stack underflows
  4363  	int pid = clone(namespace_sandbox_proc, &sandbox_stack[sizeof(sandbox_stack) - 64],
  4364  			CLONE_NEWUSER | CLONE_NEWPID, 0);
  4365  	return wait_for_loop(pid);
  4366  }
  4367  #endif
  4368  
  4369  #if SYZ_EXECUTOR || SYZ_SANDBOX_ANDROID
  4370  // seccomp only supported for Arm, Arm64, X86, and X86_64 archs
  4371  #if GOARCH_arm || GOARCH_arm64 || GOARCH_386 || GOARCH_amd64
  4372  #include <assert.h>
  4373  #include <errno.h>
  4374  #include <linux/audit.h>
  4375  #include <linux/filter.h>
  4376  #include <linux/seccomp.h>
  4377  #include <stddef.h>
  4378  #include <stdlib.h>
  4379  #include <sys/prctl.h>
  4380  #include <sys/syscall.h>
  4381  
  4382  #include "android/android_seccomp.h"
  4383  
  4384  #if GOARCH_amd64 || GOARCH_386
  4385  // Syz-executor is linked against glibc when fuzzing runs on Cuttlefish x86-x64.
  4386  // However Android blocks calls into mkdir, rmdir, symlink which causes
  4387  // syz-executor to crash. When fuzzing runs on Android device this issue
  4388  // is not observed, because syz-executor is linked against Bionic. Under
// the hood Bionic invokes mkdirat, unlinkat and symlinkat, which are
  4390  // allowed by seccomp-bpf.
  4391  // This issue may exist not only in Android, but also in Linux in general
  4392  // where seccomp filtering is enforced.
  4393  //
  4394  // This trick makes linker believe it matched the correct version of mkdir,
  4395  // rmdir, symlink. So now behavior is the same across ARM and non-ARM builds.
// Replacement for mkdir(2) implemented via mkdirat() relative to the current
// working directory.
inline int mkdir(const char* path, mode_t mode)
{
	return mkdirat(AT_FDCWD, path, mode);
}

// Replacement for rmdir(2) implemented via unlinkat() with AT_REMOVEDIR,
// relative to the current working directory.
inline int rmdir(const char* path)
{
	return unlinkat(AT_FDCWD, path, AT_REMOVEDIR);
}

// Replacement for symlink(2) implemented via symlinkat(); new_path is
// resolved relative to the current working directory.
inline int symlink(const char* old_path, const char* new_path)
{
	return symlinkat(old_path, AT_FDCWD, new_path);
}
  4410  #endif
  4411  
  4412  #endif
  4413  #include <fcntl.h> // open(2)
  4414  #include <grp.h> // setgroups
  4415  #include <sys/xattr.h> // setxattr, getxattr
  4416  
// Numeric ids used to build the group lists below.
// NOTE(review): these presumably mirror Android's AID_* assignments - confirm against the platform headers.
#define AID_NET_BT_ADMIN 3001
#define AID_NET_BT 3002
#define AID_INET 3003
#define AID_EVERYBODY 9997
#define AID_APP 10000

// uid/gid used by do_sandbox_android() by default (sandbox_arg != 1).
#define UNTRUSTED_APP_UID (AID_APP + 999)
#define UNTRUSTED_APP_GID (AID_APP + 999)

// uid/gid used by do_sandbox_android() when sandbox_arg == 1.
#define SYSTEM_UID 1000
#define SYSTEM_GID 1000

// Process context written by setcon() when running as UNTRUSTED_APP_UID.
const char* const SELINUX_CONTEXT_UNTRUSTED_APP = "u:r:untrusted_app:s0:c512,c768";
// File label applied to the current directory by do_sandbox_android() via setfilecon().
const char* const SELINUX_LABEL_APP_DATA_FILE = "u:object_r:app_data_file:s0:c512,c768";
// File read by getcon() and written by setcon().
const char* const SELINUX_CONTEXT_FILE = "/proc/thread-self/attr/current";
// Extended attribute name used by setfilecon().
const char* const SELINUX_XATTR_NAME = "security.selinux";

// Supplementary groups passed to setgroups() for the default (untrusted app) account.
const gid_t UNTRUSTED_APP_GROUPS[] = {UNTRUSTED_APP_GID, AID_NET_BT_ADMIN, AID_NET_BT, AID_INET, AID_EVERYBODY};
const size_t UNTRUSTED_APP_NUM_GROUPS = sizeof(UNTRUSTED_APP_GROUPS) / sizeof(UNTRUSTED_APP_GROUPS[0]);

// Supplementary groups passed to setgroups() for the system account (sandbox_arg == 1).
const gid_t SYSTEM_GROUPS[] = {SYSTEM_GID, AID_NET_BT_ADMIN, AID_NET_BT, AID_INET, AID_EVERYBODY};
const size_t SYSTEM_NUM_GROUPS = sizeof(SYSTEM_GROUPS) / sizeof(SYSTEM_GROUPS[0]);
  4439  
  4440  // Similar to libselinux getcon(3), but:
  4441  // - No library dependency
  4442  // - No dynamic memory allocation
  4443  // - Uses fail() instead of returning an error code
  4444  static void getcon(char* context, size_t context_size)
  4445  {
  4446  	int fd = open(SELINUX_CONTEXT_FILE, O_RDONLY);
  4447  	if (fd < 0)
  4448  		fail("getcon: couldn't open context file");
  4449  
  4450  	ssize_t nread = read(fd, context, context_size);
  4451  
  4452  	close(fd);
  4453  
  4454  	if (nread <= 0)
  4455  		fail("getcon: failed to read context file");
  4456  
  4457  	// The contents of the context file MAY end with a newline
  4458  	// and MAY not have a null terminator.  Handle this here.
  4459  	if (context[nread - 1] == '\n')
  4460  		context[nread - 1] = '\0';
  4461  }
  4462  
  4463  // Similar to libselinux setcon(3), but:
  4464  // - No library dependency
  4465  // - No dynamic memory allocation
  4466  // - Uses fail() instead of returning an error code
  4467  static void setcon(const char* context)
  4468  {
  4469  	char new_context[512];
  4470  
  4471  	// Attempt to write the new context
  4472  	int fd = open(SELINUX_CONTEXT_FILE, O_WRONLY);
  4473  
  4474  	if (fd < 0)
  4475  		fail("setcon: could not open context file");
  4476  
  4477  	ssize_t bytes_written = write(fd, context, strlen(context));
  4478  
  4479  	// N.B.: We cannot reuse this file descriptor, since the target SELinux context
  4480  	//       may not be able to read from it.
  4481  	close(fd);
  4482  
  4483  	if (bytes_written != (ssize_t)strlen(context))
  4484  		failmsg("setcon: could not write entire context", "wrote=%zi, expected=%zu", bytes_written, strlen(context));
  4485  
  4486  	// Validate the transition by checking the context
  4487  	getcon(new_context, sizeof(new_context));
  4488  
  4489  	if (strcmp(context, new_context) != 0)
  4490  		failmsg("setcon: failed to change", "want=%s, context=%s", context, new_context);
  4491  }
  4492  
  4493  // Similar to libselinux setfilecon(3), but:
  4494  // - No library dependency
  4495  // - No dynamic memory allocation
  4496  // - Uses fail() instead of returning an error code
  4497  static void setfilecon(const char* path, const char* context)
  4498  {
  4499  	char new_context[512];
  4500  
  4501  	if (setxattr(path, SELINUX_XATTR_NAME, context, strlen(context) + 1, 0) != 0)
  4502  		fail("setfilecon: setxattr failed");
  4503  	if (getxattr(path, SELINUX_XATTR_NAME, new_context, sizeof(new_context)) < 0)
  4504  		fail("setfilecon: getxattr failed");
  4505  	if (strcmp(context, new_context) != 0)
  4506  		failmsg("setfilecon: could not set context", "want=%s, got=%s", context, new_context);
  4507  }
  4508  
  4509  #define SYZ_HAVE_SANDBOX_ANDROID 1
  4510  
  4511  static int do_sandbox_android(uint64 sandbox_arg)
  4512  {
  4513  	setup_fusectl();
  4514  #if SYZ_EXECUTOR || SYZ_VHCI_INJECTION
  4515  	initialize_vhci();
  4516  #endif
  4517  	sandbox_common();
  4518  	drop_caps();
  4519  
  4520  #if SYZ_EXECUTOR || SYZ_NET_DEVICES
  4521  	initialize_netdevices_init();
  4522  #endif
  4523  	// CLONE_NEWNET must always happen before tun setup, because we want the tun
  4524  	// device in the test namespace. If we don't do this, executor will crash with
  4525  	// SYZFATAL: executor NUM failed NUM times: executor NUM: EOF
  4526  	if (unshare(CLONE_NEWNET)) {
  4527  		debug("unshare(CLONE_NEWNET): %d\n", errno);
  4528  	}
  4529  	// Enable access to IPPROTO_ICMP sockets, must be done after CLONE_NEWNET.
  4530  	write_file("/proc/sys/net/ipv4/ping_group_range", "0 65535");
  4531  #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
  4532  	initialize_devlink_pci();
  4533  #endif
  4534  #if SYZ_EXECUTOR || SYZ_NET_INJECTION
  4535  	initialize_tun();
  4536  #endif
  4537  #if SYZ_EXECUTOR || SYZ_NET_DEVICES
  4538  	initialize_netdevices();
  4539  #endif
  4540  	uid_t uid = UNTRUSTED_APP_UID;
  4541  	size_t num_groups = UNTRUSTED_APP_NUM_GROUPS;
  4542  	const gid_t* groups = UNTRUSTED_APP_GROUPS;
  4543  	gid_t gid = UNTRUSTED_APP_GID;
  4544  	debug("executor received sandbox_arg=%llu\n", sandbox_arg);
  4545  	if (sandbox_arg == 1) {
  4546  		uid = SYSTEM_UID;
  4547  		num_groups = SYSTEM_NUM_GROUPS;
  4548  		groups = SYSTEM_GROUPS;
  4549  		gid = SYSTEM_GID;
  4550  
  4551  		debug("fuzzing under SYSTEM account\n");
  4552  	}
  4553  	if (chown(".", uid, uid) != 0)
  4554  		failmsg("do_sandbox_android: chmod failed", "sandbox_arg=%llu", sandbox_arg);
  4555  
  4556  	if (setgroups(num_groups, groups) != 0)
  4557  		failmsg("do_sandbox_android: setgroups failed", "sandbox_arg=%llu", sandbox_arg);
  4558  
  4559  	if (setresgid(gid, gid, gid) != 0)
  4560  		failmsg("do_sandbox_android: setresgid failed", "sandbox_arg=%llu", sandbox_arg);
  4561  
  4562  	setup_binderfs();
  4563  
  4564  #if GOARCH_arm || GOARCH_arm64 || GOARCH_386 || GOARCH_amd64
  4565  	// Will fail() if anything fails.
  4566  	// Must be called when the new process still has CAP_SYS_ADMIN, in this case,
  4567  	// before changing uid from 0, which clears capabilities.
  4568  	int account = SCFS_RestrictedApp;
  4569  	if (sandbox_arg == 1)
  4570  		account = SCFS_SystemAccount;
  4571  	set_app_seccomp_filter(account);
  4572  #endif
  4573  
  4574  	if (setresuid(uid, uid, uid) != 0)
  4575  		failmsg("do_sandbox_android: setresuid failed", "sandbox_arg=%llu", sandbox_arg);
  4576  
  4577  	// setresuid and setresgid clear the parent-death signal.
  4578  	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  4579  
  4580  	setfilecon(".", SELINUX_LABEL_APP_DATA_FILE);
  4581  	if (uid == UNTRUSTED_APP_UID)
  4582  		setcon(SELINUX_CONTEXT_UNTRUSTED_APP);
  4583  
  4584  	loop();
  4585  	doexit(1);
  4586  }
  4587  #endif
  4588  
  4589  #if SYZ_EXECUTOR || SYZ_REPEAT && SYZ_USE_TMP_DIR
  4590  #include <dirent.h>
  4591  #include <errno.h>
  4592  #include <string.h>
  4593  #include <sys/ioctl.h>
  4594  #include <sys/mount.h>
  4595  
  4596  #define FS_IOC_SETFLAGS _IOW('f', 2, long)
  4597  
  4598  // One does not simply remove a directory.
  4599  // There can be mounts, so we need to try to umount.
  4600  // Moreover, a mount can be mounted several times, so we need to try to umount in a loop.
  4601  // Moreover, after umount a dir can become non-empty again, so we need another loop.
  4602  // Moreover, a mount can be re-mounted as read-only and then we will fail to make a dir empty.
  4603  static void remove_dir(const char* dir)
  4604  {
  4605  	int iter = 0;
  4606  	DIR* dp = 0;
  4607  
  4608  #if SYZ_EXECUTOR || !SYZ_SANDBOX_ANDROID
  4609  	// Starting from v6.9, it does no longer make sense to use MNT_DETACH, because
  4610  	// a loop device may only be reused in RW mode if no mounted filesystem keeps a
  4611  	// reference to it. So we have to umount them synchronously.
  4612  	// MNT_FORCE should hopefully prevent hangs for filesystems that may require a complex cleanup.
  4613  	//
  4614  	// This declaration should not be moved under retry label, since label followed by a declaration
  4615  	// is not supported by old compilers.
  4616  	const int umount_flags = MNT_FORCE | UMOUNT_NOFOLLOW;
  4617  #endif
  4618  
  4619  retry:
  4620  #if SYZ_EXECUTOR || !SYZ_SANDBOX_ANDROID
  4621  #if SYZ_EXECUTOR
  4622  	if (!flag_sandbox_android)
  4623  #endif
  4624  		while (umount2(dir, umount_flags) == 0) {
  4625  			debug("umount(%s)\n", dir);
  4626  		}
  4627  #endif
  4628  	dp = opendir(dir);
  4629  	if (dp == NULL) {
  4630  		if (errno == EMFILE) {
  4631  			// This happens when the test process casts prlimit(NOFILE) on us.
  4632  			// Ideally we somehow prevent test processes from messing with parent processes.
  4633  			// But full sandboxing is expensive, so let's ignore this error for now.
  4634  			exitf("opendir(%s) failed due to NOFILE, exiting", dir);
  4635  		}
  4636  		exitf("opendir(%s) failed", dir);
  4637  	}
  4638  	struct dirent* ep = 0;
  4639  	while ((ep = readdir(dp))) {
  4640  		if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0)
  4641  			continue;
  4642  		char filename[FILENAME_MAX];
  4643  		snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name);
  4644  		// If it's 9p mount with broken transport, lstat will fail.
  4645  		// So try to umount first.
  4646  #if SYZ_EXECUTOR || !SYZ_SANDBOX_ANDROID
  4647  #if SYZ_EXECUTOR
  4648  		if (!flag_sandbox_android)
  4649  #endif
  4650  			while (umount2(filename, umount_flags) == 0) {
  4651  				debug("umount(%s)\n", filename);
  4652  			}
  4653  #endif
  4654  		struct stat st;
  4655  		if (lstat(filename, &st))
  4656  			exitf("lstat(%s) failed", filename);
  4657  		if (S_ISDIR(st.st_mode)) {
  4658  			remove_dir(filename);
  4659  			continue;
  4660  		}
  4661  		int i;
  4662  		for (i = 0;; i++) {
  4663  			if (unlink(filename) == 0)
  4664  				break;
  4665  			if (errno == EPERM) {
  4666  				// Try to reset FS_XFLAG_IMMUTABLE.
  4667  				int fd = open(filename, O_RDONLY);
  4668  				if (fd != -1) {
  4669  					long flags = 0;
  4670  					if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) {
  4671  						debug("reset FS_XFLAG_IMMUTABLE\n");
  4672  					}
  4673  					close(fd);
  4674  					continue;
  4675  				}
  4676  			}
  4677  			if (errno == EROFS) {
  4678  				debug("ignoring EROFS\n");
  4679  				break;
  4680  			}
  4681  			if (errno != EBUSY || i > 100)
  4682  				exitf("unlink(%s) failed", filename);
  4683  #if SYZ_EXECUTOR || !SYZ_SANDBOX_ANDROID
  4684  #if SYZ_EXECUTOR
  4685  			if (!flag_sandbox_android) {
  4686  #endif
  4687  				debug("umount(%s)\n", filename);
  4688  				if (umount2(filename, umount_flags))
  4689  					exitf("umount(%s) failed", filename);
  4690  #if SYZ_EXECUTOR
  4691  			}
  4692  #endif
  4693  #endif
  4694  		}
  4695  	}
  4696  	closedir(dp);
  4697  	for (int i = 0;; i++) {
  4698  		if (rmdir(dir) == 0)
  4699  			break;
  4700  		if (i < 100) {
  4701  			if (errno == EPERM) {
  4702  				// Try to reset FS_XFLAG_IMMUTABLE.
  4703  				int fd = open(dir, O_RDONLY);
  4704  				if (fd != -1) {
  4705  					long flags = 0;
  4706  					if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) {
  4707  						debug("reset FS_XFLAG_IMMUTABLE\n");
  4708  					}
  4709  					close(fd);
  4710  					continue;
  4711  				}
  4712  			}
  4713  			if (errno == EROFS) {
  4714  				debug("ignoring EROFS\n");
  4715  				break;
  4716  			}
  4717  			if (errno == EBUSY) {
  4718  #if SYZ_EXECUTOR || !SYZ_SANDBOX_ANDROID
  4719  #if SYZ_EXECUTOR
  4720  				if (!flag_sandbox_android) {
  4721  #endif
  4722  					debug("umount(%s)\n", dir);
  4723  					if (umount2(dir, umount_flags))
  4724  						exitf("umount(%s) failed", dir);
  4725  #if SYZ_EXECUTOR
  4726  				}
  4727  #endif
  4728  #endif
  4729  				continue;
  4730  			}
  4731  			if (errno == ENOTEMPTY) {
  4732  				if (iter < 100) {
  4733  					iter++;
  4734  					goto retry;
  4735  				}
  4736  			}
  4737  		}
  4738  		exitf("rmdir(%s) failed", dir);
  4739  	}
  4740  }
  4741  #endif
  4742  
  4743  #if SYZ_EXECUTOR || SYZ_FAULT
  4744  #include <fcntl.h>
  4745  #include <string.h>
  4746  #include <sys/stat.h>
  4747  #include <sys/types.h>
  4748  
  4749  static int inject_fault(int nth)
  4750  {
  4751  	int fd;
  4752  	fd = open("/proc/thread-self/fail-nth", O_RDWR);
  4753  	// We treat errors here as temporal/non-critical because we see
  4754  	// occasional ENOENT/EACCES errors returned. It seems that fuzzer
  4755  	// somehow gets its hands to it.
  4756  	if (fd == -1)
  4757  		exitf("failed to open /proc/thread-self/fail-nth");
  4758  	char buf[16];
  4759  	sprintf(buf, "%d", nth);
  4760  	if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf))
  4761  		exitf("failed to write /proc/thread-self/fail-nth");
  4762  	return fd;
  4763  }
  4764  #endif
  4765  
  4766  #if SYZ_EXECUTOR
  4767  static int fault_injected(int fail_fd)
  4768  {
  4769  	char buf[16];
  4770  	int n = read(fail_fd, buf, sizeof(buf) - 1);
  4771  	if (n <= 0)
  4772  		exitf("failed to read /proc/thread-self/fail-nth");
  4773  	int res = n == 2 && buf[0] == '0' && buf[1] == '\n';
  4774  	buf[0] = '0';
  4775  	if (write(fail_fd, buf, 1) != 1)
  4776  		exitf("failed to write /proc/thread-self/fail-nth");
  4777  	close(fail_fd);
  4778  	return res;
  4779  }
  4780  #endif
  4781  
  4782  #if (SYZ_EXECUTOR || SYZ_REPEAT) && SYZ_EXECUTOR_USES_FORK_SERVER
  4783  #include <dirent.h>
  4784  #include <errno.h>
  4785  #include <fcntl.h>
  4786  #include <signal.h>
  4787  #include <string.h>
  4788  #include <sys/stat.h>
  4789  #include <sys/types.h>
  4790  #include <sys/wait.h>
  4791  
  4792  static void kill_and_wait(int pid, int* status)
  4793  {
  4794  	kill(-pid, SIGKILL);
  4795  	kill(pid, SIGKILL);
  4796  	// First, give it up to 100 ms to surrender.
  4797  	for (int i = 0; i < 100; i++) {
  4798  		if (waitpid(-1, status, WNOHANG | __WALL) == pid)
  4799  			return;
  4800  		usleep(1000);
  4801  	}
  4802  	// Now, try to abort fuse connections as they cause deadlocks,
  4803  	// see Documentation/filesystems/fuse.txt for details.
  4804  	// There is no good way to figure out the right connections
  4805  	// provided that the process could use unshare(CLONE_NEWNS),
  4806  	// so we abort all.
  4807  	debug("kill is not working\n");
  4808  	DIR* dir = opendir("/sys/fs/fuse/connections");
  4809  	if (dir) {
  4810  		for (;;) {
  4811  			struct dirent* ent = readdir(dir);
  4812  			if (!ent)
  4813  				break;
  4814  			if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
  4815  				continue;
  4816  			char abort[300];
  4817  			snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort", ent->d_name);
  4818  			int fd = open(abort, O_WRONLY);
  4819  			if (fd == -1) {
  4820  				debug("failed to open %s: %d\n", abort, errno);
  4821  				continue;
  4822  			}
  4823  			debug("aborting fuse conn %s\n", ent->d_name);
  4824  			if (write(fd, abort, 1) < 0) {
  4825  				debug("failed to abort: %d\n", errno);
  4826  			}
  4827  			close(fd);
  4828  		}
  4829  		closedir(dir);
  4830  	} else {
  4831  		debug("failed to open /sys/fs/fuse/connections: %d\n", errno);
  4832  	}
  4833  	// Now, just wait, no other options.
  4834  	while (waitpid(-1, status, __WALL) != pid) {
  4835  	}
  4836  }
  4837  #endif
  4838  
  4839  #if (SYZ_EXECUTOR || SYZ_REPEAT && (SYZ_CGROUPS || SYZ_NET_RESET)) && SYZ_EXECUTOR_USES_FORK_SERVER
  4840  #include <fcntl.h>
  4841  #include <sys/ioctl.h>
  4842  #include <sys/stat.h>
  4843  #include <sys/types.h>
  4844  #include <unistd.h>
  4845  
  4846  #define SYZ_HAVE_SETUP_LOOP 1
// Setup hook whose presence is advertised by SYZ_HAVE_SETUP_LOOP.
// Each step is compiled in only when the corresponding feature is enabled
// (or always in the executor).
static void setup_loop()
{
#if SYZ_EXECUTOR || SYZ_CGROUPS
	// NOTE(review): defined elsewhere in this file; presumably prepares cgroups for the loop process - confirm.
	setup_cgroups_loop();
#endif
#if SYZ_EXECUTOR || SYZ_NET_RESET
	// NOTE(review): defined elsewhere; presumably records the net namespace state that reset_net_namespace() later restores - confirm.
	checkpoint_net_namespace();
#endif
}
  4856  #endif
  4857  
  4858  #if (SYZ_EXECUTOR || SYZ_REPEAT && (SYZ_NET_RESET || __NR_syz_mount_image || __NR_syz_read_part_table)) && SYZ_EXECUTOR_USES_FORK_SERVER
  4859  #define SYZ_HAVE_RESET_LOOP 1
  4860  static void reset_loop()
  4861  {
  4862  #if SYZ_EXECUTOR || __NR_syz_mount_image || __NR_syz_read_part_table
  4863  	char buf[64];
  4864  	snprintf(buf, sizeof(buf), "/dev/loop%llu", procid);
  4865  	int loopfd = open(buf, O_RDWR);
  4866  	if (loopfd != -1) {
  4867  		ioctl(loopfd, LOOP_CLR_FD, 0);
  4868  		close(loopfd);
  4869  	}
  4870  #endif
  4871  #if SYZ_EXECUTOR || SYZ_NET_RESET
  4872  	reset_net_namespace();
  4873  #endif
  4874  }
  4875  #endif
  4876  
  4877  #if (SYZ_EXECUTOR || SYZ_REPEAT) && SYZ_EXECUTOR_USES_FORK_SERVER
  4878  #include <sys/prctl.h>
  4879  #include <unistd.h>
  4880  
  4881  #define SYZ_HAVE_SETUP_TEST 1
  4882  static void setup_test()
  4883  {
  4884  	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  4885  	// We don't check for getppid() == 1 here b/c of unshare(CLONE_NEWPID),
  4886  	// our parent is normally pid 1.
  4887  	setpgrp();
  4888  #if SYZ_EXECUTOR || SYZ_CGROUPS
  4889  	setup_cgroups_test();
  4890  #endif
  4891  	// It's the leaf test process we want to be always killed first.
  4892  	write_file("/proc/self/oom_score_adj", "1000");
  4893  #if SYZ_EXECUTOR || SYZ_NET_INJECTION
  4894  	// Read all remaining packets from tun to better
  4895  	// isolate consequently executing programs.
  4896  	flush_tun();
  4897  #endif
  4898  #if SYZ_EXECUTOR || SYZ_USE_TMP_DIR
  4899  	// Add a binderfs symlink to the tmp folder.
  4900  	if (symlink("/dev/binderfs", "./binderfs")) {
  4901  		debug("symlink(/dev/binderfs, ./binderfs) failed: %d", errno);
  4902  	}
  4903  #endif
  4904  }
  4905  #endif
  4906  
  4907  #if SYZ_EXECUTOR || SYZ_CLOSE_FDS
  4908  #include <sys/syscall.h>
  4909  #define SYZ_HAVE_CLOSE_FDS 1
  4910  static void close_fds()
  4911  {
  4912  #if SYZ_EXECUTOR
  4913  	if (!flag_close_fds)
  4914  		return;
  4915  #endif
  4916  #ifdef SYS_close_range
  4917  	if (!syscall(SYS_close_range, 3, MAX_FDS, 0))
  4918  		return;
  4919  #endif
  4920  	// Keeping a 9p transport pipe open will hang the proccess dead,
  4921  	// so close all opened file descriptors.
  4922  	// Also close all USB emulation descriptors to trigger exit from USB
  4923  	// event loop to collect coverage.
  4924  	for (int fd = 3; fd < MAX_FDS; fd++)
  4925  		close(fd);
  4926  }
  4927  #endif
  4928  
  4929  #if SYZ_EXECUTOR || SYZ_FAULT
  4930  #include <errno.h>
  4931  
  4932  static const char* setup_fault()
  4933  {
  4934  	int fd = open("/proc/self/make-it-fail", O_WRONLY);
  4935  	if (fd == -1)
  4936  		return "CONFIG_FAULT_INJECTION is not enabled";
  4937  	close(fd);
  4938  
  4939  	fd = open("/proc/thread-self/fail-nth", O_WRONLY);
  4940  	if (fd == -1)
  4941  		return "kernel does not have systematic fault injection support";
  4942  	close(fd);
  4943  
  4944  	static struct {
  4945  		const char* file;
  4946  		const char* val;
  4947  		bool fatal;
  4948  	} files[] = {
  4949  	    {"/sys/kernel/debug/failslab/ignore-gfp-wait", "N", true},
  4950  	    // These are enabled by separate configs (e.g. CONFIG_FAIL_FUTEX)
  4951  	    // and we did not check all of them in host.checkFaultInjection, so we ignore errors.
  4952  	    {"/sys/kernel/debug/fail_futex/ignore-private", "N", false},
  4953  	    {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem", "N", false},
  4954  	    {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-wait", "N", false},
  4955  	    {"/sys/kernel/debug/fail_page_alloc/min-order", "0", false},
  4956  	};
  4957  	unsigned i;
  4958  	for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
  4959  		if (!write_file(files[i].file, files[i].val)) {
  4960  			debug("failed to write %s: %d\n", files[i].file, errno);
  4961  			if (files[i].fatal)
  4962  				return "failed to write fault injection file";
  4963  		}
  4964  	}
  4965  	return NULL;
  4966  }
  4967  #endif
  4968  
  4969  #if SYZ_EXECUTOR || SYZ_LEAK
  4970  #include <fcntl.h>
  4971  #include <stdio.h>
  4972  #include <string.h>
  4973  #include <sys/stat.h>
  4974  #include <sys/types.h>
  4975  
  4976  #define KMEMLEAK_FILE "/sys/kernel/debug/kmemleak"
  4977  
  4978  static const char* setup_leak()
  4979  {
  4980  	if (!write_file(KMEMLEAK_FILE, "scan=off")) {
  4981  		if (errno == EBUSY)
  4982  			return "KMEMLEAK disabled: increase CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE"
  4983  			       " or unset CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF";
  4984  		return "failed to write(kmemleak, \"scan=off\")";
  4985  	}
  4986  	// Flush boot leaks.
  4987  	if (!write_file(KMEMLEAK_FILE, "scan"))
  4988  		return "failed to write(kmemleak, \"scan\")";
  4989  	sleep(5); // account for MSECS_MIN_AGE
  4990  	if (!write_file(KMEMLEAK_FILE, "scan"))
  4991  		return "failed to write(kmemleak, \"scan\")";
  4992  	if (!write_file(KMEMLEAK_FILE, "clear"))
  4993  		return "failed to write(kmemleak, \"clear\")";
  4994  	return NULL;
  4995  }
  4996  
  4997  #define SYZ_HAVE_LEAK_CHECK 1
  4998  #if SYZ_EXECUTOR
  4999  static void check_leaks(char** frames, int nframes)
  5000  #else
  5001  static void check_leaks(void)
  5002  #endif
  5003  {
  5004  	int fd = open(KMEMLEAK_FILE, O_RDWR);
  5005  	if (fd == -1)
  5006  		fail("failed to open(kmemleak)");
  5007  	// KMEMLEAK has false positives. To mitigate most of them, it checksums
  5008  	// potentially leaked objects, and reports them only on the next scan
  5009  	// iff the checksum does not change. Because of that we do the following
  5010  	// intricate dance:
  5011  	// Scan, sleep, scan again. At this point we can get some leaks.
  5012  	// If there are leaks, we sleep and scan again, this can remove
  5013  	// false leaks. Then, read kmemleak again. If we get leaks now, then
  5014  	// hopefully these are true positives during the previous testing cycle.
  5015  	uint64 start = current_time_ms();
  5016  	if (write(fd, "scan", 4) != 4)
  5017  		fail("failed to write(kmemleak, \"scan\")");
  5018  	sleep(1);
  5019  	// Account for MSECS_MIN_AGE
  5020  	// (1 second less because scanning will take at least a second).
  5021  	while (current_time_ms() - start < 4 * 1000)
  5022  		sleep(1);
  5023  	if (write(fd, "scan", 4) != 4)
  5024  		fail("failed to write(kmemleak, \"scan\")");
  5025  	static char buf[128 << 10];
  5026  	ssize_t n = read(fd, buf, sizeof(buf) - 1);
  5027  	if (n < 0)
  5028  		fail("failed to read(kmemleak)");
  5029  	int nleaks = 0;
  5030  	if (n != 0) {
  5031  		sleep(1);
  5032  		if (write(fd, "scan", 4) != 4)
  5033  			fail("failed to write(kmemleak, \"scan\")");
  5034  		if (lseek(fd, 0, SEEK_SET) < 0)
  5035  			fail("failed to lseek(kmemleak)");
  5036  		n = read(fd, buf, sizeof(buf) - 1);
  5037  		if (n < 0)
  5038  			fail("failed to read(kmemleak)");
  5039  		buf[n] = 0;
  5040  		char* pos = buf;
  5041  		char* end = buf + n;
  5042  		while (pos < end) {
  5043  			char* next = strstr(pos + 1, "unreferenced object");
  5044  			if (!next)
  5045  				next = end;
  5046  			char prev = *next;
  5047  			*next = 0;
  5048  #if SYZ_EXECUTOR
  5049  			int f;
  5050  			for (f = 0; f < nframes; f++) {
  5051  				if (strstr(pos, frames[f]))
  5052  					break;
  5053  			}
  5054  			if (f != nframes) {
  5055  				*next = prev;
  5056  				pos = next;
  5057  				continue;
  5058  			}
  5059  #endif
  5060  			// BUG in output should be recognized by manager.
  5061  			fprintf(stderr, "BUG: memory leak\n%s\n", pos);
  5062  			*next = prev;
  5063  			pos = next;
  5064  			nleaks++;
  5065  		}
  5066  	}
  5067  	if (write(fd, "clear", 5) != 5)
  5068  		fail("failed to write(kmemleak, \"clear\")");
  5069  	close(fd);
  5070  	if (nleaks)
  5071  		doexit(1);
  5072  }
  5073  #endif
  5074  
  5075  #if SYZ_EXECUTOR || SYZ_BINFMT_MISC
  5076  #include <fcntl.h>
  5077  #include <sys/mount.h>
  5078  #include <sys/stat.h>
  5079  #include <sys/types.h>
  5080  
  5081  static const char* setup_binfmt_misc()
  5082  {
  5083  	// EBUSY means it's already mounted here.
  5084  	if (mount(0, "/proc/sys/fs/binfmt_misc", "binfmt_misc", 0, 0) && errno != EBUSY) {
  5085  		debug("mount(binfmt_misc) failed: %d\n", errno);
  5086  		return NULL;
  5087  	}
  5088  	if (!write_file("/proc/sys/fs/binfmt_misc/register", ":syz0:M:0:\x01::./file0:") ||
  5089  	    !write_file("/proc/sys/fs/binfmt_misc/register", ":syz1:M:1:\x02::./file0:POC"))
  5090  		return "write(/proc/sys/fs/binfmt_misc/register) failed";
  5091  	return NULL;
  5092  }
  5093  #endif
  5094  
  5095  #if SYZ_EXECUTOR || SYZ_KCSAN
  5096  static const char* setup_kcsan()
  5097  {
  5098  	if (!write_file("/sys/kernel/debug/kcsan", "on"))
  5099  		return "write(/sys/kernel/debug/kcsan, on) failed";
  5100  	return NULL;
  5101  }
  5102  #endif
  5103  
  5104  #if SYZ_EXECUTOR || SYZ_USB
  5105  static const char* setup_usb()
  5106  {
  5107  	if (chmod("/dev/raw-gadget", 0666))
  5108  		return "failed to chmod /dev/raw-gadget";
  5109  	return NULL;
  5110  }
  5111  #endif
  5112  
  5113  #if SYZ_EXECUTOR || SYZ_SYSCTL
  5114  #include <errno.h>
  5115  #include <signal.h>
  5116  #include <stdio.h>
  5117  #include <string.h>
  5118  #include <sys/wait.h>
  5119  
  5120  static void setup_sysctl()
  5121  {
  5122  	// See ctrl-alt-del comment below.
  5123  	int cad_pid = fork();
  5124  	if (cad_pid < 0)
  5125  		fail("fork failed");
  5126  	if (cad_pid == 0) {
  5127  		for (;;)
  5128  			sleep(100);
  5129  	}
  5130  	char tmppid[32];
  5131  	snprintf(tmppid, sizeof(tmppid), "%d", cad_pid);
  5132  
  5133  	// TODO: consider moving all sysctl's into CMDLINE config later.
  5134  	// Kernel has support for setting sysctl's via command line since 3db978d480e28 (v5.8).
  5135  	struct {
  5136  		const char* name;
  5137  		const char* data;
  5138  	} files[] = {
  5139  #if GOARCH_amd64 || GOARCH_386
  5140  	    // nmi_check_duration() prints "INFO: NMI handler took too long" on slow debug kernels.
  5141  	    // It happens a lot in qemu, and the messages are frequently corrupted
  5142  	    // (intermixed with other kernel output as they are printed from NMI)
  5143  	    // and are not matched against the suppression in pkg/report.
  5144  	    // This write prevents these messages from being printed.
  5145  	    {"/sys/kernel/debug/x86/nmi_longest_ns", "10000000000"},
  5146  #endif
  5147  	    {"/proc/sys/kernel/hung_task_check_interval_secs", "20"},
  5148  	    // bpf_jit_kallsyms and disabling bpf_jit_harden are required
  5149  	    // for unwinding through bpf functions.
  5150  	    {"/proc/sys/net/core/bpf_jit_kallsyms", "1"},
  5151  	    {"/proc/sys/net/core/bpf_jit_harden", "0"},
  5152  	    // This is to provide more useful info in crash reports.
  5153  	    {"/proc/sys/kernel/kptr_restrict", "0"},
  5154  	    {"/proc/sys/kernel/softlockup_all_cpu_backtrace", "1"},
  5155  	    // This is to restrict effects of recursive exponential mounts, for details see
  5156  	    // "mnt: Add a per mount namespace limit on the number of mounts" commit.
  5157  	    {"/proc/sys/fs/mount-max", "100"},
  5158  	    // Dumping all tasks to console can take too long.
  5159  	    {"/proc/sys/vm/oom_dump_tasks", "0"},
  5160  	    // Executor hits lots of SIGSEGVs, no point in logging them.
  5161  	    {"/proc/sys/debug/exception-trace", "0"},
  5162  	    {"/proc/sys/kernel/printk", "7 4 1 3"},
  5163  	    // Faster gc (1 second) is intended to make tests more repeatable.
  5164  	    {"/proc/sys/kernel/keys/gc_delay", "1"},
  5165  	    // We always want to prefer killing the allocating test process rather than somebody else
  5166  	    // (sshd or another random test process).
  5167  	    {"/proc/sys/vm/oom_kill_allocating_task", "1"},
  5168  	    // This blocks some of the ways the fuzzer can trigger a reboot.
  5169  	    // ctrl-alt-del=0 tells kernel to signal cad_pid instead of rebooting.
  5170  	    // We set cad_pid to a transient process pid ctrl-alt-del a no-op.
  5171  	    // Note: we need to write a live process pid.
  5172  	    // For context see: https://groups.google.com/g/syzkaller-bugs/c/WqOY4TiRnFg/m/6P9u8lWZAQAJ
  5173  	    {"/proc/sys/kernel/ctrl-alt-del", "0"},
  5174  	    {"/proc/sys/kernel/cad_pid", tmppid},
  5175  
  5176  	};
  5177  	for (size_t i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
  5178  		if (!write_file(files[i].name, files[i].data)) {
  5179  			debug("write to %s failed: %s\n", files[i].name, strerror(errno));
  5180  		}
  5181  	}
  5182  	kill(cad_pid, SIGKILL);
  5183  	while (waitpid(cad_pid, NULL, 0) != cad_pid)
  5184  		;
  5185  }
  5186  #endif
  5187  
  5188  #if SYZ_EXECUTOR || SYZ_802154
  5189  #include <net/if.h>
  5190  #include <string.h>
  5191  #include <sys/socket.h>
  5192  #include <sys/types.h>
  5193  
  5194  #define NL802154_CMD_SET_SHORT_ADDR 11
  5195  #define NL802154_ATTR_IFINDEX 3
  5196  #define NL802154_ATTR_SHORT_ADDR 10
  5197  
  5198  static const char* setup_802154()
  5199  {
  5200  	const char* error = NULL;
  5201  	int sock_generic = -1;
  5202  	int sock_route = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  5203  	if (sock_route == -1) {
  5204  		error = "socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE) failed";
  5205  		goto fail;
  5206  	}
  5207  	sock_generic = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
  5208  	if (sock_generic == -1) {
  5209  		error = "socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC) failed";
  5210  		goto fail;
  5211  	}
  5212  	{
  5213  		int nl802154_family_id = netlink_query_family_id(&nlmsg, sock_generic, "nl802154", true);
  5214  		if (nl802154_family_id < 0) {
  5215  			error = "netlink_query_family_id failed";
  5216  			goto fail;
  5217  		}
  5218  
  5219  		for (int i = 0; i < 2; i++) {
  5220  			// wpan0/1 are created by CONFIG_IEEE802154_HWSIM.
  5221  			// sys/linux/socket_ieee802154.txt knowns about these names and consts.
  5222  			char devname[] = "wpan0";
  5223  			devname[strlen(devname) - 1] += i;
  5224  			uint64 hwaddr = 0xaaaaaaaaaaaa0002 + (i << 8);
  5225  			uint16 shortaddr = 0xaaa0 + i;
  5226  			int ifindex = if_nametoindex(devname);
  5227  			struct genlmsghdr genlhdr;
  5228  			memset(&genlhdr, 0, sizeof(genlhdr));
  5229  			genlhdr.cmd = NL802154_CMD_SET_SHORT_ADDR;
  5230  			netlink_init(&nlmsg, nl802154_family_id, 0, &genlhdr, sizeof(genlhdr));
  5231  			netlink_attr(&nlmsg, NL802154_ATTR_IFINDEX, &ifindex, sizeof(ifindex));
  5232  			netlink_attr(&nlmsg, NL802154_ATTR_SHORT_ADDR, &shortaddr, sizeof(shortaddr));
  5233  			if (netlink_send(&nlmsg, sock_generic) < 0) {
  5234  				error = "NL802154_CMD_SET_SHORT_ADDR failed";
  5235  				goto fail;
  5236  			}
  5237  			netlink_device_change(&nlmsg, sock_route, devname, true, 0, &hwaddr, sizeof(hwaddr), 0);
  5238  			if (i == 0) {
  5239  				netlink_add_device_impl(&nlmsg, "lowpan", "lowpan0", false);
  5240  				netlink_done(&nlmsg);
  5241  				netlink_attr(&nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex));
  5242  				if (netlink_send(&nlmsg, sock_route) < 0) {
  5243  					error = "netlink: adding device lowpan0 type lowpan link wpan0";
  5244  					goto fail;
  5245  				}
  5246  			}
  5247  		}
  5248  	}
  5249  fail:
  5250  	close(sock_route);
  5251  	close(sock_generic);
  5252  	return error;
  5253  }
  5254  #endif
  5255  
  5256  #if GOARCH_s390x
  5257  #include <sys/mman.h>
  5258  // Ugly way to work around gcc's "error: function called through a non-compatible type".
  5259  // Simply casting via (void*) inline does not work b/c gcc sees through a chain of casts.
  5260  // The macro is used in generated C code.
  5261  #define CAST(f) ({void* p = (void*)f; p; })
  5262  #endif
  5263  
  5264  #if SYZ_EXECUTOR || __NR_syz_fuse_handle_req
  5265  #include <fcntl.h>
  5266  #include <stddef.h>
  5267  #include <stdio.h>
  5268  #include <sys/stat.h>
  5269  #include <sys/types.h>
  5270  
  5271  // From linux/fuse.h
  5272  #define FUSE_MIN_READ_BUFFER 8192
  5273  
  5274  // From linux/fuse.h
  5275  enum fuse_opcode {
  5276  	FUSE_LOOKUP = 1,
  5277  	FUSE_FORGET = 2, // no reply
  5278  	FUSE_GETATTR = 3,
  5279  	FUSE_SETATTR = 4,
  5280  	FUSE_READLINK = 5,
  5281  	FUSE_SYMLINK = 6,
  5282  	FUSE_MKNOD = 8,
  5283  	FUSE_MKDIR = 9,
  5284  	FUSE_UNLINK = 10,
  5285  	FUSE_RMDIR = 11,
  5286  	FUSE_RENAME = 12,
  5287  	FUSE_LINK = 13,
  5288  	FUSE_OPEN = 14,
  5289  	FUSE_READ = 15,
  5290  	FUSE_WRITE = 16,
  5291  	FUSE_STATFS = 17,
  5292  	FUSE_RELEASE = 18,
  5293  	FUSE_FSYNC = 20,
  5294  	FUSE_SETXATTR = 21,
  5295  	FUSE_GETXATTR = 22,
  5296  	FUSE_LISTXATTR = 23,
  5297  	FUSE_REMOVEXATTR = 24,
  5298  	FUSE_FLUSH = 25,
  5299  	FUSE_INIT = 26,
  5300  	FUSE_OPENDIR = 27,
  5301  	FUSE_READDIR = 28,
  5302  	FUSE_RELEASEDIR = 29,
  5303  	FUSE_FSYNCDIR = 30,
  5304  	FUSE_GETLK = 31,
  5305  	FUSE_SETLK = 32,
  5306  	FUSE_SETLKW = 33,
  5307  	FUSE_ACCESS = 34,
  5308  	FUSE_CREATE = 35,
  5309  	FUSE_INTERRUPT = 36,
  5310  	FUSE_BMAP = 37,
  5311  	FUSE_DESTROY = 38,
  5312  	FUSE_IOCTL = 39,
  5313  	FUSE_POLL = 40,
  5314  	FUSE_NOTIFY_REPLY = 41,
  5315  	FUSE_BATCH_FORGET = 42,
  5316  	FUSE_FALLOCATE = 43,
  5317  	FUSE_READDIRPLUS = 44,
  5318  	FUSE_RENAME2 = 45,
  5319  	FUSE_LSEEK = 46,
  5320  	FUSE_COPY_FILE_RANGE = 47,
  5321  	FUSE_SETUPMAPPING = 48,
  5322  	FUSE_REMOVEMAPPING = 49,
  5323  	FUSE_SYNCFS = 50,
  5324  	FUSE_TMPFILE = 51,
  5325  	FUSE_STATX = 52,
  5326  
  5327  	// CUSE specific operations
  5328  	CUSE_INIT = 4096,
  5329  
  5330  	// Reserved opcodes: helpful to detect structure endian-ness
  5331  	CUSE_INIT_BSWAP_RESERVED = 1048576, // CUSE_INIT << 8
  5332  	FUSE_INIT_BSWAP_RESERVED = 436207616, // FUSE_INIT << 24
  5333  };
  5334  
// From linux/fuse.h
// Header found at the start of every request read from /dev/fuse.
// syz_fuse_handle_req() only looks at len, opcode and unique; the remaining
// fields are declared to keep the layout but are not read in this file section.
struct fuse_in_header {
	uint32 len; // Compared against the number of bytes actually read.
	uint32 opcode; // An enum fuse_opcode value; selects the response to send.
	uint64 unique; // Copied into the reply header to link it to this request.
	uint64 nodeid;
	uint32 uid;
	uint32 gid;
	uint32 pid;
	uint32 padding;
};
  5346  
// From linux/fuse.h
// Header placed at the start of every reply written to /dev/fuse.
struct fuse_out_header {
	// Number of bytes passed to write() when the reply is sent.
	uint32 len;
	// This is actually an int32_t, but the *_t variants fail to compile inside
	// the executor (it appends an additional _t for some reason) and int32
	// does not exist. Since we don't touch this field, defining it as
	// unsigned should not cause any problems.
	uint32 error;
	// Set to the unique value of the request being answered.
	uint64 unique;
};
  5357  
// Struct shared between syz_fuse_handle_req() and the fuzzer. Used to provide
// a fuzzed response for each request type.
// Every member points to a reply that starts with a fuse_out_header; which
// opcodes map to which member is decided by the switch in syz_fuse_handle_req().
// A NULL member makes the corresponding reply fail in fuse_send_response().
struct syz_fuse_req_out {
	// Also used (header only) for opcodes that carry no reply data.
	struct fuse_out_header* init;
	struct fuse_out_header* lseek;
	struct fuse_out_header* bmap;
	struct fuse_out_header* poll;
	struct fuse_out_header* getxattr;
	struct fuse_out_header* lk;
	struct fuse_out_header* statfs;
	struct fuse_out_header* write;
	struct fuse_out_header* read;
	struct fuse_out_header* open;
	struct fuse_out_header* attr;
	struct fuse_out_header* entry;
	struct fuse_out_header* dirent;
	struct fuse_out_header* direntplus;
	struct fuse_out_header* create_open;
	struct fuse_out_header* ioctl;
	struct fuse_out_header* statx;
};
  5379  
  5380  // Link the reponse to the request and send it to /dev/fuse.
  5381  static int fuse_send_response(int fd,
  5382  			      const struct fuse_in_header* in_hdr,
  5383  			      struct fuse_out_header* out_hdr)
  5384  {
  5385  	if (!out_hdr) {
  5386  		debug("fuse_send_response: received a NULL out_hdr\n");
  5387  		return -1;
  5388  	}
  5389  
  5390  	out_hdr->unique = in_hdr->unique;
  5391  	if (write(fd, out_hdr, out_hdr->len) == -1) {
  5392  		debug("fuse_send_response > write failed: %d\n", errno);
  5393  		return -1;
  5394  	}
  5395  
  5396  	return 0;
  5397  }
  5398  
  5399  // This function reads a request from /dev/fuse and tries to pick the correct
  5400  // response from the input struct syz_fuse_req_out (a3). Responses are still
  5401  // generated by the fuzzer.
  5402  static volatile long syz_fuse_handle_req(volatile long a0, // /dev/fuse fd.
  5403  					 volatile long a1, // Read buffer.
  5404  					 volatile long a2, // Buffer len.
  5405  					 volatile long a3) // syz_fuse_req_out.
  5406  {
  5407  	struct syz_fuse_req_out* req_out = (struct syz_fuse_req_out*)a3;
  5408  	struct fuse_out_header* out_hdr = NULL;
  5409  	char* buf = (char*)a1;
  5410  	int buf_len = (int)a2;
  5411  	int fd = (int)a0;
  5412  
  5413  	if (!req_out) {
  5414  		debug("syz_fuse_handle_req: received a NULL syz_fuse_req_out\n");
  5415  		return -1;
  5416  	}
  5417  	if (buf_len < FUSE_MIN_READ_BUFFER) {
  5418  		debug("FUSE requires the read buffer to be at least %u\n", FUSE_MIN_READ_BUFFER);
  5419  		return -1;
  5420  	}
  5421  
  5422  	int ret = read(fd, buf, buf_len);
  5423  	if (ret == -1) {
  5424  		debug("syz_fuse_handle_req > read failed: %d\n", errno);
  5425  		return -1;
  5426  	}
  5427  	// Safe to do because ret > 0 (!= -1) and < FUSE_MIN_READ_BUFFER (= 8192).
  5428  	if ((size_t)ret < sizeof(struct fuse_in_header)) {
  5429  		debug("syz_fuse_handle_req: received a truncated FUSE header\n");
  5430  		return -1;
  5431  	}
  5432  
  5433  	const struct fuse_in_header* in_hdr = (const struct fuse_in_header*)buf;
  5434  	debug("syz_fuse_handle_req: received opcode %d\n", in_hdr->opcode);
  5435  	if (in_hdr->len > (uint32)ret) {
  5436  		debug("syz_fuse_handle_req: received a truncated message\n");
  5437  		return -1;
  5438  	}
  5439  
  5440  	switch (in_hdr->opcode) {
  5441  	case FUSE_GETATTR:
  5442  	case FUSE_SETATTR:
  5443  		out_hdr = req_out->attr;
  5444  		break;
  5445  	case FUSE_LOOKUP:
  5446  	case FUSE_SYMLINK:
  5447  	case FUSE_LINK:
  5448  	case FUSE_MKNOD:
  5449  	case FUSE_MKDIR:
  5450  		out_hdr = req_out->entry;
  5451  		break;
  5452  	case FUSE_OPEN:
  5453  	case FUSE_OPENDIR:
  5454  		out_hdr = req_out->open;
  5455  		break;
  5456  	case FUSE_STATFS:
  5457  		out_hdr = req_out->statfs;
  5458  		break;
  5459  	case FUSE_RMDIR:
  5460  	case FUSE_RENAME:
  5461  	case FUSE_RENAME2:
  5462  	case FUSE_FALLOCATE:
  5463  	case FUSE_SETXATTR:
  5464  	case FUSE_REMOVEXATTR:
  5465  	case FUSE_FSYNCDIR:
  5466  	case FUSE_FSYNC:
  5467  	case FUSE_SETLKW:
  5468  	case FUSE_SETLK:
  5469  	case FUSE_ACCESS:
  5470  	case FUSE_FLUSH:
  5471  	case FUSE_RELEASE:
  5472  	case FUSE_RELEASEDIR:
  5473  	case FUSE_UNLINK:
  5474  	case FUSE_DESTROY:
  5475  		// These opcodes do not have any reply data. Hence, we pick
  5476  		// another response and only use the shared header.
  5477  		out_hdr = req_out->init;
  5478  		if (!out_hdr) {
  5479  			debug("syz_fuse_handle_req: received a NULL out_hdr\n");
  5480  			return -1;
  5481  		}
  5482  		out_hdr->len = sizeof(struct fuse_out_header);
  5483  		break;
  5484  	case FUSE_READ:
  5485  		out_hdr = req_out->read;
  5486  		break;
  5487  	case FUSE_READDIR:
  5488  		out_hdr = req_out->dirent;
  5489  		break;
  5490  	case FUSE_READDIRPLUS:
  5491  		out_hdr = req_out->direntplus;
  5492  		break;
  5493  	case FUSE_INIT:
  5494  		out_hdr = req_out->init;
  5495  		break;
  5496  	case FUSE_LSEEK:
  5497  		out_hdr = req_out->lseek;
  5498  		break;
  5499  	case FUSE_GETLK:
  5500  		out_hdr = req_out->lk;
  5501  		break;
  5502  	case FUSE_BMAP:
  5503  		out_hdr = req_out->bmap;
  5504  		break;
  5505  	case FUSE_POLL:
  5506  		out_hdr = req_out->poll;
  5507  		break;
  5508  	case FUSE_GETXATTR:
  5509  	case FUSE_LISTXATTR:
  5510  		out_hdr = req_out->getxattr;
  5511  		break;
  5512  	case FUSE_WRITE:
  5513  	case FUSE_COPY_FILE_RANGE:
  5514  		out_hdr = req_out->write;
  5515  		break;
  5516  	case FUSE_FORGET:
  5517  	case FUSE_BATCH_FORGET:
  5518  		// FUSE_FORGET and FUSE_BATCH_FORGET expect no reply.
  5519  		return 0;
  5520  	case FUSE_CREATE:
  5521  		out_hdr = req_out->create_open;
  5522  		break;
  5523  	case FUSE_IOCTL:
  5524  		out_hdr = req_out->ioctl;
  5525  		break;
  5526  	case FUSE_STATX:
  5527  		out_hdr = req_out->statx;
  5528  		break;
  5529  	default:
  5530  		debug("syz_fuse_handle_req: unknown FUSE opcode\n");
  5531  		return -1;
  5532  	}
  5533  
  5534  	return fuse_send_response(fd, in_hdr, out_hdr);
  5535  }
  5536  #endif
  5537  
  5538  #if SYZ_EXECUTOR || __NR_syz_80211_inject_frame
  5539  #include <errno.h>
  5540  #include <linux/genetlink.h>
  5541  #include <linux/if_ether.h>
  5542  #include <linux/nl80211.h>
  5543  #include <net/if.h>
  5544  #include <sys/ioctl.h>
  5545  
  5546  // This pseudo syscall performs 802.11 frame injection.
  5547  //
  5548  // Its current implementation performs the injection by means of mac80211_hwsim.
  5549  // The procedure consists of the following steps:
  5550  // 1. Open a netlink socket
  5551  // 2. Register as an application responsible for wireless medium simulation by executing
//    HWSIM_CMD_REGISTER. This is a prerequisite for the following step. After HWSIM_CMD_REGISTER
  5553  //    is executed, mac80211_hwsim stops simulating a perfect medium.
  5554  //    It is also important to note that this command registers a specific socket, not a netlink port.
  5555  // 3. Inject a frame to the required interface by executing HWSIM_CMD_FRAME.
  5556  // 4. Close the socket. mac80211_hwsim will detect this and return to perfect medium simulation.
  5557  //
  5558  // Note that we cannot (should not) open a socket, register it once and then use it for frame injection
  5559  // throughout the lifetime of a proc. When some socket is registered, mac80211_hwsim does not broadcast
  5560  // frames to all interfaces itself. As we do not perform this activity either, a permanently registered
  5561  // socket will disrupt normal network operation.
  5562  
  5563  #define HWSIM_ATTR_RX_RATE 5
  5564  #define HWSIM_ATTR_SIGNAL 6
  5565  #define HWSIM_ATTR_ADDR_RECEIVER 1
  5566  #define HWSIM_ATTR_FRAME 3
  5567  
  5568  #define WIFI_MAX_INJECT_LEN 2048
  5569  
  5570  static int hwsim_register_socket(struct nlmsg* nlmsg, int sock, int hwsim_family)
  5571  {
  5572  	struct genlmsghdr genlhdr;
  5573  	memset(&genlhdr, 0, sizeof(genlhdr));
  5574  	genlhdr.cmd = HWSIM_CMD_REGISTER;
  5575  	netlink_init(nlmsg, hwsim_family, 0, &genlhdr, sizeof(genlhdr));
  5576  	int err = netlink_send_ext(nlmsg, sock, 0, NULL, false);
  5577  	if (err < 0) {
  5578  		debug("hwsim_register_device failed: %s\n", strerror(errno));
  5579  	}
  5580  	return err;
  5581  }
  5582  
  5583  static int hwsim_inject_frame(struct nlmsg* nlmsg, int sock, int hwsim_family, uint8* mac_addr, uint8* data, int len)
  5584  {
  5585  	struct genlmsghdr genlhdr;
  5586  	uint32 rx_rate = WIFI_DEFAULT_RX_RATE;
  5587  	uint32 signal = WIFI_DEFAULT_SIGNAL;
  5588  
  5589  	memset(&genlhdr, 0, sizeof(genlhdr));
  5590  	genlhdr.cmd = HWSIM_CMD_FRAME;
  5591  	netlink_init(nlmsg, hwsim_family, 0, &genlhdr, sizeof(genlhdr));
  5592  	netlink_attr(nlmsg, HWSIM_ATTR_RX_RATE, &rx_rate, sizeof(rx_rate));
  5593  	netlink_attr(nlmsg, HWSIM_ATTR_SIGNAL, &signal, sizeof(signal));
  5594  	netlink_attr(nlmsg, HWSIM_ATTR_ADDR_RECEIVER, mac_addr, ETH_ALEN);
  5595  	netlink_attr(nlmsg, HWSIM_ATTR_FRAME, data, len);
  5596  	int err = netlink_send_ext(nlmsg, sock, 0, NULL, false);
  5597  	if (err < 0) {
  5598  		debug("hwsim_inject_frame failed: %s\n", strerror(errno));
  5599  	}
  5600  	return err;
  5601  }
  5602  
  5603  static long syz_80211_inject_frame(volatile long a0, volatile long a1, volatile long a2)
  5604  {
  5605  	uint8* mac_addr = (uint8*)a0;
  5606  	uint8* buf = (uint8*)a1;
  5607  	int buf_len = (int)a2;
  5608  	struct nlmsg tmp_msg;
  5609  
  5610  	if (buf_len < 0 || buf_len > WIFI_MAX_INJECT_LEN) {
  5611  		debug("syz_80211_inject_frame: wrong buffer size %d\n", buf_len);
  5612  		return -1;
  5613  	}
  5614  
  5615  	int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
  5616  	if (sock < 0) {
  5617  		debug("syz_80211_inject_frame: socket creation failed, errno %d\n", errno);
  5618  		return -1;
  5619  	}
  5620  
  5621  	int hwsim_family_id = netlink_query_family_id(&tmp_msg, sock, "MAC80211_HWSIM", false);
  5622  	if (hwsim_family_id < 0) {
  5623  		debug("syz_80211_inject_frame: failed to query family id\n");
  5624  		close(sock);
  5625  		return -1;
  5626  	}
  5627  	int ret = hwsim_register_socket(&tmp_msg, sock, hwsim_family_id);
  5628  	if (ret < 0) {
  5629  		debug("syz_80211_inject_frame: failed to register socket, ret %d\n", ret);
  5630  		close(sock);
  5631  		return -1;
  5632  	}
  5633  
  5634  	ret = hwsim_inject_frame(&tmp_msg, sock, hwsim_family_id, mac_addr, buf, buf_len);
  5635  	close(sock);
  5636  	if (ret < 0) {
  5637  		debug("syz_80211_inject_frame: failed to inject message, ret %d\n", ret);
  5638  		return -1;
  5639  	}
  5640  
  5641  	return 0;
  5642  }
  5643  
  5644  #endif
  5645  
  5646  #if SYZ_EXECUTOR || __NR_syz_80211_join_ibss
  5647  
  5648  #define WIFI_MAX_SSID_LEN 32
  5649  
  5650  #define WIFI_JOIN_IBSS_NO_SCAN 0
  5651  #define WIFI_JOIN_IBSS_BG_SCAN 1
  5652  #define WIFI_JOIN_IBSS_BG_NO_SCAN 2
  5653  
  5654  static long syz_80211_join_ibss(volatile long a0, volatile long a1, volatile long a2, volatile long a3)
  5655  {
  5656  	char* interface = (char*)a0;
  5657  	uint8* ssid = (uint8*)a1;
  5658  	int ssid_len = (int)a2;
  5659  	int mode = (int)a3; // This parameter essentially determines whether it will perform a scan
  5660  
  5661  	struct nlmsg tmp_msg;
  5662  	uint8 bssid[ETH_ALEN] = WIFI_IBSS_BSSID;
  5663  
  5664  	if (ssid_len < 0 || ssid_len > WIFI_MAX_SSID_LEN) {
  5665  		debug("syz_80211_join_ibss: invalid ssid len %d\n", ssid_len);
  5666  		return -1;
  5667  	}
  5668  
  5669  	if (mode < 0 || mode > WIFI_JOIN_IBSS_BG_NO_SCAN) {
  5670  		debug("syz_80211_join_ibss: invalid mode %d\n", mode);
  5671  		return -1;
  5672  	}
  5673  
  5674  	int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
  5675  	if (sock < 0) {
  5676  		debug("syz_80211_join_ibss: socket creation failed, errno %d\n", errno);
  5677  		return -1;
  5678  	}
  5679  
  5680  	int nl80211_family_id = netlink_query_family_id(&tmp_msg, sock, "nl80211", false);
  5681  	if (nl80211_family_id < 0) {
  5682  		debug("syz_80211_join_ibss: netlink_query_family_id failed\n");
  5683  		close(sock);
  5684  		return -1;
  5685  	}
  5686  	struct join_ibss_props ibss_props = {
  5687  	    .wiphy_freq = WIFI_DEFAULT_FREQUENCY,
  5688  	    .wiphy_freq_fixed = (mode == WIFI_JOIN_IBSS_NO_SCAN || mode == WIFI_JOIN_IBSS_BG_NO_SCAN),
  5689  	    .mac = bssid,
  5690  	    .ssid = ssid,
  5691  	    .ssid_len = ssid_len};
  5692  
  5693  	int ret = nl80211_setup_ibss_interface(&tmp_msg, sock, nl80211_family_id, interface, &ibss_props, false);
  5694  	close(sock);
  5695  	if (ret < 0) {
  5696  		debug("syz_80211_join_ibss: failed set up IBSS network for %.32s\n", interface);
  5697  		return -1;
  5698  	}
  5699  
  5700  	if (mode == WIFI_JOIN_IBSS_NO_SCAN) {
  5701  		ret = await_ifla_operstate(&tmp_msg, interface, IF_OPER_UP, false);
  5702  		if (ret < 0) {
  5703  			debug("syz_80211_join_ibss: await_ifla_operstate failed for %.32s, ret %d\n", interface, ret);
  5704  			return -1;
  5705  		}
  5706  	}
  5707  
  5708  	return 0;
  5709  }
  5710  
  5711  #endif
  5712  
  5713  #if SYZ_EXECUTOR || __NR_syz_clone || __NR_syz_clone3
  5714  #if SYZ_EXECUTOR
  5715  // The slowdown multiplier is already taken into account.
  5716  #define USLEEP_FORKED_CHILD (3 * syscall_timeout_ms * 1000)
  5717  #else
  5718  #define USLEEP_FORKED_CHILD (3 * /*{{{BASE_CALL_TIMEOUT_MS}}}*/ *1000)
  5719  #endif
  5720  
  5721  static long handle_clone_ret(long ret)
  5722  {
  5723  	if (ret != 0) {
  5724  #if SYZ_EXECUTOR || SYZ_HANDLE_SEGV
  5725  		__atomic_store_n(&clone_ongoing, 0, __ATOMIC_RELAXED);
  5726  #endif
  5727  		return ret;
  5728  	}
  5729  	// Exit if we're in the child process - not all kernels provide the proper means
  5730  	// to prevent fork-bombs.
  5731  	// But first sleep for some time. This will hopefully foster IPC fuzzing.
  5732  	usleep(USLEEP_FORKED_CHILD);
  5733  	// Note that exit_group is a bad choice here because if we created just a thread, then
  5734  	// the whole process will be killed. A plain exit will work fine in any case.
  5735  	syscall(__NR_exit, 0);
  5736  	while (1) {
  5737  	}
  5738  }
  5739  #endif
  5740  
  5741  #if SYZ_EXECUTOR || __NR_syz_clone
  5742  #include <sched.h>
  5743  
  5744  // syz_clone is mostly needed on kernels which do not suport clone3.
  5745  static long syz_clone(volatile long flags, volatile long stack, volatile long stack_len,
  5746  		      volatile long ptid, volatile long ctid, volatile long tls)
  5747  {
  5748  	// ABI requires 16-byte stack alignment.
  5749  	long sp = (stack + stack_len) & ~15;
  5750  #if SYZ_EXECUTOR || SYZ_HANDLE_SEGV
  5751  	__atomic_store_n(&clone_ongoing, 1, __ATOMIC_RELAXED);
  5752  #endif
  5753  	// Clear the CLONE_VM flag. Otherwise it'll very likely corrupt syz-executor.
  5754  	long ret = (long)syscall(__NR_clone, flags & ~CLONE_VM, sp, ptid, ctid, tls);
  5755  	return handle_clone_ret(ret);
  5756  }
  5757  #endif
  5758  
  5759  #if SYZ_EXECUTOR || __NR_syz_clone3
  5760  #include <linux/sched.h>
  5761  #include <sched.h>
  5762  
  5763  #define MAX_CLONE_ARGS_BYTES 256
  5764  static long syz_clone3(volatile long a0, volatile long a1)
  5765  {
  5766  	unsigned long copy_size = a1;
  5767  	if (copy_size < sizeof(uint64) || copy_size > MAX_CLONE_ARGS_BYTES)
  5768  		return -1;
  5769  	// The structure may have different sizes on different kernel versions, so copy it as raw bytes.
  5770  	char clone_args[MAX_CLONE_ARGS_BYTES];
  5771  	memcpy(&clone_args, (void*)a0, copy_size);
  5772  
  5773  	// As in syz_clone, clear the CLONE_VM flag. Flags are in the first 8-byte integer field.
  5774  	uint64* flags = (uint64*)&clone_args;
  5775  	*flags &= ~CLONE_VM;
  5776  #if SYZ_EXECUTOR || SYZ_HANDLE_SEGV
  5777  	__atomic_store_n(&clone_ongoing, 1, __ATOMIC_RELAXED);
  5778  #endif
  5779  	return handle_clone_ret((long)syscall(__NR_clone3, &clone_args, copy_size));
  5780  }
  5781  
  5782  #endif
  5783  
  5784  #if SYZ_EXECUTOR || __NR_syz_pkey_set
  5785  #include <errno.h>
  5786  #define RESERVED_PKEY 15
  5787  // syz_pkey_set(key pkey, val flags[pkey_flags])
  5788  static long syz_pkey_set(volatile long pkey, volatile long val)
  5789  {
  5790  #if GOARCH_amd64 || GOARCH_386
  5791  	if (pkey == RESERVED_PKEY) {
  5792  		errno = EINVAL;
  5793  		return -1;
  5794  	}
  5795  	uint32 eax = 0;
  5796  	uint32 ecx = 0;
  5797  	asm volatile("rdpkru"
  5798  		     : "=a"(eax)
  5799  		     : "c"(ecx)
  5800  		     : "edx");
  5801  	// PKRU register contains 2 bits per key.
  5802  	// Max number of keys is 16.
  5803  	// Clear old bits for the key:
  5804  	eax &= ~(3 << ((pkey % 16) * 2));
  5805  	// Set new bits for the key:
  5806  	eax |= (val & 3) << ((pkey % 16) * 2);
  5807  	uint32 edx = 0;
  5808  	asm volatile("wrpkru" ::"a"(eax), "c"(ecx), "d"(edx));
  5809  #endif
  5810  	return 0;
  5811  }
  5812  #endif
  5813  
  5814  #if SYZ_EXECUTOR || SYZ_SWAP
  5815  #include <fcntl.h>
  5816  #include <linux/falloc.h>
  5817  #include <stdio.h>
  5818  #include <string.h>
  5819  #include <sys/stat.h>
  5820  #include <sys/swap.h>
  5821  #include <sys/types.h>
  5822  
  5823  #define SWAP_FILE "./swap-file"
  5824  #define SWAP_FILE_SIZE (128 * 1000 * 1000) // 128 MB.
  5825  
  5826  static const char* setup_swap()
  5827  {
  5828  	// The call must be idempotent, so first disable swap and remove the swap file.
  5829  	swapoff(SWAP_FILE);
  5830  	unlink(SWAP_FILE);
  5831  	// Zero-fill the file.
  5832  	int fd = open(SWAP_FILE, O_CREAT | O_WRONLY | O_CLOEXEC, 0600);
  5833  	if (fd == -1)
  5834  		return "swap file open failed";
  5835  	// We cannot do ftruncate -- swapon complains about this. Do fallocate instead.
  5836  	fallocate(fd, FALLOC_FL_ZERO_RANGE, 0, SWAP_FILE_SIZE);
  5837  	close(fd);
  5838  	// Set up the swap file.
  5839  	char cmdline[64];
  5840  	sprintf(cmdline, "mkswap %s", SWAP_FILE);
  5841  	if (runcmdline(cmdline))
  5842  		return "mkswap failed";
  5843  	if (swapon(SWAP_FILE, SWAP_FLAG_PREFER) == 1)
  5844  		return "swapon failed";
  5845  	return NULL;
  5846  }
  5847  #endif
  5848  
  5849  #if SYZ_EXECUTOR || __NR_syz_pidfd_open
  5850  #include <sys/syscall.h>
  5851  
  5852  // TODO: long-term we should improve our sandboxing rules since there are also
  5853  // many other opportunities for a fuzzer process to access what it shouldn't.
  5854  // Here we only shut down one of the recently discovered ways.
  5855  static long syz_pidfd_open(volatile long pid, volatile long flags)
  5856  {
  5857  	if (pid == 1) {
  5858  		// Under a PID namespace, pid=1 is the parent process.
  5859  		// We don't want a forked child to mangle parent syz-executor's fds.
  5860  		pid = 0;
  5861  	}
  5862  	return syscall(__NR_pidfd_open, pid, flags);
  5863  }
  5864  
  5865  #endif
  5866  
  5867  #if SYZ_EXECUTOR || __NR_syz_kfuzztest_run
  5868  
  5869  #include <fcntl.h>
  5870  #include <stdint.h>
  5871  #include <stdio.h>
  5872  #include <stdlib.h>
  5873  #include <string.h>
  5874  #include <sys/stat.h>
  5875  #include <sys/types.h>
  5876  #include <unistd.h>
  5877  
  5878  static long syz_kfuzztest_run(volatile long test_name_ptr, volatile long input_data,
  5879  			      volatile long input_data_size, volatile long buffer)
  5880  {
  5881  	const char* test_name = (const char*)test_name_ptr;
  5882  	if (!test_name) {
  5883  		debug("syz_kfuzztest_run: test name was NULL\n");
  5884  		return -1;
  5885  	}
  5886  	if (!buffer) {
  5887  		debug("syz_kfuzztest_run: buffer was NULL\n");
  5888  		return -1;
  5889  	}
  5890  
  5891  	char buf[256];
  5892  	int ret = snprintf(buf, sizeof(buf), "/sys/kernel/debug/kfuzztest/%s/input", test_name);
  5893  	if (ret < 0 || (unsigned long)ret >= sizeof(buf)) {
  5894  		debug("syz_kfuzztest_run: constructed path is too long or snprintf failed\n");
  5895  		return -1;
  5896  	}
  5897  
  5898  	int fd = openat(AT_FDCWD, buf, O_WRONLY, 0);
  5899  	if (fd < 0) {
  5900  		debug("syz_kfuzztest_run: failed to open %s\n", buf);
  5901  		return -1;
  5902  	}
  5903  
  5904  	ssize_t bytes_written = write(fd, (void*)buffer, (size_t)input_data_size);
  5905  	if (bytes_written != input_data_size) {
  5906  		debug("syz_kfuzztest_run: failed to write to %s, reason: %s\n", buf, strerror(errno));
  5907  		close(fd);
  5908  		return -1;
  5909  	}
  5910  
  5911  	if (close(fd) != 0) {
  5912  		debug("syz_kfuzztest_run: failed to close file\n");
  5913  		return -1;
  5914  	}
  5915  
  5916  	return 0;
  5917  }
  5918  
  5919  #endif