github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/executor/common_linux.h (about)

     1  // Copyright 2016 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  // This file is shared between executor and csource package.
     5  
     6  #include <stdlib.h>
     7  #include <sys/syscall.h>
     8  #include <sys/types.h>
     9  #include <unistd.h>
    10  
    11  #if SYZ_EXECUTOR
// Size constant with value 256 << 10 (262144). Presumably the size of the
// "extra" coverage buffer; its users and units are not visible here - TODO confirm.
const int kExtraCoverSize = 256 << 10;
// Forward declarations: neither cover_t nor cover_reset is defined in this part of the file.
struct cover_t;
static void cover_reset(cover_t* cov);
    15  #endif
    16  
    17  #if SYZ_EXECUTOR || SYZ_THREADED
    18  #include <linux/futex.h>
    19  #include <pthread.h>
    20  
// One-shot event built on a futex word.
typedef struct {
	// 0 while the event is not signalled, 1 once event_set() has run.
	// Also used as the futex address by the wait/wake helpers below.
	int state;
} event_t;
    24  
    25  static void event_init(event_t* ev)
    26  {
    27  	ev->state = 0;
    28  }
    29  
    30  static void event_reset(event_t* ev)
    31  {
    32  	ev->state = 0;
    33  }
    34  
    35  static void event_set(event_t* ev)
    36  {
    37  	if (ev->state)
    38  		exitf("event already set");
    39  	__atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
    40  	syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1000000);
    41  }
    42  
    43  static void event_wait(event_t* ev)
    44  {
    45  	while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
    46  		syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0);
    47  }
    48  
    49  static int event_isset(event_t* ev)
    50  {
    51  	return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
    52  }
    53  
    54  static int event_timedwait(event_t* ev, uint64 timeout)
    55  {
    56  	uint64 start = current_time_ms();
    57  	uint64 now = start;
    58  	for (;;) {
    59  		uint64 remain = timeout - (now - start);
    60  		struct timespec ts;
    61  		ts.tv_sec = remain / 1000;
    62  		ts.tv_nsec = (remain % 1000) * 1000 * 1000;
    63  		syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts);
    64  		if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
    65  			return 1;
    66  		now = current_time_ms();
    67  		if (now - start > timeout)
    68  			return 0;
    69  	}
    70  }
    71  #endif
    72  
    73  #if SYZ_EXECUTOR || SYZ_REPEAT || SYZ_NET_INJECTION || SYZ_FAULT || SYZ_SANDBOX_NONE || \
    74      SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID ||               \
    75      SYZ_FAULT || SYZ_LEAK || SYZ_BINFMT_MISC || SYZ_SYSCTL ||                           \
    76      ((__NR_syz_usb_connect || __NR_syz_usb_connect_ath9k) && USB_DEBUG) ||              \
    77      __NR_syz_usbip_server_init
    78  #include <errno.h>
    79  #include <fcntl.h>
    80  #include <stdarg.h>
    81  #include <stdbool.h>
    82  #include <string.h>
    83  #include <sys/stat.h>
    84  #include <sys/types.h>
    85  
    86  static bool write_file(const char* file, const char* what, ...)
    87  {
    88  	char buf[1024];
    89  	va_list args;
    90  	va_start(args, what);
    91  	vsnprintf(buf, sizeof(buf), what, args);
    92  	va_end(args);
    93  	buf[sizeof(buf) - 1] = 0;
    94  	int len = strlen(buf);
    95  
    96  	int fd = open(file, O_WRONLY | O_CLOEXEC);
    97  	if (fd == -1)
    98  		return false;
    99  	if (write(fd, buf, len) != len) {
   100  		int err = errno;
   101  		close(fd);
   102  		debug("write(%s) failed: %d\n", file, err);
   103  		errno = err;
   104  		return false;
   105  	}
   106  	close(fd);
   107  	return true;
   108  }
   109  #endif
   110  
   111  #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION || SYZ_DEVLINK_PCI || SYZ_WIFI || SYZ_802154 || \
   112      __NR_syz_genetlink_get_family_id || __NR_syz_80211_inject_frame || __NR_syz_80211_join_ibss || SYZ_NIC_VF
   113  #include <arpa/inet.h>
   114  #include <errno.h>
   115  #include <net/if.h>
   116  #include <netinet/in.h>
   117  #include <stdbool.h>
   118  #include <string.h>
   119  #include <sys/socket.h>
   120  #include <sys/types.h>
   121  
   122  #include <linux/genetlink.h>
   123  #include <linux/if_addr.h>
   124  #include <linux/if_link.h>
   125  #include <linux/in6.h>
   126  #include <linux/neighbour.h>
   127  #include <linux/net.h>
   128  #include <linux/netlink.h>
   129  #include <linux/rtnetlink.h>
   130  #include <linux/veth.h>
   131  
// Scratch state for building one netlink request in place and for receiving
// its reply (netlink_send_ext reads the reply back into buf).
struct nlmsg {
	// Write cursor inside buf: where the next attribute will be placed.
	char* pos;
	// Number of nested attributes opened by netlink_nest and not yet closed.
	int nesting;
	// Stack of the currently open nested attributes (at most 8 levels).
	struct nlattr* nested[8];
	// Message storage; starts with the struct nlmsghdr.
	char buf[4096];
};
   138  
   139  static void netlink_init(struct nlmsg* nlmsg, int typ, int flags,
   140  			 const void* data, int size)
   141  {
   142  	memset(nlmsg, 0, sizeof(*nlmsg));
   143  	struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg->buf;
   144  	hdr->nlmsg_type = typ;
   145  	hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags;
   146  	memcpy(hdr + 1, data, size);
   147  	nlmsg->pos = (char*)(hdr + 1) + NLMSG_ALIGN(size);
   148  }
   149  
   150  static void netlink_attr(struct nlmsg* nlmsg, int typ,
   151  			 const void* data, int size)
   152  {
   153  	struct nlattr* attr = (struct nlattr*)nlmsg->pos;
   154  	attr->nla_len = sizeof(*attr) + size;
   155  	attr->nla_type = typ;
   156  	if (size > 0)
   157  		memcpy(attr + 1, data, size);
   158  	nlmsg->pos += NLMSG_ALIGN(attr->nla_len);
   159  }
   160  
   161  #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_802154
   162  static void netlink_nest(struct nlmsg* nlmsg, int typ)
   163  {
   164  	struct nlattr* attr = (struct nlattr*)nlmsg->pos;
   165  	attr->nla_type = typ;
   166  	nlmsg->pos += sizeof(*attr);
   167  	nlmsg->nested[nlmsg->nesting++] = attr;
   168  }
   169  
   170  static void netlink_done(struct nlmsg* nlmsg)
   171  {
   172  	struct nlattr* attr = nlmsg->nested[--nlmsg->nesting];
   173  	attr->nla_len = nlmsg->pos - (char*)attr;
   174  }
   175  
   176  #if SYZ_EXECUTOR || SYZ_NIC_VF
   177  #include <ifaddrs.h>
   178  #include <linux/ethtool.h>
   179  #include <linux/sockios.h>
   180  #include <sys/ioctl.h>
   181  
// Result of the VF pass-through interface lookup done by find_vf_interface().
struct vf_intf {
	// Name of the detected pass-through interface; all zeroes if none was found.
	char pass_thru_intf[IFNAMSIZ];
	// Pid of the process that performed the lookup; 0 means "not found".
	int ppid; // used by Child
};

// Single global instance filled in by find_vf_interface().
static struct vf_intf vf_intf;
   188  
   189  static void find_vf_interface(void)
   190  {
   191  #if SYZ_EXECUTOR
   192  	if (!flag_nic_vf)
   193  		return;
   194  #endif
   195  	struct ifaddrs* addresses = NULL;
   196  	int pid = getpid();
   197  	int ret = 0;
   198  
   199  	memset(&vf_intf, 0, sizeof(struct vf_intf));
   200  
   201  	debug("Checking for VF pass-thru interface.\n");
   202  	if (getifaddrs(&addresses) == -1) {
   203  		debug("%s: getifaddrs() failed.\n", __func__);
   204  		return;
   205  	}
   206  
   207  	int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
   208  
   209  	if (fd < 0) {
   210  		debug("%s: socket() failed.\n", __func__);
   211  		return;
   212  	}
   213  	struct ifreq ifr;
   214  	struct ethtool_drvinfo drvinfo;
   215  	struct ifaddrs* address = addresses;
   216  
   217  	while (address) {
   218  		debug("ifa_name: %s\n", address->ifa_name);
   219  		memset(&ifr, 0, sizeof(struct ifreq));
   220  		strcpy(ifr.ifr_name, address->ifa_name);
   221  		memset(&drvinfo, 0, sizeof(struct ethtool_drvinfo));
   222  		drvinfo.cmd = ETHTOOL_GDRVINFO;
   223  		ifr.ifr_data = (caddr_t)&drvinfo;
   224  		ret = ioctl(fd, SIOCETHTOOL, &ifr);
   225  
   226  		if (ret < 0) {
   227  			debug("%s: ioctl() failed.\n", __func__);
   228  		} else if (strlen(drvinfo.bus_info)) {
   229  			debug("bus_info: %s, strlen(drvinfo.bus_info)=%zu\n",
   230  			      drvinfo.bus_info, strlen(drvinfo.bus_info));
   231  			if (strcmp(drvinfo.bus_info, "0000:00:11.0") == 0) {
   232  				if (strlen(address->ifa_name) < IFNAMSIZ) {
   233  					strncpy(vf_intf.pass_thru_intf,
   234  						address->ifa_name, IFNAMSIZ);
   235  					vf_intf.ppid = pid;
   236  				} else {
   237  					debug("%s: %d strlen(%s) >= IFNAMSIZ.\n",
   238  					      __func__, pid, address->ifa_name);
   239  				}
   240  				break;
   241  			}
   242  		}
   243  		address = address->ifa_next;
   244  	}
   245  	freeifaddrs(addresses);
   246  	if (!vf_intf.ppid) {
   247  		memset(&vf_intf, 0, sizeof(struct vf_intf));
   248  		debug("%s: %d could not find VF pass-thru interface.\n", __func__, pid);
   249  		return;
   250  	}
   251  	debug("%s: %d found VF pass-thru interface %s\n",
   252  	      __func__, pid, vf_intf.pass_thru_intf);
   253  }
   254  #endif // SYZ_NIC_VF
   255  
   256  #endif
   257  
   258  static int netlink_send_ext(struct nlmsg* nlmsg, int sock,
   259  			    uint16 reply_type, int* reply_len, bool dofail)
   260  {
   261  #if SYZ_EXECUTOR
   262  	if (in_execute_one && dofail) {
   263  		// We can expect different sorts of breakages during fuzzing,
   264  		// we should not kill the whole process because of them.
   265  		failmsg("invalid netlink_send_ext arguments", "dofail is true during syscall execution");
   266  	}
   267  #endif
   268  	if (nlmsg->pos > nlmsg->buf + sizeof(nlmsg->buf) || nlmsg->nesting)
   269  		fail("nlmsg overflow/bad nesting");
   270  	struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg->buf;
   271  	hdr->nlmsg_len = nlmsg->pos - nlmsg->buf;
   272  	struct sockaddr_nl addr;
   273  	memset(&addr, 0, sizeof(addr));
   274  	addr.nl_family = AF_NETLINK;
   275  	ssize_t n = sendto(sock, nlmsg->buf, hdr->nlmsg_len, 0, (struct sockaddr*)&addr, sizeof(addr));
   276  	if (n != (ssize_t)hdr->nlmsg_len) {
   277  		if (dofail)
   278  			failmsg("netlink_send_ext: short netlink write", "wrote=%zd, want=%d", n, hdr->nlmsg_len);
   279  		debug("netlink_send_ext: short netlink write: %zd/%d errno=%d\n", n, hdr->nlmsg_len, errno);
   280  		return -1;
   281  	}
   282  	n = recv(sock, nlmsg->buf, sizeof(nlmsg->buf), 0);
   283  	if (reply_len)
   284  		*reply_len = 0;
   285  	if (n < 0) {
   286  		if (dofail)
   287  			fail("netlink_send_ext: netlink read failed");
   288  		debug("netlink_send_ext: netlink read failed: errno=%d\n", errno);
   289  		return -1;
   290  	}
   291  	if (n < (ssize_t)sizeof(struct nlmsghdr)) {
   292  		errno = EINVAL;
   293  		if (dofail)
   294  			failmsg("netlink_send_ext: short netlink read", "read=%zd", n);
   295  		debug("netlink_send_ext: short netlink read: %zd\n", n);
   296  		return -1;
   297  	}
   298  	if (hdr->nlmsg_type == NLMSG_DONE)
   299  		return 0;
   300  	if (reply_len && hdr->nlmsg_type == reply_type) {
   301  		*reply_len = n;
   302  		return 0;
   303  	}
   304  	if (n < (ssize_t)(sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr))) {
   305  		errno = EINVAL;
   306  		if (dofail)
   307  			failmsg("netlink_send_ext: short netlink read", "read=%zd", n);
   308  		debug("netlink_send_ext: short netlink read: %zd\n", n);
   309  		return -1;
   310  	}
   311  	if (hdr->nlmsg_type != NLMSG_ERROR) {
   312  		errno = EINVAL;
   313  		if (dofail)
   314  			failmsg("netlink_send_ext: bad netlink ack type", "type=%d", hdr->nlmsg_type);
   315  		debug("netlink_send_ext: short netlink ack: %d\n", hdr->nlmsg_type);
   316  		return -1;
   317  	}
   318  	errno = -((struct nlmsgerr*)(hdr + 1))->error;
   319  	return -errno;
   320  }
   321  
   322  #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION || SYZ_DEVLINK_PCI || SYZ_WIFI || SYZ_802154
   323  static int netlink_send(struct nlmsg* nlmsg, int sock)
   324  {
   325  	return netlink_send_ext(nlmsg, sock, 0, NULL, true);
   326  }
   327  #endif
   328  
   329  static int netlink_query_family_id(struct nlmsg* nlmsg, int sock, const char* family_name, bool dofail)
   330  {
   331  	struct genlmsghdr genlhdr;
   332  	memset(&genlhdr, 0, sizeof(genlhdr));
   333  	genlhdr.cmd = CTRL_CMD_GETFAMILY;
   334  	netlink_init(nlmsg, GENL_ID_CTRL, 0, &genlhdr, sizeof(genlhdr));
   335  	netlink_attr(nlmsg, CTRL_ATTR_FAMILY_NAME, family_name, strnlen(family_name, GENL_NAMSIZ - 1) + 1);
   336  	int n = 0;
   337  	int err = netlink_send_ext(nlmsg, sock, GENL_ID_CTRL, &n, dofail);
   338  	if (err < 0) {
   339  		debug("netlink: failed to get family id for %.*s: %s\n", GENL_NAMSIZ, family_name, strerror(errno));
   340  		return -1;
   341  	}
   342  	uint16 id = 0;
   343  	struct nlattr* attr = (struct nlattr*)(nlmsg->buf + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(genlhdr)));
   344  	for (; (char*)attr < nlmsg->buf + n; attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) {
   345  		if (attr->nla_type == CTRL_ATTR_FAMILY_ID) {
   346  			id = *(uint16*)(attr + 1);
   347  			break;
   348  		}
   349  	}
   350  	if (!id) {
   351  		debug("netlink: failed to parse family id for %.*s\n", GENL_NAMSIZ, family_name);
   352  		errno = EINVAL;
   353  		return -1;
   354  	}
   355  	recv(sock, nlmsg->buf, sizeof(nlmsg->buf), 0); // recv ack
   356  
   357  	return id;
   358  }
   359  
   360  #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_DEVLINK_PCI
   361  static int netlink_next_msg(struct nlmsg* nlmsg, unsigned int offset,
   362  			    unsigned int total_len)
   363  {
   364  	struct nlmsghdr* hdr = (struct nlmsghdr*)(nlmsg->buf + offset);
   365  
   366  	if (offset == total_len || offset + hdr->nlmsg_len > total_len)
   367  		return -1;
   368  	return hdr->nlmsg_len;
   369  }
   370  #endif
   371  
   372  #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_802154
   373  static void netlink_add_device_impl(struct nlmsg* nlmsg, const char* type,
   374  				    const char* name, bool up)
   375  {
   376  	struct ifinfomsg hdr;
   377  	memset(&hdr, 0, sizeof(hdr));
   378  	if (up)
   379  		hdr.ifi_flags = hdr.ifi_change = IFF_UP;
   380  	netlink_init(nlmsg, RTM_NEWLINK, NLM_F_EXCL | NLM_F_CREATE, &hdr, sizeof(hdr));
   381  	if (name)
   382  		netlink_attr(nlmsg, IFLA_IFNAME, name, strlen(name));
   383  	netlink_nest(nlmsg, IFLA_LINKINFO);
   384  	netlink_attr(nlmsg, IFLA_INFO_KIND, type, strlen(type));
   385  }
   386  #endif
   387  
   388  #if SYZ_EXECUTOR || SYZ_NET_DEVICES
   389  static void netlink_add_device(struct nlmsg* nlmsg, int sock, const char* type,
   390  			       const char* name)
   391  {
   392  	netlink_add_device_impl(nlmsg, type, name, false);
   393  	netlink_done(nlmsg);
   394  	int err = netlink_send(nlmsg, sock);
   395  	if (err < 0) {
   396  		debug("netlink: adding device %s type %s: %s\n", name, type, strerror(errno));
   397  	}
   398  }
   399  
   400  static void netlink_add_veth(struct nlmsg* nlmsg, int sock, const char* name,
   401  			     const char* peer)
   402  {
   403  	netlink_add_device_impl(nlmsg, "veth", name, false);
   404  	netlink_nest(nlmsg, IFLA_INFO_DATA);
   405  	netlink_nest(nlmsg, VETH_INFO_PEER);
   406  	nlmsg->pos += sizeof(struct ifinfomsg);
   407  	netlink_attr(nlmsg, IFLA_IFNAME, peer, strlen(peer));
   408  	netlink_done(nlmsg);
   409  	netlink_done(nlmsg);
   410  	netlink_done(nlmsg);
   411  	int err = netlink_send(nlmsg, sock);
   412  	if (err < 0) {
   413  		debug("netlink: adding device %s type veth peer %s: %s\n", name, peer, strerror(errno));
   414  	}
   415  }
   416  
   417  static void netlink_add_xfrm(struct nlmsg* nlmsg, int sock, const char* name)
   418  {
   419  	netlink_add_device_impl(nlmsg, "xfrm", name, true);
   420  	netlink_nest(nlmsg, IFLA_INFO_DATA);
   421  	int if_id = 1;
   422  	// This is IFLA_XFRM_IF_ID attr which is not present in older kernel headers.
   423  	netlink_attr(nlmsg, 2, &if_id, sizeof(if_id));
   424  	netlink_done(nlmsg);
   425  	netlink_done(nlmsg);
   426  	int err = netlink_send(nlmsg, sock);
   427  	if (err < 0) {
   428  		debug("netlink: adding device %s type xfrm if_id %d: %s\n", name, if_id, strerror(errno));
   429  	}
   430  }
   431  
   432  static void netlink_add_hsr(struct nlmsg* nlmsg, int sock, const char* name,
   433  			    const char* slave1, const char* slave2)
   434  {
   435  	netlink_add_device_impl(nlmsg, "hsr", name, false);
   436  	netlink_nest(nlmsg, IFLA_INFO_DATA);
   437  	int ifindex1 = if_nametoindex(slave1);
   438  	netlink_attr(nlmsg, IFLA_HSR_SLAVE1, &ifindex1, sizeof(ifindex1));
   439  	int ifindex2 = if_nametoindex(slave2);
   440  	netlink_attr(nlmsg, IFLA_HSR_SLAVE2, &ifindex2, sizeof(ifindex2));
   441  	netlink_done(nlmsg);
   442  	netlink_done(nlmsg);
   443  	int err = netlink_send(nlmsg, sock);
   444  	if (err < 0) {
   445  		debug("netlink: adding device %s type hsr slave1 %s slave2 %s: %s\n", name, slave1, slave2, strerror(errno));
   446  	}
   447  }
   448  
   449  static void netlink_add_linked(struct nlmsg* nlmsg, int sock, const char* type, const char* name, const char* link)
   450  {
   451  	netlink_add_device_impl(nlmsg, type, name, false);
   452  	netlink_done(nlmsg);
   453  	int ifindex = if_nametoindex(link);
   454  	netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex));
   455  	int err = netlink_send(nlmsg, sock);
   456  	if (err < 0) {
   457  		debug("netlink: adding device %s type %s link %s: %s\n", name, type, link, strerror(errno));
   458  	}
   459  }
   460  
   461  static void netlink_add_vlan(struct nlmsg* nlmsg, int sock, const char* name, const char* link, uint16 id, uint16 proto)
   462  {
   463  	netlink_add_device_impl(nlmsg, "vlan", name, false);
   464  	netlink_nest(nlmsg, IFLA_INFO_DATA);
   465  	netlink_attr(nlmsg, IFLA_VLAN_ID, &id, sizeof(id));
   466  	netlink_attr(nlmsg, IFLA_VLAN_PROTOCOL, &proto, sizeof(proto));
   467  	netlink_done(nlmsg);
   468  	netlink_done(nlmsg);
   469  	int ifindex = if_nametoindex(link);
   470  	netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex));
   471  	int err = netlink_send(nlmsg, sock);
   472  	if (err < 0) {
   473  		debug("netlink: add %s type vlan link %s id %d: %s\n", name, link, id, strerror(errno));
   474  	}
   475  }
   476  
   477  static void netlink_add_macvlan(struct nlmsg* nlmsg, int sock, const char* name, const char* link)
   478  {
   479  	netlink_add_device_impl(nlmsg, "macvlan", name, false);
   480  	netlink_nest(nlmsg, IFLA_INFO_DATA);
   481  	uint32 mode = MACVLAN_MODE_BRIDGE;
   482  	netlink_attr(nlmsg, IFLA_MACVLAN_MODE, &mode, sizeof(mode));
   483  	netlink_done(nlmsg);
   484  	netlink_done(nlmsg);
   485  	int ifindex = if_nametoindex(link);
   486  	netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex));
   487  	int err = netlink_send(nlmsg, sock);
   488  	if (err < 0) {
   489  		debug("netlink: add %s type macvlan link %s mode %d: %s\n", name, link, mode, strerror(errno));
   490  	}
   491  }
   492  
   493  static void netlink_add_geneve(struct nlmsg* nlmsg, int sock, const char* name, uint32 vni, struct in_addr* addr4, struct in6_addr* addr6)
   494  {
   495  	netlink_add_device_impl(nlmsg, "geneve", name, false);
   496  	netlink_nest(nlmsg, IFLA_INFO_DATA);
   497  	netlink_attr(nlmsg, IFLA_GENEVE_ID, &vni, sizeof(vni));
   498  	if (addr4)
   499  		netlink_attr(nlmsg, IFLA_GENEVE_REMOTE, addr4, sizeof(*addr4));
   500  	if (addr6)
   501  		netlink_attr(nlmsg, IFLA_GENEVE_REMOTE6, addr6, sizeof(*addr6));
   502  	netlink_done(nlmsg);
   503  	netlink_done(nlmsg);
   504  	int err = netlink_send(nlmsg, sock);
   505  	if (err < 0) {
   506  		debug("netlink: add %s type geneve vni %u: %s\n", name, vni, strerror(errno));
   507  	}
   508  }
   509  
// Local definitions of ipvlan constants, presumably because they may be
// missing from older kernel headers - TODO confirm.
#define IFLA_IPVLAN_FLAGS 2
#define IPVLAN_MODE_L3S 2
// Drop any header-provided definition before forcing the value used here.
#undef IPVLAN_F_VEPA
#define IPVLAN_F_VEPA 2
   514  
   515  static void netlink_add_ipvlan(struct nlmsg* nlmsg, int sock, const char* name, const char* link, uint16 mode, uint16 flags)
   516  {
   517  	netlink_add_device_impl(nlmsg, "ipvlan", name, false);
   518  	netlink_nest(nlmsg, IFLA_INFO_DATA);
   519  	netlink_attr(nlmsg, IFLA_IPVLAN_MODE, &mode, sizeof(mode));
   520  	netlink_attr(nlmsg, IFLA_IPVLAN_FLAGS, &flags, sizeof(flags));
   521  	netlink_done(nlmsg);
   522  	netlink_done(nlmsg);
   523  	int ifindex = if_nametoindex(link);
   524  	netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex));
   525  	int err = netlink_send(nlmsg, sock);
   526  	if (err < 0) {
   527  		debug("netlink: add %s type ipvlan link %s mode %d: %s\n", name, link, mode, strerror(errno));
   528  	}
   529  }
   530  #endif
   531  
   532  #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION || SYZ_DEVLINK_PCI || SYZ_802154
   533  static void netlink_device_change(struct nlmsg* nlmsg, int sock, const char* name, bool up,
   534  				  const char* master, const void* mac, int macsize,
   535  				  const char* new_name)
   536  {
   537  	struct ifinfomsg hdr;
   538  	memset(&hdr, 0, sizeof(hdr));
   539  	if (up)
   540  		hdr.ifi_flags = hdr.ifi_change = IFF_UP;
   541  	hdr.ifi_index = if_nametoindex(name);
   542  	netlink_init(nlmsg, RTM_NEWLINK, 0, &hdr, sizeof(hdr));
   543  	if (new_name)
   544  		netlink_attr(nlmsg, IFLA_IFNAME, new_name, strlen(new_name));
   545  	if (master) {
   546  		int ifindex = if_nametoindex(master);
   547  		netlink_attr(nlmsg, IFLA_MASTER, &ifindex, sizeof(ifindex));
   548  	}
   549  	if (macsize)
   550  		netlink_attr(nlmsg, IFLA_ADDRESS, mac, macsize);
   551  	int err = netlink_send(nlmsg, sock);
   552  	if (err < 0) {
   553  		debug("netlink: device %s up master %s: %s\n", name, master ? master : "NULL", strerror(errno));
   554  	}
   555  }
   556  #endif
   557  
   558  #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION
   559  static int netlink_add_addr(struct nlmsg* nlmsg, int sock, const char* dev,
   560  			    const void* addr, int addrsize)
   561  {
   562  	struct ifaddrmsg hdr;
   563  	memset(&hdr, 0, sizeof(hdr));
   564  	hdr.ifa_family = addrsize == 4 ? AF_INET : AF_INET6;
   565  	hdr.ifa_prefixlen = addrsize == 4 ? 24 : 120;
   566  	hdr.ifa_scope = RT_SCOPE_UNIVERSE;
   567  	hdr.ifa_index = if_nametoindex(dev);
   568  	netlink_init(nlmsg, RTM_NEWADDR, NLM_F_CREATE | NLM_F_REPLACE, &hdr, sizeof(hdr));
   569  	netlink_attr(nlmsg, IFA_LOCAL, addr, addrsize);
   570  	netlink_attr(nlmsg, IFA_ADDRESS, addr, addrsize);
   571  	return netlink_send(nlmsg, sock);
   572  }
   573  
   574  static void netlink_add_addr4(struct nlmsg* nlmsg, int sock,
   575  			      const char* dev, const char* addr)
   576  {
   577  	struct in_addr in_addr;
   578  	inet_pton(AF_INET, addr, &in_addr);
   579  	int err = netlink_add_addr(nlmsg, sock, dev, &in_addr, sizeof(in_addr));
   580  	if (err < 0) {
   581  		debug("netlink: add addr %s dev %s: %s\n", addr, dev, strerror(errno));
   582  	}
   583  }
   584  
   585  static void netlink_add_addr6(struct nlmsg* nlmsg, int sock,
   586  			      const char* dev, const char* addr)
   587  {
   588  	struct in6_addr in6_addr;
   589  	inet_pton(AF_INET6, addr, &in6_addr);
   590  	int err = netlink_add_addr(nlmsg, sock, dev, &in6_addr, sizeof(in6_addr));
   591  	if (err < 0) {
   592  		debug("netlink: add addr %s dev %s: %s\n", addr, dev, strerror(errno));
   593  	}
   594  }
   595  #endif
   596  
   597  #if SYZ_EXECUTOR || SYZ_NET_INJECTION
   598  static void netlink_add_neigh(struct nlmsg* nlmsg, int sock, const char* name,
   599  			      const void* addr, int addrsize, const void* mac, int macsize)
   600  {
   601  	struct ndmsg hdr;
   602  	memset(&hdr, 0, sizeof(hdr));
   603  	hdr.ndm_family = addrsize == 4 ? AF_INET : AF_INET6;
   604  	hdr.ndm_ifindex = if_nametoindex(name);
   605  	hdr.ndm_state = NUD_PERMANENT;
   606  	netlink_init(nlmsg, RTM_NEWNEIGH, NLM_F_EXCL | NLM_F_CREATE, &hdr, sizeof(hdr));
   607  	netlink_attr(nlmsg, NDA_DST, addr, addrsize);
   608  	netlink_attr(nlmsg, NDA_LLADDR, mac, macsize);
   609  	int err = netlink_send(nlmsg, sock);
   610  	if (err < 0) {
   611  		debug("netlink: add neigh %s addr %d lladdr %d: %s\n", name, addrsize, macsize, strerror(errno));
   612  	}
   613  }
   614  #endif
   615  #endif
   616  
   617  #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION || SYZ_DEVLINK_PCI || SYZ_WIFI || SYZ_802154
// Shared scratch message buffer passed to the netlink helpers by the setup code.
static struct nlmsg nlmsg;
   619  #endif
   620  
   621  #if SYZ_EXECUTOR || SYZ_NET_INJECTION
   622  #include <arpa/inet.h>
   623  #include <errno.h>
   624  #include <fcntl.h>
   625  #include <net/if.h>
   626  #include <net/if_arp.h>
   627  #include <stdarg.h>
   628  #include <stdbool.h>
   629  #include <sys/ioctl.h>
   630  #include <sys/stat.h>
   631  
   632  #include <linux/if_ether.h>
   633  #include <linux/if_tun.h>
   634  #include <linux/ip.h>
   635  #include <linux/tcp.h>
   636  
// File descriptor of the tun/tap device opened by initialize_tun(); -1 until then.
static int tunfd = -1;

// Name of the tap interface created by initialize_tun().
#define TUN_IFACE "syz_tun"
// MAC assigned to the tap interface itself.
#define LOCAL_MAC 0xaaaaaaaaaaaa
// MAC used for the permanent neighbour entries of the remote addresses.
#define REMOTE_MAC 0xaaaaaaaaaabb
// Addresses assigned to the tap interface (LOCAL_*) and registered as
// neighbours (REMOTE_*).
#define LOCAL_IPV4 "172.20.20.170"
#define REMOTE_IPV4 "172.20.20.187"
#define LOCAL_IPV6 "fe80::aa"
#define REMOTE_IPV6 "fe80::bb"

// Fallback definitions for headers that do not provide these flags.
#ifndef IFF_NAPI
#define IFF_NAPI 0x0010
#endif
#if ENABLE_NAPI_FRAGS
// Set by initialize_tun() to whether IFF_NAPI_FRAGS is actually in effect.
static int tun_frags_enabled;
#ifndef IFF_NAPI_FRAGS
#define IFF_NAPI_FRAGS 0x0020
#endif
#endif
   655  #endif
   656  
// Creates and configures the TUN_IFACE tap device:
// opens /dev/net/tun, moves the descriptor to a fixed fd number, attaches it
// to TUN_IFACE, relaxes two IPv6 sysctls for the interface and then, over a
// NETLINK_ROUTE socket, assigns the local addresses, adds permanent neighbour
// entries for the remote addresses and brings the device up with LOCAL_MAC.
// In the executor build it does nothing unless flag_net_injection is set.
static void initialize_tun(void)
{
#if SYZ_EXECUTOR
	if (!flag_net_injection)
		return;
#endif
	tunfd = open("/dev/net/tun", O_RDWR | O_NONBLOCK);
	if (tunfd == -1) {
		// Fatal in the executor; outside of it just warn and continue without tun.
#if SYZ_EXECUTOR
		fail("tun: can't open /dev/net/tun");
#else
		printf("tun: can't open /dev/net/tun: please enable CONFIG_TUN=y\n");
		printf("otherwise fuzzing or reproducing might not work as intended\n");
		return;
#endif
	}
	// Remap tun onto higher fd number to hide it from fuzzer and to keep
	// fd numbers stable regardless of whether tun is opened or not (also see kMaxFd).
	const int kTunFd = 200;
	if (dup2(tunfd, kTunFd) < 0)
		fail("dup2(tunfd, kTunFd) failed");
	close(tunfd);
	tunfd = kTunFd;

	struct ifreq ifr;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, TUN_IFACE, IFNAMSIZ);
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
	// Note: SYZ_ENABLE_NAPI_FRAGS is never enabled. This code is only for reference
	// in case we figure out how IFF_NAPI_FRAGS works. With IFF_NAPI_FRAGS packets
	// don't reach destinations and bail out in udp_gro_receive (see #1594).
	// Also IFF_NAPI_FRAGS does not work with sandbox_namespace (see comment there).
#if ENABLE_NAPI_FRAGS
	ifr.ifr_flags |= IFF_NAPI | IFF_NAPI_FRAGS;
#endif
	if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0) {
#if ENABLE_NAPI_FRAGS
		// IFF_NAPI_FRAGS requires root, so try without it.
		ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
		if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0)
#endif
			fail("tun: ioctl(TUNSETIFF) failed");
	}
#if ENABLE_NAPI_FRAGS
	// If IFF_NAPI_FRAGS is not supported it will be silently dropped,
	// so query the effective flags.
	if (ioctl(tunfd, TUNGETIFF, (void*)&ifr) < 0)
		fail("tun: ioctl(TUNGETIFF) failed");
	tun_frags_enabled = (ifr.ifr_flags & IFF_NAPI_FRAGS) != 0;
	debug("tun_frags_enabled=%d\n", tun_frags_enabled);
#endif

	// Disable IPv6 DAD, otherwise the address remains unusable until DAD completes.
	// Don't panic because this is an optional config.
	char sysctl[64];
	sprintf(sysctl, "/proc/sys/net/ipv6/conf/%s/accept_dad", TUN_IFACE);
	write_file(sysctl, "0");
	// Disable IPv6 router solicitation to prevent IPv6 spam.
	// Don't panic because this is an optional config.
	sprintf(sysctl, "/proc/sys/net/ipv6/conf/%s/router_solicitations", TUN_IFACE);
	write_file(sysctl, "0");
	// There seems to be no way to disable IPv6 MTD to prevent more IPv6 spam.

	int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (sock == -1)
		fail("socket(AF_NETLINK) failed");

	netlink_add_addr4(&nlmsg, sock, TUN_IFACE, LOCAL_IPV4);
	netlink_add_addr6(&nlmsg, sock, TUN_IFACE, LOCAL_IPV6);
	// Only the first ETH_ALEN bytes of the 64-bit value are passed as the MAC.
	uint64 macaddr = REMOTE_MAC;
	struct in_addr in_addr;
	inet_pton(AF_INET, REMOTE_IPV4, &in_addr);
	netlink_add_neigh(&nlmsg, sock, TUN_IFACE, &in_addr, sizeof(in_addr), &macaddr, ETH_ALEN);
	struct in6_addr in6_addr;
	inet_pton(AF_INET6, REMOTE_IPV6, &in6_addr);
	netlink_add_neigh(&nlmsg, sock, TUN_IFACE, &in6_addr, sizeof(in6_addr), &macaddr, ETH_ALEN);
	// Finally bring the interface up with its own MAC address.
	macaddr = LOCAL_MAC;
	netlink_device_change(&nlmsg, sock, TUN_IFACE, true, 0, &macaddr, ETH_ALEN, NULL);
	close(sock);
}
   737  #endif
   738  
   739  #if SYZ_EXECUTOR || __NR_syz_init_net_socket || SYZ_DEVLINK_PCI || __NR_syz_socket_connect_nvme_tcp
// Fixed descriptor number passed to setns() in initialize_devlink_pci();
// presumably refers to the initial network namespace and is opened elsewhere - TODO confirm.
const int kInitNetNsFd = 201; // see kMaxFd
   741  #endif
   742  
   743  #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI || SYZ_NET_DEVICES
   744  
   745  #include <linux/genetlink.h>
   746  #include <stdbool.h>
   747  
// Generic netlink family name resolved via netlink_query_family_id().
#define DEVLINK_FAMILY_NAME "devlink"

// devlink command and attribute numbers used below. They are hard-coded here,
// presumably mirroring the kernel's devlink uapi values - TODO confirm.
#define DEVLINK_CMD_PORT_GET 5
#if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
#define DEVLINK_CMD_RELOAD 37
#endif
#define DEVLINK_ATTR_BUS_NAME 1
#define DEVLINK_ATTR_DEV_NAME 2
#define DEVLINK_ATTR_NETDEV_NAME 7
#if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
#define DEVLINK_ATTR_NETNS_FD 138
#endif
   760  
   761  #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
   762  static void netlink_devlink_netns_move(const char* bus_name, const char* dev_name, int netns_fd)
   763  {
   764  	struct genlmsghdr genlhdr;
   765  	int sock;
   766  	int id, err;
   767  
   768  	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
   769  	if (sock == -1)
   770  		fail("socket(AF_NETLINK) failed");
   771  
   772  	id = netlink_query_family_id(&nlmsg, sock, DEVLINK_FAMILY_NAME, true);
   773  	if (id == -1)
   774  		goto error;
   775  
   776  	memset(&genlhdr, 0, sizeof(genlhdr));
   777  	genlhdr.cmd = DEVLINK_CMD_RELOAD;
   778  	netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr));
   779  	netlink_attr(&nlmsg, DEVLINK_ATTR_BUS_NAME, bus_name, strlen(bus_name) + 1);
   780  	netlink_attr(&nlmsg, DEVLINK_ATTR_DEV_NAME, dev_name, strlen(dev_name) + 1);
   781  	netlink_attr(&nlmsg, DEVLINK_ATTR_NETNS_FD, &netns_fd, sizeof(netns_fd));
   782  	err = netlink_send(&nlmsg, sock);
   783  	if (err < 0) {
   784  		debug("netlink: failed to move devlink instance %s/%s into network namespace: %s\n",
   785  		      bus_name, dev_name, strerror(errno));
   786  	}
   787  error:
   788  	close(sock);
   789  }
   790  #endif
   791  
// Second scratch buffer: initialize_devlink_ports() sends rename requests from
// it while it is still iterating over the dump reply stored in `nlmsg`
// (sending through `nlmsg` would overwrite that reply).
static struct nlmsg nlmsg2;

// Dumps the devlink ports (DEVLINK_CMD_PORT_GET) and, for every message of the
// reply that carries a DEVLINK_ATTR_NETDEV_NAME, brings that netdev up and
// renames it to "<netdev_prefix><index>", where index is the position of the
// message within the reply. bus_name/dev_name are added to the request as
// attributes. Only the messages contained in the first received datagram are
// processed.
static void initialize_devlink_ports(const char* bus_name, const char* dev_name,
				     const char* netdev_prefix)
{
	struct genlmsghdr genlhdr;
	int len, total_len, id, err, offset;
	uint16 netdev_index;

	// Generic netlink socket for devlink, route socket for the renames.
	int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
	if (sock == -1)
		fail("socket(AF_NETLINK) failed");

	int rtsock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (rtsock == -1)
		fail("socket(AF_NETLINK) failed");

	id = netlink_query_family_id(&nlmsg, sock, DEVLINK_FAMILY_NAME, true);
	if (id == -1)
		goto error;

	memset(&genlhdr, 0, sizeof(genlhdr));
	genlhdr.cmd = DEVLINK_CMD_PORT_GET;
	netlink_init(&nlmsg, id, NLM_F_DUMP, &genlhdr, sizeof(genlhdr));
	netlink_attr(&nlmsg, DEVLINK_ATTR_BUS_NAME, bus_name, strlen(bus_name) + 1);
	netlink_attr(&nlmsg, DEVLINK_ATTR_DEV_NAME, dev_name, strlen(dev_name) + 1);

	// On success total_len is the size of the reply now stored in nlmsg.buf.
	err = netlink_send_ext(&nlmsg, sock, id, &total_len, true);
	if (err < 0) {
		debug("netlink: failed to get port get reply: %s\n", strerror(errno));
		goto error;
	}

	offset = 0;
	netdev_index = 0;
	// Outer loop: one iteration per netlink message in the reply.
	while ((len = netlink_next_msg(&nlmsg, offset, total_len)) != -1) {
		// Inner loop: attributes following the netlink and genl headers.
		struct nlattr* attr = (struct nlattr*)(nlmsg.buf + offset + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(genlhdr)));
		for (; (char*)attr < nlmsg.buf + offset + len; attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) {
			if (attr->nla_type == DEVLINK_ATTR_NETDEV_NAME) {
				char* port_name;
				char netdev_name[IFNAMSIZ];
				// The attribute payload is used directly as the current netdev name.
				port_name = (char*)(attr + 1);
				snprintf(netdev_name, sizeof(netdev_name), "%s%d", netdev_prefix, netdev_index);
				netlink_device_change(&nlmsg2, rtsock, port_name, true, 0, 0, 0, netdev_name);
				break;
			}
		}
		offset += len;
		// The index advances for every message, with or without a netdev name.
		netdev_index++;
	}
error:
	close(rtsock);
	close(sock);
}
   846  
   847  #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
   848  #include <fcntl.h>
   849  #include <sched.h>
   850  
   851  static void initialize_devlink_pci(void)
   852  {
   853  #if SYZ_EXECUTOR
   854  	if (!flag_devlink_pci)
   855  		return;
   856  #endif
   857  	int netns = open("/proc/self/ns/net", O_RDONLY);
   858  	if (netns == -1)
   859  		fail("open(/proc/self/ns/net) failed");
   860  	int ret = setns(kInitNetNsFd, 0);
   861  	if (ret == -1)
   862  		fail("set_ns(init_netns_fd) failed");
   863  	netlink_devlink_netns_move("pci", "0000:00:10.0", netns);
   864  	ret = setns(netns, 0);
   865  	if (ret == -1)
   866  		fail("set_ns(this_netns_fd) failed");
   867  	close(netns);
   868  
   869  	initialize_devlink_ports("pci", "0000:00:10.0", "netpci");
   870  }
   871  #endif
   872  #endif
   873  
   874  #if SYZ_EXECUTOR || SYZ_WIFI || __NR_syz_80211_inject_frame || __NR_syz_80211_join_ibss
   875  
// Number of virtual wifi devices that initialize_wifi_devices() creates.
#define WIFI_INITIAL_DEVICE_COUNT 2
// MAC address template for the virtual wifi devices: initialize_wifi_devices() overwrites
// the last byte with the device index, so the devices get consecutive addresses.
#define WIFI_MAC_BASE                              \
	{                                          \
		0x08, 0x02, 0x11, 0x00, 0x00, 0x00 \
	}
// BSSID of the IBSS network joined by the initial devices (passed as join_ibss_props.mac).
#define WIFI_IBSS_BSSID                            \
	{                                          \
		0x50, 0x50, 0x50, 0x50, 0x50, 0x50 \
	}
// SSID (raw bytes, not NUL-terminated) of the IBSS network joined by the initial devices.
#define WIFI_IBSS_SSID                             \
	{                                          \
		0x10, 0x10, 0x10, 0x10, 0x10, 0x10 \
	}
// Value passed as join_ibss_props.wiphy_freq by initialize_wifi_devices()
// (presumably MHz, i.e. 2.4 GHz channel 1 - confirm).
#define WIFI_DEFAULT_FREQUENCY 2412
// NOTE(review): the next two are not referenced in this part of the file; presumably
// defaults for frames injected via syz_80211_inject_frame - confirm at the use site.
#define WIFI_DEFAULT_SIGNAL 0
#define WIFI_DEFAULT_RX_RATE 1

// consts from drivers/net/wireless/mac80211_hwsim.h
#define HWSIM_CMD_REGISTER 1
#define HWSIM_CMD_FRAME 2
// Command used by hwsim80211_create_device() to create a new radio.
#define HWSIM_CMD_NEW_RADIO 4
// Flag attribute (no payload) added to every HWSIM_CMD_NEW_RADIO request.
#define HWSIM_ATTR_SUPPORT_P2P_DEVICE 14
// Carries the ETH_ALEN-byte MAC address of the new radio.
#define HWSIM_ATTR_PERM_ADDR 22
   899  
   900  #endif
   901  
   902  #if SYZ_EXECUTOR || SYZ_WIFI || __NR_syz_80211_join_ibss
   903  #include <linux/genetlink.h>
   904  #include <linux/if_ether.h>
   905  #include <linux/nl80211.h>
   906  #include <linux/rtnetlink.h>
   907  #include <net/if.h>
   908  #include <stdbool.h>
   909  #include <sys/ioctl.h>
   910  
   911  // From linux/if.h, but we cannot include the file as it conflicts with net/if.h
   912  #define IF_OPER_UP 6
   913  
// IBSS parameters for nl80211_join_ibss().
// Note: initialize_wifi_devices() fills this struct with a designated initializer that
// lists the fields in declaration order, so do not reorder them.
struct join_ibss_props {
	// Sent as NL80211_ATTR_WIPHY_FREQ.
	int wiphy_freq;
	// When true, the payload-less NL80211_ATTR_FREQ_FIXED attribute is added to the request.
	bool wiphy_freq_fixed;
	// Optional: when non-NULL, ETH_ALEN bytes are sent as NL80211_ATTR_MAC
	// (initialize_wifi_devices() passes the BSSID here).
	uint8* mac;
	// SSID bytes and their count, sent as NL80211_ATTR_SSID.
	uint8* ssid;
	int ssid_len;
};
   922  
   923  static int set_interface_state(const char* interface_name, int on)
   924  {
   925  	struct ifreq ifr;
   926  	int sock = socket(AF_INET, SOCK_DGRAM, 0);
   927  	if (sock < 0) {
   928  		debug("set_interface_state: failed to open socket, errno %d\n", errno);
   929  		return -1;
   930  	}
   931  
   932  	memset(&ifr, 0, sizeof(ifr));
   933  	strcpy(ifr.ifr_name, interface_name);
   934  	int ret = ioctl(sock, SIOCGIFFLAGS, &ifr);
   935  	if (ret < 0) {
   936  		debug("set_interface_state: failed to execute SIOCGIFFLAGS, ret %d\n", ret);
   937  		close(sock);
   938  		return -1;
   939  	}
   940  
   941  	if (on)
   942  		ifr.ifr_flags |= IFF_UP;
   943  	else
   944  		ifr.ifr_flags &= ~IFF_UP;
   945  
   946  	ret = ioctl(sock, SIOCSIFFLAGS, &ifr);
   947  	close(sock);
   948  	if (ret < 0) {
   949  		debug("set_interface_state: failed to execute SIOCSIFFLAGS, ret %d\n", ret);
   950  		return -1;
   951  	}
   952  	return 0;
   953  }
   954  
   955  static int nl80211_set_interface(struct nlmsg* nlmsg, int sock, int nl80211_family, uint32 ifindex,
   956  				 uint32 iftype, bool dofail)
   957  {
   958  	struct genlmsghdr genlhdr;
   959  
   960  	memset(&genlhdr, 0, sizeof(genlhdr));
   961  	genlhdr.cmd = NL80211_CMD_SET_INTERFACE;
   962  	netlink_init(nlmsg, nl80211_family, 0, &genlhdr, sizeof(genlhdr));
   963  	netlink_attr(nlmsg, NL80211_ATTR_IFINDEX, &ifindex, sizeof(ifindex));
   964  	netlink_attr(nlmsg, NL80211_ATTR_IFTYPE, &iftype, sizeof(iftype));
   965  	int err = netlink_send_ext(nlmsg, sock, 0, NULL, dofail);
   966  	if (err < 0) {
   967  		debug("nl80211_set_interface failed: %s\n", strerror(errno));
   968  	}
   969  	return err;
   970  }
   971  
   972  static int nl80211_join_ibss(struct nlmsg* nlmsg, int sock, int nl80211_family, uint32 ifindex,
   973  			     struct join_ibss_props* props, bool dofail)
   974  {
   975  	struct genlmsghdr genlhdr;
   976  
   977  	memset(&genlhdr, 0, sizeof(genlhdr));
   978  	genlhdr.cmd = NL80211_CMD_JOIN_IBSS;
   979  	netlink_init(nlmsg, nl80211_family, 0, &genlhdr, sizeof(genlhdr));
   980  	netlink_attr(nlmsg, NL80211_ATTR_IFINDEX, &ifindex, sizeof(ifindex));
   981  	netlink_attr(nlmsg, NL80211_ATTR_SSID, props->ssid, props->ssid_len);
   982  	netlink_attr(nlmsg, NL80211_ATTR_WIPHY_FREQ, &(props->wiphy_freq), sizeof(props->wiphy_freq));
   983  	if (props->mac)
   984  		netlink_attr(nlmsg, NL80211_ATTR_MAC, props->mac, ETH_ALEN);
   985  	if (props->wiphy_freq_fixed)
   986  		netlink_attr(nlmsg, NL80211_ATTR_FREQ_FIXED, NULL, 0);
   987  	int err = netlink_send_ext(nlmsg, sock, 0, NULL, dofail);
   988  	if (err < 0) {
   989  		debug("nl80211_join_ibss failed: %s\n", strerror(errno));
   990  	}
   991  	return err;
   992  }
   993  
   994  static int get_ifla_operstate(struct nlmsg* nlmsg, int ifindex, bool dofail)
   995  {
   996  	struct ifinfomsg info;
   997  	memset(&info, 0, sizeof(info));
   998  	info.ifi_family = AF_UNSPEC;
   999  	info.ifi_index = ifindex;
  1000  
  1001  	int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  1002  	if (sock == -1) {
  1003  		debug("get_ifla_operstate: socket failed: %d\n", errno);
  1004  		return -1;
  1005  	}
  1006  
  1007  	netlink_init(nlmsg, RTM_GETLINK, 0, &info, sizeof(info));
  1008  	int n;
  1009  	int err = netlink_send_ext(nlmsg, sock, RTM_NEWLINK, &n, dofail);
  1010  	close(sock);
  1011  
  1012  	if (err) {
  1013  		debug("get_ifla_operstate: failed to query: %s\n", strerror(errno));
  1014  		return -1;
  1015  	}
  1016  
  1017  	struct rtattr* attr = IFLA_RTA(NLMSG_DATA(nlmsg->buf));
  1018  	for (; RTA_OK(attr, n); attr = RTA_NEXT(attr, n)) {
  1019  		if (attr->rta_type == IFLA_OPERSTATE)
  1020  			return *((int32_t*)RTA_DATA(attr));
  1021  	}
  1022  
  1023  	return -1;
  1024  }
  1025  
  1026  static int await_ifla_operstate(struct nlmsg* nlmsg, char* interface, int operstate, bool dofail)
  1027  {
  1028  	int ifindex = if_nametoindex(interface);
  1029  	while (true) {
  1030  		usleep(1000); // 1 ms
  1031  		int ret = get_ifla_operstate(nlmsg, ifindex, dofail);
  1032  		if (ret < 0)
  1033  			return ret;
  1034  		if (ret == operstate)
  1035  			return 0;
  1036  	}
  1037  	return 0;
  1038  }
  1039  
  1040  static int nl80211_setup_ibss_interface(struct nlmsg* nlmsg, int sock, int nl80211_family_id, char* interface,
  1041  					struct join_ibss_props* ibss_props, bool dofail)
  1042  {
  1043  	int ifindex = if_nametoindex(interface);
  1044  	if (ifindex == 0) {
  1045  		debug("nl80211_setup_ibss_interface: if_nametoindex failed for %.32s, ret 0\n", interface);
  1046  		return -1;
  1047  	}
  1048  
  1049  	int ret = nl80211_set_interface(nlmsg, sock, nl80211_family_id, ifindex, NL80211_IFTYPE_ADHOC, dofail);
  1050  	if (ret < 0) {
  1051  		debug("nl80211_setup_ibss_interface: nl80211_set_interface failed for %.32s, ret %d\n", interface, ret);
  1052  		return -1;
  1053  	}
  1054  
  1055  	ret = set_interface_state(interface, 1);
  1056  	if (ret < 0) {
  1057  		debug("nl80211_setup_ibss_interface: set_interface_state failed for %.32s, ret %d\n", interface, ret);
  1058  		return -1;
  1059  	}
  1060  
  1061  	ret = nl80211_join_ibss(nlmsg, sock, nl80211_family_id, ifindex, ibss_props, dofail);
  1062  	if (ret < 0) {
  1063  		debug("nl80211_setup_ibss_interface: nl80211_join_ibss failed for %.32s, ret %d\n", interface, ret);
  1064  		return -1;
  1065  	}
  1066  
  1067  	return 0;
  1068  }
  1069  #endif
  1070  
  1071  #if SYZ_EXECUTOR || SYZ_WIFI
  1072  #include <fcntl.h>
  1073  #include <linux/rfkill.h>
  1074  #include <sys/stat.h>
  1075  #include <sys/types.h>
  1076  
  1077  static int hwsim80211_create_device(struct nlmsg* nlmsg, int sock, int hwsim_family, uint8 mac_addr[ETH_ALEN])
  1078  {
  1079  	struct genlmsghdr genlhdr;
  1080  	memset(&genlhdr, 0, sizeof(genlhdr));
  1081  	genlhdr.cmd = HWSIM_CMD_NEW_RADIO;
  1082  	netlink_init(nlmsg, hwsim_family, 0, &genlhdr, sizeof(genlhdr));
  1083  	netlink_attr(nlmsg, HWSIM_ATTR_SUPPORT_P2P_DEVICE, NULL, 0);
  1084  	netlink_attr(nlmsg, HWSIM_ATTR_PERM_ADDR, mac_addr, ETH_ALEN);
  1085  	int err = netlink_send(nlmsg, sock);
  1086  	if (err < 0) {
  1087  		debug("hwsim80211_create_device failed: %s\n", strerror(errno));
  1088  	}
  1089  	return err;
  1090  }
  1091  
  1092  static void initialize_wifi_devices(void)
  1093  {
  1094  	// Set up virtual wifi devices and join them into an IBSS network.
  1095  	// An IBSS network is created here in order to put these devices in an operable state right from
  1096  	// the beginning. It has the following positive effects.
  1097  	// 1. Frame injection becomes possible from the very start.
  1098  	// 2. A number of nl80211 commands expect their target wireless interface to be in an operable state.
  1099  	// 3. Simplification of reproducer generation - in many cases the reproducer will not have to spend time
  1100  	//    selecting system calls that set up the environment.
  1101  	//
  1102  	// IBSS network was chosen as the simplest network type to begin with.
  1103  
  1104  #if SYZ_EXECUTOR
  1105  	if (!flag_wifi)
  1106  		return;
  1107  #endif
  1108  	int rfkill = open("/dev/rfkill", O_RDWR);
  1109  	if (rfkill == -1)
  1110  		fail("open(/dev/rfkill) failed");
  1111  	struct rfkill_event event = {0};
  1112  	event.type = RFKILL_TYPE_ALL;
  1113  	event.op = RFKILL_OP_CHANGE_ALL;
  1114  	if (write(rfkill, &event, sizeof(event)) != (ssize_t)(sizeof(event)))
  1115  		fail("write(/dev/rfkill) failed");
  1116  	close(rfkill);
  1117  
  1118  	uint8 mac_addr[6] = WIFI_MAC_BASE;
  1119  	int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
  1120  	if (sock < 0)
  1121  		fail("initialize_wifi_devices: failed to create socket");
  1122  	int hwsim_family_id = netlink_query_family_id(&nlmsg, sock, "MAC80211_HWSIM", true);
  1123  	int nl80211_family_id = netlink_query_family_id(&nlmsg, sock, "nl80211", true);
  1124  	uint8 ssid[] = WIFI_IBSS_SSID;
  1125  	uint8 bssid[] = WIFI_IBSS_BSSID;
  1126  	struct join_ibss_props ibss_props = {
  1127  	    .wiphy_freq = WIFI_DEFAULT_FREQUENCY, .wiphy_freq_fixed = true, .mac = bssid, .ssid = ssid, .ssid_len = sizeof(ssid)};
  1128  
  1129  	for (int device_id = 0; device_id < WIFI_INITIAL_DEVICE_COUNT; device_id++) {
  1130  		// Virtual wifi devices will have consequtive mac addresses
  1131  		mac_addr[5] = device_id;
  1132  		int ret = hwsim80211_create_device(&nlmsg, sock, hwsim_family_id, mac_addr);
  1133  		if (ret < 0)
  1134  			failmsg("initialize_wifi_devices: failed to create device", "device=%d", device_id);
  1135  
  1136  		// For each device, unless HWSIM_ATTR_NO_VIF is passed, a network interface is created
  1137  		// automatically. Such interfaces are named "wlan0", "wlan1" and so on.
  1138  		char interface[6] = "wlan0";
  1139  		interface[4] += device_id;
  1140  
  1141  		if (nl80211_setup_ibss_interface(&nlmsg, sock, nl80211_family_id, interface, &ibss_props, true) < 0)
  1142  			failmsg("initialize_wifi_devices: failed set up IBSS network", "device=%d", device_id);
  1143  	}
  1144  
  1145  	// Wait for all devices to join the IBSS network
  1146  	for (int device_id = 0; device_id < WIFI_INITIAL_DEVICE_COUNT; device_id++) {
  1147  		char interface[6] = "wlan0";
  1148  		interface[4] += device_id;
  1149  		int ret = await_ifla_operstate(&nlmsg, interface, IF_OPER_UP, true);
  1150  		if (ret < 0)
  1151  			failmsg("initialize_wifi_devices: get_ifla_operstate failed",
  1152  				"device=%d, ret=%d", device_id, ret);
  1153  	}
  1154  
  1155  	close(sock);
  1156  }
  1157  #endif
  1158  
  1159  #if SYZ_EXECUTOR || (SYZ_NET_DEVICES && SYZ_NIC_VF) || SYZ_SWAP
  1160  static int runcmdline(char* cmdline)
  1161  {
  1162  	debug("%s\n", cmdline);
  1163  	int ret = system(cmdline);
  1164  	if (ret) {
  1165  		debug("FAIL: %s\n", cmdline);
  1166  	}
  1167  	return ret;
  1168  }
  1169  #endif
  1170  
  1171  #if SYZ_EXECUTOR || SYZ_NET_DEVICES
  1172  #include <arpa/inet.h>
  1173  #include <errno.h>
  1174  #include <fcntl.h>
  1175  #include <net/if.h>
  1176  #include <net/if_arp.h>
  1177  #include <stdarg.h>
  1178  #include <stdbool.h>
  1179  #include <sys/ioctl.h>
  1180  #include <sys/stat.h>
  1181  #include <sys/uio.h>
  1182  
  1183  #include <linux/if_ether.h>
  1184  #include <linux/if_tun.h>
  1185  #include <linux/ip.h>
  1186  #include <linux/tcp.h>
  1187  
  1188  // Addresses are chosen to be in the same subnet as tun addresses.
  1189  #define DEV_IPV4 "172.20.20.%d"
  1190  #define DEV_IPV6 "fe80::%02x"
  1191  #define DEV_MAC 0x00aaaaaaaaaa
  1192  
  1193  static void netdevsim_add(unsigned int addr, unsigned int port_count)
  1194  {
  1195  	// These devices are sticky and are not deleted on net namespace destruction.
  1196  	// So try to delete the previous version of the device.
  1197  	write_file("/sys/bus/netdevsim/del_device", "%u", addr);
  1198  	if (write_file("/sys/bus/netdevsim/new_device", "%u %u", addr, port_count)) {
  1199  		char buf[32];
  1200  		snprintf(buf, sizeof(buf), "netdevsim%d", addr);
  1201  		initialize_devlink_ports("netdevsim", buf, "netdevsim");
  1202  	}
  1203  }
  1204  
// Generic netlink family name that netlink_wireguard_setup() resolves to a family id.
// NOTE(review): the enums below look like a local copy of the kernel's WireGuard netlink
// UAPI (linux/wireguard.h); the numeric values are implied by declaration order, so do
// not reorder or insert entries - confirm against that header.
#define WG_GENL_NAME "wireguard"
// Generic netlink commands; netlink_wireguard_setup() only issues WG_CMD_SET_DEVICE.
enum wg_cmd {
	WG_CMD_GET_DEVICE,
	WG_CMD_SET_DEVICE,
};
// Top-level attributes of a device message.
enum wgdevice_attribute {
	WGDEVICE_A_UNSPEC,
	WGDEVICE_A_IFINDEX,
	WGDEVICE_A_IFNAME,
	WGDEVICE_A_PRIVATE_KEY,
	WGDEVICE_A_PUBLIC_KEY,
	WGDEVICE_A_FLAGS,
	WGDEVICE_A_LISTEN_PORT,
	WGDEVICE_A_FWMARK,
	WGDEVICE_A_PEERS,
};
// Attributes of one peer entry; netlink_wireguard_setup() nests them inside WGDEVICE_A_PEERS.
enum wgpeer_attribute {
	WGPEER_A_UNSPEC,
	WGPEER_A_PUBLIC_KEY,
	WGPEER_A_PRESHARED_KEY,
	WGPEER_A_FLAGS,
	WGPEER_A_ENDPOINT,
	WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL,
	WGPEER_A_LAST_HANDSHAKE_TIME,
	WGPEER_A_RX_BYTES,
	WGPEER_A_TX_BYTES,
	WGPEER_A_ALLOWEDIPS,
	WGPEER_A_PROTOCOL_VERSION,
};
// Attributes of one allowed-IP entry; netlink_wireguard_setup() nests them inside
// WGPEER_A_ALLOWEDIPS.
enum wgallowedip_attribute {
	WGALLOWEDIP_A_UNSPEC,
	WGALLOWEDIP_A_FAMILY,
	WGALLOWEDIP_A_IPADDR,
	WGALLOWEDIP_A_CIDR_MASK,
};
  1240  
  1241  static void netlink_wireguard_setup(void)
  1242  {
  1243  	const char ifname_a[] = "wg0";
  1244  	const char ifname_b[] = "wg1";
  1245  	const char ifname_c[] = "wg2";
  1246  	const char private_a[] = "\xa0\x5c\xa8\x4f\x6c\x9c\x8e\x38\x53\xe2\xfd\x7a\x70\xae\x0f\xb2\x0f\xa1\x52\x60\x0c\xb0\x08\x45\x17\x4f\x08\x07\x6f\x8d\x78\x43";
  1247  	const char private_b[] = "\xb0\x80\x73\xe8\xd4\x4e\x91\xe3\xda\x92\x2c\x22\x43\x82\x44\xbb\x88\x5c\x69\xe2\x69\xc8\xe9\xd8\x35\xb1\x14\x29\x3a\x4d\xdc\x6e";
  1248  	const char private_c[] = "\xa0\xcb\x87\x9a\x47\xf5\xbc\x64\x4c\x0e\x69\x3f\xa6\xd0\x31\xc7\x4a\x15\x53\xb6\xe9\x01\xb9\xff\x2f\x51\x8c\x78\x04\x2f\xb5\x42";
  1249  	const char public_a[] = "\x97\x5c\x9d\x81\xc9\x83\xc8\x20\x9e\xe7\x81\x25\x4b\x89\x9f\x8e\xd9\x25\xae\x9f\x09\x23\xc2\x3c\x62\xf5\x3c\x57\xcd\xbf\x69\x1c";
  1250  	const char public_b[] = "\xd1\x73\x28\x99\xf6\x11\xcd\x89\x94\x03\x4d\x7f\x41\x3d\xc9\x57\x63\x0e\x54\x93\xc2\x85\xac\xa4\x00\x65\xcb\x63\x11\xbe\x69\x6b";
  1251  	const char public_c[] = "\xf4\x4d\xa3\x67\xa8\x8e\xe6\x56\x4f\x02\x02\x11\x45\x67\x27\x08\x2f\x5c\xeb\xee\x8b\x1b\xf5\xeb\x73\x37\x34\x1b\x45\x9b\x39\x22";
  1252  	const uint16 listen_a = 20001;
  1253  	const uint16 listen_b = 20002;
  1254  	const uint16 listen_c = 20003;
  1255  	const uint16 af_inet = AF_INET;
  1256  	const uint16 af_inet6 = AF_INET6;
  1257  	// Unused, but useful in case we change this:
  1258  	// const struct sockaddr_in endpoint_a_v4 = {
  1259  	//     .sin_family = AF_INET,
  1260  	//     .sin_port = htons(listen_a),
  1261  	//     .sin_addr = {htonl(INADDR_LOOPBACK)}};
  1262  	const struct sockaddr_in endpoint_b_v4 = {
  1263  	    .sin_family = AF_INET,
  1264  	    .sin_port = htons(listen_b),
  1265  	    .sin_addr = {htonl(INADDR_LOOPBACK)}};
  1266  	const struct sockaddr_in endpoint_c_v4 = {
  1267  	    .sin_family = AF_INET,
  1268  	    .sin_port = htons(listen_c),
  1269  	    .sin_addr = {htonl(INADDR_LOOPBACK)}};
  1270  	struct sockaddr_in6 endpoint_a_v6 = {
  1271  	    .sin6_family = AF_INET6,
  1272  	    .sin6_port = htons(listen_a)};
  1273  	endpoint_a_v6.sin6_addr = in6addr_loopback;
  1274  	// Unused, but useful in case we change this:
  1275  	// const struct sockaddr_in6 endpoint_b_v6 = {
  1276  	//     .sin6_family = AF_INET6,
  1277  	//     .sin6_port = htons(listen_b)};
  1278  	// endpoint_b_v6.sin6_addr = in6addr_loopback;
  1279  	struct sockaddr_in6 endpoint_c_v6 = {
  1280  	    .sin6_family = AF_INET6,
  1281  	    .sin6_port = htons(listen_c)};
  1282  	endpoint_c_v6.sin6_addr = in6addr_loopback;
  1283  	const struct in_addr first_half_v4 = {0};
  1284  	const struct in_addr second_half_v4 = {(uint32)htonl(128 << 24)};
  1285  	const struct in6_addr first_half_v6 = {{{0}}};
  1286  	const struct in6_addr second_half_v6 = {{{0x80}}};
  1287  	const uint8 half_cidr = 1;
  1288  	const uint16 persistent_keepalives[] = {1, 3, 7, 9, 14, 19};
  1289  
  1290  	struct genlmsghdr genlhdr = {
  1291  	    .cmd = WG_CMD_SET_DEVICE,
  1292  	    .version = 1};
  1293  	int sock;
  1294  	int id, err;
  1295  
  1296  	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
  1297  	if (sock == -1) {
  1298  		debug("socket(AF_NETLINK) failed: %s\n", strerror(errno));
  1299  		return;
  1300  	}
  1301  
  1302  	id = netlink_query_family_id(&nlmsg, sock, WG_GENL_NAME, true);
  1303  	if (id == -1)
  1304  		goto error;
  1305  
  1306  	netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr));
  1307  	netlink_attr(&nlmsg, WGDEVICE_A_IFNAME, ifname_a, strlen(ifname_a) + 1);
  1308  	netlink_attr(&nlmsg, WGDEVICE_A_PRIVATE_KEY, private_a, 32);
  1309  	netlink_attr(&nlmsg, WGDEVICE_A_LISTEN_PORT, &listen_a, 2);
  1310  	netlink_nest(&nlmsg, NLA_F_NESTED | WGDEVICE_A_PEERS);
  1311  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1312  	netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_b, 32);
  1313  	netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_b_v4, sizeof(endpoint_b_v4));
  1314  	netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[0], 2);
  1315  	netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS);
  1316  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1317  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2);
  1318  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v4, sizeof(first_half_v4));
  1319  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1320  	netlink_done(&nlmsg);
  1321  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1322  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2);
  1323  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v6, sizeof(first_half_v6));
  1324  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1325  	netlink_done(&nlmsg);
  1326  	netlink_done(&nlmsg);
  1327  	netlink_done(&nlmsg);
  1328  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1329  	netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_c, 32);
  1330  	netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_c_v6, sizeof(endpoint_c_v6));
  1331  	netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[1], 2);
  1332  	netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS);
  1333  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1334  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2);
  1335  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v4, sizeof(second_half_v4));
  1336  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1337  	netlink_done(&nlmsg);
  1338  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1339  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2);
  1340  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v6, sizeof(second_half_v6));
  1341  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1342  	netlink_done(&nlmsg);
  1343  	netlink_done(&nlmsg);
  1344  	netlink_done(&nlmsg);
  1345  	netlink_done(&nlmsg);
  1346  	err = netlink_send(&nlmsg, sock);
  1347  	if (err < 0) {
  1348  		debug("netlink: failed to setup wireguard instance: %s\n", strerror(errno));
  1349  	}
  1350  
  1351  	netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr));
  1352  	netlink_attr(&nlmsg, WGDEVICE_A_IFNAME, ifname_b, strlen(ifname_b) + 1);
  1353  	netlink_attr(&nlmsg, WGDEVICE_A_PRIVATE_KEY, private_b, 32);
  1354  	netlink_attr(&nlmsg, WGDEVICE_A_LISTEN_PORT, &listen_b, 2);
  1355  	netlink_nest(&nlmsg, NLA_F_NESTED | WGDEVICE_A_PEERS);
  1356  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1357  	netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_a, 32);
  1358  	netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_a_v6, sizeof(endpoint_a_v6));
  1359  	netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[2], 2);
  1360  	netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS);
  1361  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1362  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2);
  1363  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v4, sizeof(first_half_v4));
  1364  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1365  	netlink_done(&nlmsg);
  1366  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1367  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2);
  1368  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v6, sizeof(first_half_v6));
  1369  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1370  	netlink_done(&nlmsg);
  1371  	netlink_done(&nlmsg);
  1372  	netlink_done(&nlmsg);
  1373  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1374  	netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_c, 32);
  1375  	netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_c_v4, sizeof(endpoint_c_v4));
  1376  	netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[3], 2);
  1377  	netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS);
  1378  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1379  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2);
  1380  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v4, sizeof(second_half_v4));
  1381  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1382  	netlink_done(&nlmsg);
  1383  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1384  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2);
  1385  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v6, sizeof(second_half_v6));
  1386  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1387  	netlink_done(&nlmsg);
  1388  	netlink_done(&nlmsg);
  1389  	netlink_done(&nlmsg);
  1390  	netlink_done(&nlmsg);
  1391  	err = netlink_send(&nlmsg, sock);
  1392  	if (err < 0) {
  1393  		debug("netlink: failed to setup wireguard instance: %s\n", strerror(errno));
  1394  	}
  1395  
  1396  	netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr));
  1397  	netlink_attr(&nlmsg, WGDEVICE_A_IFNAME, ifname_c, strlen(ifname_c) + 1);
  1398  	netlink_attr(&nlmsg, WGDEVICE_A_PRIVATE_KEY, private_c, 32);
  1399  	netlink_attr(&nlmsg, WGDEVICE_A_LISTEN_PORT, &listen_c, 2);
  1400  	netlink_nest(&nlmsg, NLA_F_NESTED | WGDEVICE_A_PEERS);
  1401  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1402  	netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_a, 32);
  1403  	netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_a_v6, sizeof(endpoint_a_v6));
  1404  	netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[4], 2);
  1405  	netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS);
  1406  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1407  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2);
  1408  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v4, sizeof(first_half_v4));
  1409  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1410  	netlink_done(&nlmsg);
  1411  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1412  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2);
  1413  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v6, sizeof(first_half_v6));
  1414  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1415  	netlink_done(&nlmsg);
  1416  	netlink_done(&nlmsg);
  1417  	netlink_done(&nlmsg);
  1418  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1419  	netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_b, 32);
  1420  	netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_b_v4, sizeof(endpoint_b_v4));
  1421  	netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[5], 2);
  1422  	netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS);
  1423  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1424  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2);
  1425  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v4, sizeof(second_half_v4));
  1426  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1427  	netlink_done(&nlmsg);
  1428  	netlink_nest(&nlmsg, NLA_F_NESTED | 0);
  1429  	netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2);
  1430  	netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v6, sizeof(second_half_v6));
  1431  	netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1);
  1432  	netlink_done(&nlmsg);
  1433  	netlink_done(&nlmsg);
  1434  	netlink_done(&nlmsg);
  1435  	netlink_done(&nlmsg);
  1436  	err = netlink_send(&nlmsg, sock);
  1437  	if (err < 0) {
  1438  		debug("netlink: failed to setup wireguard instance: %s\n", strerror(errno));
  1439  	}
  1440  
  1441  error:
  1442  	close(sock);
  1443  }
  1444  
  1445  #if SYZ_EXECUTOR || SYZ_NIC_VF
  1446  
  1447  static void netlink_nicvf_setup(void)
  1448  {
  1449  	char cmdline[256];
  1450  
  1451  #if SYZ_EXECUTOR
  1452  	if (!flag_nic_vf)
  1453  		return;
  1454  #endif
  1455  	if (!vf_intf.ppid)
  1456  		return;
  1457  
  1458  	debug("ppid = %d, vf_intf.pass_thru_intf: %s\n",
  1459  	      vf_intf.ppid, vf_intf.pass_thru_intf);
  1460  
  1461  	sprintf(cmdline, "nsenter -t 1 -n ip link set %s netns %d",
  1462  		vf_intf.pass_thru_intf, getpid());
  1463  	if (runcmdline(cmdline))
  1464  		failmsg("failed to run command", "%s", cmdline);
  1465  	sprintf(cmdline, "ip a s %s", vf_intf.pass_thru_intf);
  1466  	if (runcmdline(cmdline))
  1467  		failmsg("failed to run command", "%s", cmdline);
  1468  	sprintf(cmdline, "ip link set %s down", vf_intf.pass_thru_intf);
  1469  	if (runcmdline(cmdline))
  1470  		failmsg("failed to run command", "%s", cmdline);
  1471  	sprintf(cmdline, "ip link set %s name nicvf0", vf_intf.pass_thru_intf);
  1472  	if (runcmdline(cmdline))
  1473  		failmsg("failed to run command", "%s", cmdline);
  1474  	debug("nicvf0 VF pass-through setup complete.\n");
  1475  }
  1476  #endif // SYZ_NIC_VF
  1477  
  1478  // We test in a separate namespace, which does not have any network devices initially (even lo).
  1479  // Create/up as many as we can.
  1480  static void initialize_netdevices(void)
  1481  {
  1482  #if SYZ_EXECUTOR
  1483  	if (!flag_net_devices)
  1484  		return;
  1485  #endif
  1486  	// TODO: add the following devices:
  1487  	// - vxlan
  1488  	// - ipip
  1489  	// - lowpan (requires link to device of type IEEE802154, e.g. wpan0)
  1490  	// - ipoib (requires link to device of type ARPHRD_INFINIBAND)
  1491  	// - vrf
  1492  	// - rmnet
  1493  	// - openvswitch
  1494  	// Naive attempts to add devices of these types fail with various errors.
  1495  	// Also init namespace contains the following devices (which presumably can't be
  1496  	// created in non-init namespace), can we use them somehow?
  1497  	// - ifb0/1
  1498  	// - teql0
  1499  	// - eql
  1500  	// Note: netdevsim devices can't have the same name even in different namespaces.
  1501  	char netdevsim[16];
  1502  	sprintf(netdevsim, "netdevsim%d", (int)procid);
  1503  	struct {
  1504  		const char* type;
  1505  		const char* dev;
  1506  	} devtypes[] = {
  1507  		// Note: ip6erspan device can't be added if ip6gretap exists in the same namespace.
  1508  		{"ip6gretap", "ip6gretap0"},
  1509  		{"bridge", "bridge0"},
  1510  		{"vcan", "vcan0"},
  1511  		{"bond", "bond0"},
  1512  		{"team", "team0"},
  1513  		{"dummy", "dummy0"},
  1514  #if SYZ_EXECUTOR || SYZ_NIC_VF
  1515  		{"nicvf", "nicvf0"},
  1516  #endif
  1517  		{"nlmon", "nlmon0"},
  1518  		{"caif", "caif0"},
  1519  		{"batadv", "batadv0"},
  1520  		// Note: this adds vxcan0/vxcan1 pair, similar to veth (creating vxcan0 would fail).
  1521  		{"vxcan", "vxcan1"},
  1522  		// This adds connected veth0 and veth1 devices.
  1523  		{"veth", 0},
  1524  		{"wireguard", "wg0"},
  1525  		{"wireguard", "wg1"},
  1526  		{"wireguard", "wg2"},
  1527  	};
  1528  	const char* devmasters[] = {"bridge", "bond", "team", "batadv"};
  1529  	// If you extend this array, also update netdev_addr_id in vnet.txt
  1530  	// and devnames in socket.txt.
  1531  	struct {
  1532  		const char* name;
  1533  		int macsize;
  1534  		bool noipv6;
  1535  	} devices[] = {
  1536  		{"lo", ETH_ALEN},
  1537  		{"sit0", 0},
  1538  		{"bridge0", ETH_ALEN},
  1539  		{"vcan0", 0, true},
  1540  		{"tunl0", 0},
  1541  		{"gre0", 0},
  1542  		{"gretap0", ETH_ALEN},
  1543  		{"ip_vti0", 0},
  1544  		{"ip6_vti0", 0},
  1545  		{"ip6tnl0", 0},
  1546  		{"ip6gre0", 0},
  1547  		{"ip6gretap0", ETH_ALEN},
  1548  		{"erspan0", ETH_ALEN},
  1549  		{"bond0", ETH_ALEN},
  1550  		{"veth0", ETH_ALEN},
  1551  		{"veth1", ETH_ALEN},
  1552  		{"team0", ETH_ALEN},
  1553  		{"veth0_to_bridge", ETH_ALEN},
  1554  		{"veth1_to_bridge", ETH_ALEN},
  1555  		{"veth0_to_bond", ETH_ALEN},
  1556  		{"veth1_to_bond", ETH_ALEN},
  1557  		{"veth0_to_team", ETH_ALEN},
  1558  		{"veth1_to_team", ETH_ALEN},
  1559  		{"veth0_to_hsr", ETH_ALEN},
  1560  		{"veth1_to_hsr", ETH_ALEN},
  1561  		{"hsr0", 0},
  1562  		{"dummy0", ETH_ALEN},
  1563  #if SYZ_EXECUTOR || SYZ_NIC_VF
  1564  		{"nicvf0", 0, true},
  1565  #endif
  1566  		{"nlmon0", 0},
  1567  		{"vxcan0", 0, true},
  1568  		{"vxcan1", 0, true},
  1569  		{"caif0", ETH_ALEN}, // TODO: up'ing caif fails with ENODEV
  1570  		{"batadv0", ETH_ALEN},
  1571  		{netdevsim, ETH_ALEN},
  1572  		{"xfrm0", ETH_ALEN},
  1573  		{"veth0_virt_wifi", ETH_ALEN},
  1574  		{"veth1_virt_wifi", ETH_ALEN},
  1575  		{"virt_wifi0", ETH_ALEN},
  1576  		{"veth0_vlan", ETH_ALEN},
  1577  		{"veth1_vlan", ETH_ALEN},
  1578  		{"vlan0", ETH_ALEN},
  1579  		{"vlan1", ETH_ALEN},
  1580  		{"macvlan0", ETH_ALEN},
  1581  		{"macvlan1", ETH_ALEN},
  1582  		{"ipvlan0", ETH_ALEN},
  1583  		{"ipvlan1", ETH_ALEN},
  1584  		{"veth0_macvtap", ETH_ALEN},
  1585  		{"veth1_macvtap", ETH_ALEN},
  1586  		{"macvtap0", ETH_ALEN},
  1587  		{"macsec0", ETH_ALEN},
  1588  		{"veth0_to_batadv", ETH_ALEN},
  1589  		{"veth1_to_batadv", ETH_ALEN},
  1590  		{"batadv_slave_0", ETH_ALEN},
  1591  		{"batadv_slave_1", ETH_ALEN},
  1592  		{"geneve0", ETH_ALEN},
  1593  		{"geneve1", ETH_ALEN},
  1594  		{"wg0", 0},
  1595  		{"wg1", 0},
  1596  		{"wg2", 0},
  1597  	};
  1598  	int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  1599  	if (sock == -1)
  1600  		fail("socket(AF_NETLINK) failed");
  1601  	unsigned i;
  1602  	for (i = 0; i < sizeof(devtypes) / sizeof(devtypes[0]); i++)
  1603  		netlink_add_device(&nlmsg, sock, devtypes[i].type, devtypes[i].dev);
  1604  	// This creates connected bridge/bond/team_slave devices of type veth,
  1605  	// and makes them slaves of bridge/bond/team devices, respectively.
  1606  	// Note: slave devices don't need MAC/IP addresses, only master devices.
  1607  	//       veth0_to_* is not slave devices, which still need ip addresses.
  1608  	for (i = 0; i < sizeof(devmasters) / (sizeof(devmasters[0])); i++) {
  1609  		char master[32], slave0[32], veth0[32], slave1[32], veth1[32];
  1610  		sprintf(slave0, "%s_slave_0", devmasters[i]);
  1611  		sprintf(veth0, "veth0_to_%s", devmasters[i]);
  1612  		netlink_add_veth(&nlmsg, sock, slave0, veth0);
  1613  		sprintf(slave1, "%s_slave_1", devmasters[i]);
  1614  		sprintf(veth1, "veth1_to_%s", devmasters[i]);
  1615  		netlink_add_veth(&nlmsg, sock, slave1, veth1);
  1616  		sprintf(master, "%s0", devmasters[i]);
  1617  		netlink_device_change(&nlmsg, sock, slave0, false, master, 0, 0, NULL);
  1618  		netlink_device_change(&nlmsg, sock, slave1, false, master, 0, 0, NULL);
  1619  	}
  1620  	netlink_add_xfrm(&nlmsg, sock, "xfrm0");
  1621  
  1622  	// bond/team_slave_* will set up automatically when set their master.
  1623  	// But bridge_slave_* need to set up manually.
  1624  	netlink_device_change(&nlmsg, sock, "bridge_slave_0", true, 0, 0, 0, NULL);
  1625  	netlink_device_change(&nlmsg, sock, "bridge_slave_1", true, 0, 0, 0, NULL);
  1626  
  1627  	// Setup hsr device (slightly different from what we do for devmasters).
  1628  	netlink_add_veth(&nlmsg, sock, "hsr_slave_0", "veth0_to_hsr");
  1629  	netlink_add_veth(&nlmsg, sock, "hsr_slave_1", "veth1_to_hsr");
  1630  	netlink_add_hsr(&nlmsg, sock, "hsr0", "hsr_slave_0", "hsr_slave_1");
  1631  	netlink_device_change(&nlmsg, sock, "hsr_slave_0", true, 0, 0, 0, NULL);
  1632  	netlink_device_change(&nlmsg, sock, "hsr_slave_1", true, 0, 0, 0, NULL);
  1633  
  1634  	netlink_add_veth(&nlmsg, sock, "veth0_virt_wifi", "veth1_virt_wifi");
  1635  	netlink_add_linked(&nlmsg, sock, "virt_wifi", "virt_wifi0", "veth1_virt_wifi");
  1636  
  1637  	netlink_add_veth(&nlmsg, sock, "veth0_vlan", "veth1_vlan");
  1638  	netlink_add_vlan(&nlmsg, sock, "vlan0", "veth0_vlan", 0, htons(ETH_P_8021Q));
  1639  	netlink_add_vlan(&nlmsg, sock, "vlan1", "veth0_vlan", 1, htons(ETH_P_8021AD));
  1640  	netlink_add_macvlan(&nlmsg, sock, "macvlan0", "veth1_vlan");
  1641  	netlink_add_macvlan(&nlmsg, sock, "macvlan1", "veth1_vlan");
  1642  	netlink_add_ipvlan(&nlmsg, sock, "ipvlan0", "veth0_vlan", IPVLAN_MODE_L2, 0);
  1643  	netlink_add_ipvlan(&nlmsg, sock, "ipvlan1", "veth0_vlan", IPVLAN_MODE_L3S, IPVLAN_F_VEPA);
  1644  
  1645  	netlink_add_veth(&nlmsg, sock, "veth0_macvtap", "veth1_macvtap");
  1646  	netlink_add_linked(&nlmsg, sock, "macvtap", "macvtap0", "veth0_macvtap");
  1647  	netlink_add_linked(&nlmsg, sock, "macsec", "macsec0", "veth1_macvtap");
  1648  
  1649  	char addr[32];
  1650  	sprintf(addr, DEV_IPV4, 14 + 10); // should point to veth0
  1651  	struct in_addr geneve_addr4;
  1652  	if (inet_pton(AF_INET, addr, &geneve_addr4) <= 0)
  1653  		fail("geneve0 inet_pton failed");
  1654  	struct in6_addr geneve_addr6;
  1655  	// Must not be link local (our device addresses are link local).
  1656  	if (inet_pton(AF_INET6, "fc00::01", &geneve_addr6) <= 0)
  1657  		fail("geneve1 inet_pton failed");
  1658  	netlink_add_geneve(&nlmsg, sock, "geneve0", 0, &geneve_addr4, 0);
  1659  	netlink_add_geneve(&nlmsg, sock, "geneve1", 1, 0, &geneve_addr6);
  1660  
  1661  	netdevsim_add((int)procid, 4); // Number of port is in sync with value in sys/linux/socket_netlink_generic_devlink.txt
  1662  
  1663  	netlink_wireguard_setup();
  1664  
  1665  #if SYZ_EXECUTOR || SYZ_NIC_VF
  1666  	netlink_nicvf_setup();
  1667  #endif
  1668  
  1669  	for (i = 0; i < sizeof(devices) / (sizeof(devices[0])); i++) {
  1670  		// Assign some unique address to devices. Some devices won't up without this.
  1671  		// Shift addresses by 10 because 0 subnet address can mean special things.
  1672  		char addr[32];
  1673  		sprintf(addr, DEV_IPV4, i + 10);
  1674  		netlink_add_addr4(&nlmsg, sock, devices[i].name, addr);
  1675  		if (!devices[i].noipv6) {
  1676  			sprintf(addr, DEV_IPV6, i + 10);
  1677  			netlink_add_addr6(&nlmsg, sock, devices[i].name, addr);
  1678  		}
  1679  		uint64 macaddr = DEV_MAC + ((i + 10ull) << 40);
  1680  		netlink_device_change(&nlmsg, sock, devices[i].name, true, 0, &macaddr, devices[i].macsize, NULL);
  1681  	}
  1682  	close(sock);
  1683  }
  1684  
  1685  // Same as initialize_netdevices, but called in init net namespace.
  1686  static void initialize_netdevices_init(void)
  1687  {
  1688  #if SYZ_EXECUTOR
  1689  	if (!flag_net_devices)
  1690  		return;
  1691  #endif
  1692  	int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  1693  	if (sock == -1)
  1694  		fail("socket(AF_NETLINK) failed");
  1695  	struct {
  1696  		const char* type;
  1697  		int macsize;
  1698  		bool noipv6;
  1699  		bool noup;
  1700  	} devtypes[] = {
  1701  	    // NETROM device, see net/netrom/{af_netrom,nr_dev}.c
  1702  	    {"nr", 7, true},
  1703  	    // ROSE device, see net/rose/{af_rose,rose_dev}.c
  1704  	    // We don't up it yet because it crashes kernel right away:
  1705  	    // https://groups.google.com/d/msg/syzkaller/v-4B3zoBC-4/02SCKEzJBwAJ
  1706  	    {"rose", 5, true, true},
  1707  	};
  1708  	unsigned i;
  1709  	for (i = 0; i < sizeof(devtypes) / sizeof(devtypes[0]); i++) {
  1710  		char dev[32], addr[32];
  1711  		sprintf(dev, "%s%d", devtypes[i].type, (int)procid);
  1712  		// Note: syscall descriptions know these addresses.
  1713  		sprintf(addr, "172.30.%d.%d", i, (int)procid + 1);
  1714  		netlink_add_addr4(&nlmsg, sock, dev, addr);
  1715  		if (!devtypes[i].noipv6) {
  1716  			sprintf(addr, "fe88::%02x:%02x", i, (int)procid + 1);
  1717  			netlink_add_addr6(&nlmsg, sock, dev, addr);
  1718  		}
  1719  		int macsize = devtypes[i].macsize;
  1720  		uint64 macaddr = 0xbbbbbb + ((unsigned long long)i << (8 * (macsize - 2))) +
  1721  				 (procid << (8 * (macsize - 1)));
  1722  		netlink_device_change(&nlmsg, sock, dev, !devtypes[i].noup, 0, &macaddr, macsize, NULL);
  1723  	}
  1724  	close(sock);
  1725  
  1726  #if SYZ_EXECUTOR || SYZ_NIC_VF
  1727  	find_vf_interface();
  1728  #endif
  1729  }
  1730  #endif
  1731  
  1732  #if SYZ_EXECUTOR || SYZ_NET_INJECTION && (__NR_syz_extract_tcp_res || SYZ_REPEAT)
  1733  #include <errno.h>
  1734  
  1735  static int read_tun(char* data, int size)
  1736  {
  1737  	if (tunfd < 0)
  1738  		return -1;
  1739  
  1740  	int rv = read(tunfd, data, size);
  1741  	if (rv < 0) {
  1742  		// EBADF can be returned if the test closes tunfd with close_range syscall.
  1743  		// Tun sometimes returns EBADFD, unclear if it's a kernel bug or not.
  1744  		if (errno == EAGAIN || errno == EBADF || errno == EBADFD)
  1745  			return -1;
  1746  		fail("tun read failed");
  1747  	}
  1748  	return rv;
  1749  }
  1750  #endif
  1751  
  1752  #if SYZ_EXECUTOR || __NR_syz_emit_ethernet && SYZ_NET_INJECTION
  1753  #include <stdbool.h>
  1754  #include <sys/uio.h>
  1755  
  1756  #if ENABLE_NAPI_FRAGS
  1757  #define MAX_FRAGS 4
// Fragmentation request passed as the optional third argument of
// syz_emit_ethernet; it describes how the packet is split into iovecs.
struct vnet_fragmentation {
	// Non-zero: bytes left over after the listed fragments are sent as one
	// additional fragment.
	uint32 full;
	// Number of entries of frags[] to use; syz_emit_ethernet clamps it to MAX_FRAGS.
	uint32 count;
	// Requested size in bytes of each fragment; each is capped to the bytes
	// still remaining in the packet.
	uint32 frags[MAX_FRAGS];
};
  1763  #endif
  1764  
  1765  static long syz_emit_ethernet(volatile long a0, volatile long a1, volatile long a2)
  1766  {
  1767  	// syz_emit_ethernet(len len[packet], packet ptr[in, eth_packet], frags ptr[in, vnet_fragmentation, opt])
  1768  	// vnet_fragmentation {
  1769  	// 	full	int32[0:1]
  1770  	// 	count	int32[1:4]
  1771  	// 	frags	array[int32[0:4096], 4]
  1772  	// }
  1773  	if (tunfd < 0)
  1774  		return (uintptr_t)-1;
  1775  
  1776  	uint32 length = a0;
  1777  	char* data = (char*)a1;
  1778  	debug_dump_data(data, length);
  1779  
  1780  #if ENABLE_NAPI_FRAGS
  1781  	struct vnet_fragmentation* frags = (struct vnet_fragmentation*)a2;
  1782  	struct iovec vecs[MAX_FRAGS + 1];
  1783  	uint32 nfrags = 0;
  1784  	if (!tun_frags_enabled || frags == NULL) {
  1785  		vecs[nfrags].iov_base = data;
  1786  		vecs[nfrags].iov_len = length;
  1787  		nfrags++;
  1788  	} else {
  1789  		bool full = frags->full;
  1790  		uint32 count = frags->count;
  1791  		if (count > MAX_FRAGS)
  1792  			count = MAX_FRAGS;
  1793  		uint32 i;
  1794  		for (i = 0; i < count && length != 0; i++) {
  1795  			uint32 size = frags->frags[i];
  1796  			if (size > length)
  1797  				size = length;
  1798  			vecs[nfrags].iov_base = data;
  1799  			vecs[nfrags].iov_len = size;
  1800  			nfrags++;
  1801  			data += size;
  1802  			length -= size;
  1803  		}
  1804  		if (length != 0 && (full || nfrags == 0)) {
  1805  			vecs[nfrags].iov_base = data;
  1806  			vecs[nfrags].iov_len = length;
  1807  			nfrags++;
  1808  		}
  1809  	}
  1810  	return writev(tunfd, vecs, nfrags);
  1811  #else
  1812  	return write(tunfd, data, length);
  1813  #endif
  1814  }
  1815  #endif
  1816  
  1817  #if SYZ_EXECUTOR || __NR_syz_io_uring_submit || __NR_syz_io_uring_complete || __NR_syz_io_uring_setup
  1818  
  1819  #define SIZEOF_IO_URING_SQE 64
  1820  #define SIZEOF_IO_URING_CQE 16
  1821  
  1822  // Once a io_uring is set up by calling io_uring_setup, the offsets to the member fields
  1823  // to be used on the mmap'ed area are set in structs io_sqring_offsets and io_cqring_offsets.
  1824  // Except io_sqring_offsets.array, the offsets are static while all depend on how struct io_rings
  1825  // is organized in code. The offsets can be marked as resources in syzkaller descriptions but
  1826  // this makes it difficult to generate correct programs by the fuzzer. Thus, the offsets are
  1827  // hard-coded here (and in the descriptions), and array offset is later computed once the number
  1828  // of entries is available. Another way to obtain the offsets is to setup another io_uring here
  1829  // and use what it returns. It is slower but might be more maintainable.
  1830  #define SQ_HEAD_OFFSET 0
  1831  #define SQ_TAIL_OFFSET 64
  1832  #define SQ_RING_MASK_OFFSET 256
  1833  #define SQ_RING_ENTRIES_OFFSET 264
  1834  #define SQ_FLAGS_OFFSET 276
  1835  #define SQ_DROPPED_OFFSET 272
  1836  #define CQ_HEAD_OFFSET 128
  1837  #define CQ_TAIL_OFFSET 192
  1838  #define CQ_RING_MASK_OFFSET 260
  1839  #define CQ_RING_ENTRIES_OFFSET 268
  1840  #define CQ_RING_OVERFLOW_OFFSET 284
  1841  #define CQ_FLAGS_OFFSET 280
  1842  #define CQ_CQES_OFFSET 320
  1843  
  1844  #if SYZ_EXECUTOR || __NR_syz_io_uring_complete
  1845  
  1846  // From linux/io_uring.h
// Completion queue entry as copied out of the ring by syz_io_uring_complete.
struct io_uring_cqe {
	// Compared against the user_data values the descriptions assign to
	// openat/openat2 submissions (0x12345 and 0x23456).
	uint64 user_data;
	// Result of the operation; returned as an fd for the submissions above.
	uint32 res;
	// Not read by the code in this file.
	uint32 flags;
};
  1852  
  1853  static long syz_io_uring_complete(volatile long a0)
  1854  {
  1855  	// syzlang: syz_io_uring_complete(ring_ptr ring_ptr)
  1856  	// C:       syz_io_uring_complete(char* ring_ptr)
  1857  
  1858  	// It is not checked if the ring is empty
  1859  
  1860  	// Cast to original
  1861  	char* ring_ptr = (char*)a0;
  1862  
  1863  	// Compute the head index and the next head value
  1864  	uint32 cq_ring_mask = *(uint32*)(ring_ptr + CQ_RING_MASK_OFFSET);
  1865  	uint32* cq_head_ptr = (uint32*)(ring_ptr + CQ_HEAD_OFFSET);
  1866  	uint32 cq_head = *cq_head_ptr & cq_ring_mask;
  1867  	uint32 cq_head_next = *cq_head_ptr + 1;
  1868  
  1869  	// Compute the ptr to the src cq entry on the ring
  1870  	char* cqe_src = ring_ptr + CQ_CQES_OFFSET + cq_head * SIZEOF_IO_URING_CQE;
  1871  
  1872  	// Get the cq entry from the ring
  1873  	struct io_uring_cqe cqe;
  1874  	memcpy(&cqe, cqe_src, sizeof(cqe));
  1875  
  1876  	// Advance the head. Head is a free-flowing integer and relies on natural wrapping.
  1877  	// Ensure that the kernel will never see a head update without the preceeding CQE
  1878  	// stores being done.
  1879  	__atomic_store_n(cq_head_ptr, cq_head_next, __ATOMIC_RELEASE);
  1880  
  1881  	// In the descriptions (sys/linux/io_uring.txt), openat and openat2 are passed
  1882  	// with a unique range of sqe.user_data (0x12345 and 0x23456) to identify the operations
  1883  	// which produces an fd instance. Check cqe.user_data, which should be the same
  1884  	// as sqe.user_data for that operation. If it falls in that unique range, return
  1885  	// cqe.res as fd. Otherwise, just return an invalid fd.
  1886  	return (cqe.user_data == 0x12345 || cqe.user_data == 0x23456) ? (long)cqe.res : (long)-1;
  1887  }
  1888  
  1889  #endif
  1890  
  1891  #if SYZ_EXECUTOR || __NR_syz_io_uring_setup
  1892  
// Offsets of the submission queue fields inside the mapped ring.
// Embedded in io_uring_params, which syz_io_uring_setup passes by pointer to
// the io_uring_setup syscall.
struct io_sqring_offsets {
	uint32 head;
	uint32 tail;
	uint32 ring_mask;
	uint32 ring_entries;
	uint32 flags;
	uint32 dropped;
	// Offset of the SQ index array; syz_io_uring_setup uses it to size the ring
	// mapping and to locate the array it initializes.
	uint32 array;
	uint32 resv1;
	uint64 resv2;
};

// Offsets of the completion queue fields inside the mapped ring.
struct io_cqring_offsets {
	uint32 head;
	uint32 tail;
	uint32 ring_mask;
	uint32 ring_entries;
	uint32 overflow;
	// Offset of the CQE array; used by syz_io_uring_setup to size the ring mapping.
	uint32 cqes;
	uint64 resv[2];
};

// Argument block of the io_uring_setup syscall as used by syz_io_uring_setup.
struct io_uring_params {
	// Read back after the syscall to size the ring and SQE mappings.
	uint32 sq_entries;
	uint32 cq_entries;
	// syz_io_uring_setup clears IORING_SETUP_CQE32 and IORING_SETUP_SQE128 here
	// before issuing the syscall.
	uint32 flags;
	uint32 sq_thread_cpu;
	uint32 sq_thread_idle;
	uint32 features;
	uint32 resv[4];
	struct io_sqring_offsets sq_off;
	struct io_cqring_offsets cq_off;
};
  1926  
  1927  #define IORING_OFF_SQ_RING 0
  1928  #define IORING_OFF_SQES 0x10000000ULL
  1929  #define IORING_SETUP_SQE128 (1U << 10)
  1930  #define IORING_SETUP_CQE32 (1U << 11)
  1931  
  1932  #include <sys/mman.h>
  1933  #include <unistd.h>
  1934  
  1935  // Wrapper for io_uring_setup and the subsequent mmap calls that map the ring and the sqes
  1936  static long syz_io_uring_setup(volatile long a0, volatile long a1, volatile long a2, volatile long a3)
  1937  {
  1938  	// syzlang: syz_io_uring_setup(entries int32[1:IORING_MAX_ENTRIES], params ptr[inout, io_uring_params], ring_ptr ptr[out, ring_ptr], sqes_ptr ptr[out, sqes_ptr]) fd_io_uring
  1939  	// C:       syz_io_uring_setup(uint32 entries, struct io_uring_params* params, void** ring_ptr_out, void** sqes_ptr_out) // returns uint32 fd_io_uring
  1940  
  1941  	// Cast to original
  1942  	uint32 entries = (uint32)a0;
  1943  	struct io_uring_params* setup_params = (struct io_uring_params*)a1;
  1944  	void** ring_ptr_out = (void**)a2;
  1945  	void** sqes_ptr_out = (void**)a3;
  1946  	// Temporarily disable IORING_SETUP_CQE32 and IORING_SETUP_SQE128 that may change SIZEOF_IO_URING_CQE and SIZEOF_IO_URING_SQE.
  1947  	// Tracking bug: https://github.com/google/syzkaller/issues/4531.
  1948  	setup_params->flags &= ~(IORING_SETUP_CQE32 | IORING_SETUP_SQE128);
  1949  	uint32 fd_io_uring = syscall(__NR_io_uring_setup, entries, setup_params);
  1950  
  1951  	// Compute the ring sizes
  1952  	uint32 sq_ring_sz = setup_params->sq_off.array + setup_params->sq_entries * sizeof(uint32);
  1953  	uint32 cq_ring_sz = setup_params->cq_off.cqes + setup_params->cq_entries * SIZEOF_IO_URING_CQE;
  1954  
  1955  	// Asssumed IORING_FEAT_SINGLE_MMAP, which is always the case with the current implementation
  1956  	// The implication is that the sq_ring_ptr and the cq_ring_ptr are the same but the
  1957  	// difference is in the offsets to access the fields of these rings.
  1958  	uint32 ring_sz = sq_ring_sz > cq_ring_sz ? sq_ring_sz : cq_ring_sz;
  1959  	*ring_ptr_out = mmap(0, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd_io_uring, IORING_OFF_SQ_RING);
  1960  
  1961  	uint32 sqes_sz = setup_params->sq_entries * SIZEOF_IO_URING_SQE;
  1962  	*sqes_ptr_out = mmap(0, sqes_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd_io_uring, IORING_OFF_SQES);
  1963  
  1964  	uint32* array = (uint32*)((uintptr_t)*ring_ptr_out + setup_params->sq_off.array);
  1965  	for (uint32 index = 0; index < entries; index++)
  1966  		array[index] = index;
  1967  
  1968  	return fd_io_uring;
  1969  }
  1970  
  1971  #endif
  1972  
  1973  #if SYZ_EXECUTOR || __NR_syz_io_uring_submit
  1974  
  1975  static long syz_io_uring_submit(volatile long a0, volatile long a1, volatile long a2)
  1976  {
  1977  	// syzlang: syz_io_uring_submit(ring_ptr ring_ptr, sqes_ptr sqes_ptr, 		sqe ptr[in, io_uring_sqe])
  1978  	// C:       syz_io_uring_submit(char* ring_ptr,       io_uring_sqe* sqes_ptr,    io_uring_sqe* sqe)
  1979  
  1980  	// It is not checked if the ring is full
  1981  
  1982  	// Cast to original
  1983  	char* ring_ptr = (char*)a0; // This will be exposed to offsets in bytes
  1984  	char* sqes_ptr = (char*)a1;
  1985  
  1986  	char* sqe = (char*)a2;
  1987  
  1988  	uint32 sq_ring_mask = *(uint32*)(ring_ptr + SQ_RING_MASK_OFFSET);
  1989  	uint32* sq_tail_ptr = (uint32*)(ring_ptr + SQ_TAIL_OFFSET);
  1990  	uint32 sq_tail = *sq_tail_ptr & sq_ring_mask;
  1991  
  1992  	// Get the ptr to the destination for the sqe
  1993  	char* sqe_dest = sqes_ptr + sq_tail * SIZEOF_IO_URING_SQE;
  1994  
  1995  	// Write the sqe entry to its destination in sqes
  1996  	memcpy(sqe_dest, sqe, SIZEOF_IO_URING_SQE);
  1997  
  1998  	// Write the index to the sqe array
  1999  	uint32 sq_tail_next = *sq_tail_ptr + 1;
  2000  
  2001  	// Advance the tail. Tail is a free-flowing integer and relies on natural wrapping.
  2002  	// Ensure that the kernel will never see a tail update without the preceeding SQE
  2003  	// stores being done.
  2004  	__atomic_store_n(sq_tail_ptr, sq_tail_next, __ATOMIC_RELEASE);
  2005  
  2006  	// Now the application is free to call io_uring_enter() to submit the sqe
  2007  	return 0;
  2008  }
  2009  
  2010  #endif
  2011  
  2012  #endif
  2013  
  2014  #if SYZ_EXECUTOR || __NR_syz_usbip_server_init
  2015  
  2016  #include <errno.h>
  2017  #include <fcntl.h>
  2018  #include <linux/usb/ch9.h>
  2019  #include <stdbool.h>
  2020  #include <stdio.h>
  2021  #include <stdlib.h>
  2022  #include <string.h>
  2023  #include <sys/socket.h>
  2024  #include <unistd.h>
  2025  
  2026  // This should be coherent with CONFIG_USBIP_VHCI_HC_PORTS.
  2027  #define VHCI_HC_PORTS 8
  2028  #define VHCI_PORTS (VHCI_HC_PORTS * 2)
  2029  
  2030  static long syz_usbip_server_init(volatile long a0)
  2031  {
  2032  	// port_alloc[0] corresponds to ports which can be used by usb2 and
  2033  	// port_alloc[1] corresponds to ports which can be used by usb3.
  2034  	static int port_alloc[2];
  2035  
  2036  	int speed = (int)a0;
  2037  	bool usb3 = (speed == USB_SPEED_SUPER);
  2038  
  2039  	int socket_pair[2];
  2040  	if (socketpair(AF_UNIX, SOCK_STREAM, 0, socket_pair)) {
  2041  		// This can happen if the test calls prlimit(RLIMIT_AS).
  2042  		debug("syz_usbip_server_init: socketpair failed (%d)\n", errno);
  2043  		return -1;
  2044  	}
  2045  
  2046  	int client_fd = socket_pair[0];
  2047  	int server_fd = socket_pair[1];
  2048  
  2049  	int available_port_num = __atomic_fetch_add(&port_alloc[usb3], 1, __ATOMIC_RELAXED);
  2050  	if (available_port_num > VHCI_HC_PORTS) {
  2051  		debug("syz_usbip_server_init : no more available port for : %d\n", available_port_num);
  2052  		return -1;
  2053  	}
  2054  
  2055  	// Each port number corresponds to a particular vhci_hcd (USB/IP Virtual Host Controller) and it is used by either
  2056  	// an usb2 device or usb3 device. There are 16 ports available in each vhci_hcd.
  2057  	// (VHCI_PORTS = 16 in our case.) When they are occupied, the following vhci_hcd's ports are used.
  2058  	// First 16 ports correspond to vhci_hcd0, next 16 ports correspond to
  2059  	// vhci_hcd1 etc. In a vhci_hcd, first 8 ports are used by usb2 devices and last 8 are used by usb3 devices.
  2060  	int port_num = procid * VHCI_PORTS + usb3 * VHCI_HC_PORTS + available_port_num;
  2061  
  2062  	// Under normal USB/IP usage, devid represents the device ID on the server.
  2063  	// When fuzzing with syzkaller we don't have an actual server or an actual device, so use 0 for devid.
  2064  	char buffer[100];
  2065  	sprintf(buffer, "%d %d %s %d", port_num, client_fd, "0", speed);
  2066  
  2067  	write_file("/sys/devices/platform/vhci_hcd.0/attach", buffer);
  2068  	return server_fd;
  2069  }
  2070  
  2071  #endif
  2072  
  2073  #if SYZ_EXECUTOR || __NR_syz_btf_id_by_name
  2074  
  2075  #include <errno.h>
  2076  #include <fcntl.h>
  2077  #include <stdbool.h>
  2078  #include <stddef.h>
  2079  #include <stdio.h>
  2080  #include <stdlib.h>
  2081  #include <string.h>
  2082  #include <sys/stat.h>
  2083  #include <unistd.h>
  2084  
  2085  // Some items in linux/btf.h are relatively new, so we copy them here for
  2086  // backward compatibility.
  2087  #define BTF_MAGIC 0xeB9F
  2088  
// Header found at the start of the BTF blob read from /sys/kernel/btf/vmlinux.
struct btf_header {
	// Checked against BTF_MAGIC by syz_btf_id_by_name.
	__u16 magic;
	__u8 version;
	__u8 flags;
	// Size of this header; the section offsets below are relative to its end.
	__u32 hdr_len;
	// Offset and length in bytes of the type section.
	__u32 type_off;
	__u32 type_len;
	// Offset and length in bytes of the string section.
	__u32 str_off;
	__u32 str_len;
};
  2099  
  2100  #define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
  2101  #define BTF_INFO_VLEN(info) ((info)&0xffff)
  2102  
  2103  #define BTF_KIND_INT 1
  2104  #define BTF_KIND_ARRAY 3
  2105  #define BTF_KIND_STRUCT 4
  2106  #define BTF_KIND_UNION 5
  2107  #define BTF_KIND_ENUM 6
  2108  #define BTF_KIND_FUNC_PROTO 13
  2109  #define BTF_KIND_VAR 14
  2110  #define BTF_KIND_DATASEC 15
  2111  
// Fixed-size record that starts every entry of the BTF type section.
struct btf_type {
	// Offset of the type name inside the string section.
	__u32 name_off;
	// Packed kind and vlen, decoded with BTF_INFO_KIND / BTF_INFO_VLEN.
	__u32 info;
	// Not read by the code in this file.
	union {
		__u32 size;
		__u32 type;
	};
};

// The structs below describe the extra data that follows a btf_type record
// for certain kinds. syz_btf_id_by_name only uses their sizes, to step over
// that data.

// One per enumerator after a BTF_KIND_ENUM record (vlen entries).
struct btf_enum {
	__u32 name_off;
	__s32 val;
};

// Single trailer after a BTF_KIND_ARRAY record.
struct btf_array {
	__u32 type;
	__u32 index_type;
	__u32 nelems;
};

// One per member after a BTF_KIND_STRUCT / BTF_KIND_UNION record (vlen entries).
struct btf_member {
	__u32 name_off;
	__u32 type;
	__u32 offset;
};

// One per parameter after a BTF_KIND_FUNC_PROTO record (vlen entries).
struct btf_param {
	__u32 name_off;
	__u32 type;
};

// Single trailer after a BTF_KIND_VAR record.
struct btf_var {
	__u32 linkage;
};

// One per variable after a BTF_KIND_DATASEC record (vlen entries).
struct btf_var_secinfo {
	__u32 type;
	__u32 offset;
	__u32 size;
};
  2152  
  2153  // Set the limit on the maximum size of btf/vmlinux to be 10 MiB.
  2154  #define VMLINUX_MAX_SUPPORT_SIZE (10 * 1024 * 1024)
  2155  
  2156  // Read out all the content of /sys/kernel/btf/vmlinux to the fixed address
  2157  // buffer and return it. Return NULL if failed.
  2158  static char* read_btf_vmlinux()
  2159  {
  2160  	static bool is_read = false;
  2161  	static char buf[VMLINUX_MAX_SUPPORT_SIZE];
  2162  
  2163  	// There could be a race condition here, but it should not be harmful.
  2164  	if (is_read)
  2165  		return buf;
  2166  
  2167  	int fd = open("/sys/kernel/btf/vmlinux", O_RDONLY);
  2168  	if (fd < 0)
  2169  		return NULL;
  2170  
  2171  	unsigned long bytes_read = 0;
  2172  	for (;;) {
  2173  		ssize_t ret = read(fd, buf + bytes_read,
  2174  				   VMLINUX_MAX_SUPPORT_SIZE - bytes_read);
  2175  
  2176  		if (ret < 0 || bytes_read + ret == VMLINUX_MAX_SUPPORT_SIZE)
  2177  			return NULL;
  2178  
  2179  		if (ret == 0)
  2180  			break;
  2181  
  2182  		bytes_read += ret;
  2183  	}
  2184  
  2185  	is_read = true;
  2186  	return buf;
  2187  }
  2188  
  2189  // Given a pointer to a C-string as the only argument a0, return the
  2190  // corresponding btf ID for this name. Return -1 if there is an error when
  2191  // opening the vmlinux file or the name is not found in vmlinux.
  2192  static long syz_btf_id_by_name(volatile long a0)
  2193  {
  2194  	// syzlang: syz_btf_id_by_name(name ptr[in, string]) btf_id
  2195  	// C:		syz_btf_id_by_name(char* name)
  2196  	char* target = (char*)a0;
  2197  
  2198  	char* vmlinux = read_btf_vmlinux();
  2199  	if (vmlinux == NULL)
  2200  		return -1;
  2201  
  2202  	struct btf_header* btf_header = (struct btf_header*)vmlinux;
  2203  	if (btf_header->magic != BTF_MAGIC)
  2204  		return -1;
  2205  	// These offsets are bytes relative to the end of the header.
  2206  	char* btf_type_sec = vmlinux + btf_header->hdr_len + btf_header->type_off;
  2207  	char* btf_str_sec = vmlinux + btf_header->hdr_len + btf_header->str_off;
  2208  	// Scan through the btf type section, and find a type description that
  2209  	// matches the provided name.
  2210  	unsigned int bytes_parsed = 0;
  2211  	// BTF index starts at 1.
  2212  	long idx = 1;
  2213  	while (bytes_parsed < btf_header->type_len) {
  2214  		struct btf_type* btf_type = (struct btf_type*)(btf_type_sec + bytes_parsed);
  2215  		uint32 kind = BTF_INFO_KIND(btf_type->info);
  2216  		uint32 vlen = BTF_INFO_VLEN(btf_type->info);
  2217  		char* name = btf_str_sec + btf_type->name_off;
  2218  
  2219  		if (strcmp(name, target) == 0)
  2220  			return idx;
  2221  
  2222  		// From /include/uapi/linux/btf.h, some kinds of types are
  2223  		// followed by extra data.
  2224  		size_t skip;
  2225  		switch (kind) {
  2226  		case BTF_KIND_INT:
  2227  			skip = sizeof(uint32);
  2228  			break;
  2229  		case BTF_KIND_ENUM:
  2230  			skip = sizeof(struct btf_enum) * vlen;
  2231  			break;
  2232  		case BTF_KIND_ARRAY:
  2233  			skip = sizeof(struct btf_array);
  2234  			break;
  2235  		case BTF_KIND_STRUCT:
  2236  		case BTF_KIND_UNION:
  2237  			skip = sizeof(struct btf_member) * vlen;
  2238  			break;
  2239  		case BTF_KIND_FUNC_PROTO:
  2240  			skip = sizeof(struct btf_param) * vlen;
  2241  			break;
  2242  		case BTF_KIND_VAR:
  2243  			skip = sizeof(struct btf_var);
  2244  			break;
  2245  		case BTF_KIND_DATASEC:
  2246  			skip = sizeof(struct btf_var_secinfo) * vlen;
  2247  			break;
  2248  		default:
  2249  			skip = 0;
  2250  		}
  2251  
  2252  		bytes_parsed += sizeof(struct btf_type) + skip;
  2253  		idx++;
  2254  	}
  2255  
  2256  	return -1;
  2257  }
  2258  
  2259  #endif // SYZ_EXECUTOR || __NR_syz_btf_id_by_name
  2260  
  2261  // Same as memcpy except that it accepts offset to dest and src.
  2262  #if SYZ_EXECUTOR || __NR_syz_memcpy_off
  2263  static long syz_memcpy_off(volatile long a0, volatile long a1, volatile long a2, volatile long a3, volatile long a4)
  2264  {
  2265  	// C:       syz_memcpy_off(void* dest, uint32 dest_off, void* src, uint32 src_off, size_t n)
  2266  
  2267  	// Cast to original
  2268  	char* dest = (char*)a0;
  2269  	uint32 dest_off = (uint32)a1;
  2270  	char* src = (char*)a2;
  2271  	uint32 src_off = (uint32)a3;
  2272  	size_t n = (size_t)a4;
  2273  
  2274  	return (long)memcpy(dest + dest_off, src + src_off, n);
  2275  }
  2276  #endif
  2277  
  2278  #if (SYZ_EXECUTOR || SYZ_REPEAT && SYZ_NET_INJECTION) && SYZ_EXECUTOR_USES_FORK_SERVER
  2279  static void flush_tun()
  2280  {
  2281  #if SYZ_EXECUTOR
  2282  	if (!flag_net_injection)
  2283  		return;
  2284  #endif
  2285  	char data[1000];
  2286  	while (read_tun(&data[0], sizeof(data)) != -1) {
  2287  	}
  2288  }
  2289  #endif
  2290  
  2291  #if SYZ_EXECUTOR || __NR_syz_extract_tcp_res && SYZ_NET_INJECTION
  2292  #ifndef __ANDROID__
  2293  // Can't include <linux/ipv6.h>, since it causes
  2294  // conflicts due to some structs redefinition.
// Local copy of the IPv6 header layout, used by syz_extract_tcp_res to find
// the TCP header that follows it.
struct ipv6hdr {
	__u8 priority : 4,
	    version : 4;
	__u8 flow_lbl[3];

	__be16 payload_len;
	// Checked against IPPROTO_TCP; extension headers are not parsed.
	__u8 nexthdr;
	__u8 hop_limit;

	struct in6_addr saddr;
	struct in6_addr daddr;
};
  2307  #endif
  2308  
// Output of syz_extract_tcp_res: sequence and acknowledgement numbers of the
// captured TCP packet with the requested increments added, stored in network
// byte order (they are written through htonl).
struct tcp_resources {
	uint32 seq;
	uint32 ack;
};
  2313  
  2314  static long syz_extract_tcp_res(volatile long a0, volatile long a1, volatile long a2)
  2315  {
  2316  	// syz_extract_tcp_res(res ptr[out, tcp_resources], seq_inc int32, ack_inc int32)
  2317  
  2318  	if (tunfd < 0)
  2319  		return (uintptr_t)-1;
  2320  
  2321  	// We just need this to be large enough to hold headers that we parse (ethernet/ip/tcp).
  2322  	// Rest of the packet (if any) will be silently truncated which is fine.
  2323  	char data[1000];
  2324  	int rv = read_tun(&data[0], sizeof(data));
  2325  	if (rv == -1)
  2326  		return (uintptr_t)-1;
  2327  	size_t length = rv;
  2328  	debug_dump_data(data, length);
  2329  
  2330  	if (length < sizeof(struct ethhdr))
  2331  		return (uintptr_t)-1;
  2332  	struct ethhdr* ethhdr = (struct ethhdr*)&data[0];
  2333  
  2334  	struct tcphdr* tcphdr = 0;
  2335  	if (ethhdr->h_proto == htons(ETH_P_IP)) {
  2336  		if (length < sizeof(struct ethhdr) + sizeof(struct iphdr))
  2337  			return (uintptr_t)-1;
  2338  		struct iphdr* iphdr = (struct iphdr*)&data[sizeof(struct ethhdr)];
  2339  		if (iphdr->protocol != IPPROTO_TCP)
  2340  			return (uintptr_t)-1;
  2341  		if (length < sizeof(struct ethhdr) + iphdr->ihl * 4 + sizeof(struct tcphdr))
  2342  			return (uintptr_t)-1;
  2343  		tcphdr = (struct tcphdr*)&data[sizeof(struct ethhdr) + iphdr->ihl * 4];
  2344  	} else {
  2345  		if (length < sizeof(struct ethhdr) + sizeof(struct ipv6hdr))
  2346  			return (uintptr_t)-1;
  2347  		struct ipv6hdr* ipv6hdr = (struct ipv6hdr*)&data[sizeof(struct ethhdr)];
  2348  		// TODO: parse and skip extension headers.
  2349  		if (ipv6hdr->nexthdr != IPPROTO_TCP)
  2350  			return (uintptr_t)-1;
  2351  		if (length < sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
  2352  			return (uintptr_t)-1;
  2353  		tcphdr = (struct tcphdr*)&data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr)];
  2354  	}
  2355  
  2356  	struct tcp_resources* res = (struct tcp_resources*)a0;
  2357  	res->seq = htonl((ntohl(tcphdr->seq) + (uint32)a1));
  2358  	res->ack = htonl((ntohl(tcphdr->ack_seq) + (uint32)a2));
  2359  
  2360  	debug("extracted seq: %08x\n", res->seq);
  2361  	debug("extracted ack: %08x\n", res->ack);
  2362  
  2363  	return 0;
  2364  }
  2365  #endif
  2366  
  2367  #if SYZ_EXECUTOR || SYZ_CLOSE_FDS || __NR_syz_usb_connect || __NR_syz_usb_connect_ath9k
  2368  #define MAX_FDS 30
  2369  #endif
  2370  
  2371  #if SYZ_EXECUTOR || __NR_syz_usb_connect || __NR_syz_usb_connect_ath9k ||       \
  2372      __NR_syz_usb_ep_write || __NR_syz_usb_ep_read || __NR_syz_usb_control_io || \
  2373      __NR_syz_usb_disconnect
  2374  #include <errno.h>
  2375  #include <fcntl.h>
  2376  #include <linux/usb/ch9.h>
  2377  #include <stdarg.h>
  2378  #include <stdbool.h>
  2379  #include <stddef.h>
  2380  #include <stdio.h>
  2381  #include <sys/mount.h>
  2382  #include <sys/stat.h>
  2383  #include <sys/types.h>
  2384  
  2385  #include "common_usb_linux.h"
  2386  #endif
  2387  
  2388  #if SYZ_EXECUTOR || __NR_syz_open_dev
  2389  #include <fcntl.h>
  2390  #include <string.h>
  2391  #include <sys/stat.h>
  2392  #include <sys/types.h>
  2393  
  2394  static long syz_open_dev(volatile long a0, volatile long a1, volatile long a2)
  2395  {
  2396  	if (a0 == 0xc || a0 == 0xb) {
  2397  		// syz_open_dev$char(dev const[0xc], major intptr, minor intptr) fd
  2398  		// syz_open_dev$block(dev const[0xb], major intptr, minor intptr) fd
  2399  		char buf[128];
  2400  		sprintf(buf, "/dev/%s/%d:%d", a0 == 0xc ? "char" : "block", (uint8)a1, (uint8)a2);
  2401  		return open(buf, O_RDWR, 0);
  2402  	} else {
  2403  		// syz_open_dev(dev strconst, id intptr, flags flags[open_flags]) fd
  2404  		char buf[1024];
  2405  		char* hash;
  2406  		strncpy(buf, (char*)a0, sizeof(buf) - 1);
  2407  		buf[sizeof(buf) - 1] = 0;
  2408  		while ((hash = strchr(buf, '#'))) {
  2409  			*hash = '0' + (char)(a1 % 10); // 10 devices should be enough for everyone.
  2410  			a1 /= 10;
  2411  		}
  2412  		return open(buf, a2, 0);
  2413  	}
  2414  }
  2415  #endif
  2416  
  2417  #if SYZ_EXECUTOR || __NR_syz_open_procfs
  2418  #include <fcntl.h>
  2419  #include <string.h>
  2420  #include <sys/stat.h>
  2421  #include <sys/types.h>
  2422  
  2423  static long syz_open_procfs(volatile long a0, volatile long a1)
  2424  {
  2425  	// syz_open_procfs(pid pid, file ptr[in, string[procfs_file]]) fd
  2426  
  2427  	char buf[128];
  2428  	memset(buf, 0, sizeof(buf));
  2429  	if (a0 == 0) {
  2430  		snprintf(buf, sizeof(buf), "/proc/self/%s", (char*)a1);
  2431  	} else if (a0 == -1) {
  2432  		snprintf(buf, sizeof(buf), "/proc/thread-self/%s", (char*)a1);
  2433  	} else {
  2434  		snprintf(buf, sizeof(buf), "/proc/self/task/%d/%s", (int)a0, (char*)a1);
  2435  	}
  2436  	int fd = open(buf, O_RDWR);
  2437  	if (fd == -1)
  2438  		fd = open(buf, O_RDONLY);
  2439  	return fd;
  2440  }
  2441  #endif
  2442  
  2443  #if SYZ_EXECUTOR || __NR_syz_open_pts
  2444  #include <fcntl.h>
  2445  #include <sys/ioctl.h>
  2446  #include <sys/stat.h>
  2447  #include <sys/types.h>
  2448  
  2449  static long syz_open_pts(volatile long a0, volatile long a1)
  2450  {
  2451  	// syz_openpts(fd fd[tty], flags flags[open_flags]) fd[tty]
  2452  	int ptyno = 0;
  2453  	if (ioctl(a0, TIOCGPTN, &ptyno))
  2454  		return -1;
  2455  	char buf[128];
  2456  	sprintf(buf, "/dev/pts/%d", ptyno);
  2457  	return open(buf, a1, 0);
  2458  }
  2459  #endif
  2460  
  2461  #if SYZ_EXECUTOR || __NR_syz_init_net_socket
  2462  #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID
  2463  #include <fcntl.h>
  2464  #include <sched.h>
  2465  #include <sys/stat.h>
  2466  #include <sys/types.h>
  2467  #include <unistd.h>
  2468  
  2469  // syz_init_net_socket opens a socket in init net namespace.
  2470  // Used for families that can only be created in init net namespace.
  2471  static long syz_init_net_socket(volatile long domain, volatile long type, volatile long proto)
  2472  {
  2473  	int netns = open("/proc/self/ns/net", O_RDONLY);
  2474  	if (netns == -1)
  2475  		return netns;
  2476  	if (setns(kInitNetNsFd, 0))
  2477  		return -1;
  2478  	int sock = syscall(__NR_socket, domain, type, proto);
  2479  	int err = errno;
  2480  	if (setns(netns, 0)) {
  2481  		// The operation may fail if the fd is closed by
  2482  		// a syscall from another thread.
  2483  		exitf("setns(netns) failed");
  2484  	}
  2485  	close(netns);
  2486  	errno = err;
  2487  	return sock;
  2488  }
  2489  #else
  2490  static long syz_init_net_socket(volatile long domain, volatile long type, volatile long proto)
  2491  {
  2492  	return syscall(__NR_socket, domain, type, proto);
  2493  }
  2494  #endif
  2495  #endif
  2496  
  2497  #if SYZ_EXECUTOR || __NR_syz_socket_connect_nvme_tcp
  2498  #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE
  2499  #include <arpa/inet.h>
  2500  #include <fcntl.h>
  2501  #include <netinet/in.h>
  2502  #include <sched.h>
  2503  #include <sys/socket.h>
  2504  #include <sys/stat.h>
  2505  #include <sys/types.h>
  2506  #include <unistd.h>
  2507  
  2508  static long syz_socket_connect_nvme_tcp()
  2509  {
  2510  	struct sockaddr_in nvme_local_address;
  2511  	int netns = open("/proc/self/ns/net", O_RDONLY);
  2512  	if (netns == -1)
  2513  		return netns;
  2514  	if (setns(kInitNetNsFd, 0))
  2515  		return -1;
  2516  	int sock = syscall(__NR_socket, AF_INET, SOCK_STREAM, 0x0);
  2517  	int err = errno;
  2518  	if (setns(netns, 0)) {
  2519  		// The operation may fail if the fd is closed by
  2520  		// a syscall from another thread.
  2521  		exitf("setns(netns) failed");
  2522  	}
  2523  	close(netns);
  2524  	errno = err;
  2525  	// We only connect to an NVMe-oF/TCP server on 127.0.0.1:4420
  2526  	nvme_local_address.sin_family = AF_INET;
  2527  	nvme_local_address.sin_port = htobe16(4420);
  2528  	nvme_local_address.sin_addr.s_addr = htobe32(0x7f000001);
  2529  	err = syscall(__NR_connect, sock, &nvme_local_address, sizeof(nvme_local_address));
  2530  	if (err != 0) {
  2531  		close(sock);
  2532  		return -1;
  2533  	}
  2534  	return sock;
  2535  }
  2536  #else
  2537  static long syz_socket_connect_nvme_tcp()
  2538  {
  2539  	return syscall(__NR_socket, -1, 0, 0);
  2540  }
  2541  #endif
  2542  #endif
  2543  
  2544  #if SYZ_EXECUTOR || SYZ_VHCI_INJECTION
  2545  #include <errno.h>
  2546  #include <fcntl.h>
  2547  #include <linux/rfkill.h>
  2548  #include <pthread.h>
  2549  #include <sys/epoll.h>
  2550  #include <sys/ioctl.h>
  2551  #include <sys/socket.h>
  2552  #include <sys/uio.h>
  2553  
// Bluetooth constants and wire structures used by the vhci setup code below.
// NOTE(review): the values and layouts presumably mirror the kernel's Bluetooth
// HCI definitions -- confirm against the kernel headers before changing them.
#define BTPROTO_HCI 1
#define ACL_LINK 1
#define SCAN_PAGE 2

// 6-byte Bluetooth device address.
typedef struct {
	uint8 b[6];
} __attribute__((packed)) bdaddr_t;

// Packet type byte: the first byte of every packet read from/written to the vhci fd.
#define HCI_COMMAND_PKT 1
#define HCI_EVENT_PKT 4
#define HCI_VENDOR_PKT 0xff

// Header of a command packet; plen is the number of parameter bytes that follow
// (checked against the received size in process_command_pkt).
struct hci_command_hdr {
	uint16 opcode;
	uint8 plen;
} __attribute__((packed));

// Header of an event packet; plen is the number of payload bytes that follow.
struct hci_event_hdr {
	uint8 evt;
	uint8 plen;
} __attribute__((packed));

// Payload of the "connection complete" event.
#define HCI_EV_CONN_COMPLETE 0x03
struct hci_ev_conn_complete {
	uint8 status;
	uint16 handle;
	bdaddr_t bdaddr;
	uint8 link_type;
	uint8 encr_mode;
} __attribute__((packed));

// Payload of the "connection request" event.
#define HCI_EV_CONN_REQUEST 0x04
struct hci_ev_conn_request {
	bdaddr_t bdaddr;
	uint8 dev_class[3];
	uint8 link_type;
} __attribute__((packed));

// Payload of the "remote features" event.
#define HCI_EV_REMOTE_FEATURES 0x0b
struct hci_ev_remote_features {
	uint8 status;
	uint16 handle;
	uint8 features[8];
} __attribute__((packed));

// Fixed part of the "command complete" event; the command's return parameters
// follow it (see hci_send_event_cmd_complete).
#define HCI_EV_CMD_COMPLETE 0x0e
struct hci_ev_cmd_complete {
	uint8 ncmd;
	uint16 opcode;
} __attribute__((packed));

// Command opcodes that process_command_pkt answers specially.
#define HCI_OP_WRITE_SCAN_ENABLE 0x0c1a

// Reply parameters for HCI_OP_READ_BUFFER_SIZE.
#define HCI_OP_READ_BUFFER_SIZE 0x1005
struct hci_rp_read_buffer_size {
	uint8 status;
	uint16 acl_mtu;
	uint8 sco_mtu;
	uint16 acl_max_pkt;
	uint16 sco_max_pkt;
} __attribute__((packed));

// Reply parameters for HCI_OP_READ_BD_ADDR.
#define HCI_OP_READ_BD_ADDR 0x1009
struct hci_rp_read_bd_addr {
	uint8 status;
	bdaddr_t bdaddr;
} __attribute__((packed));

// LE meta event: a subevent code followed by the subevent payload.
#define HCI_EV_LE_META 0x3e
struct hci_ev_le_meta {
	uint8 subevent;
} __attribute__((packed));

// Payload of the LE "connection complete" subevent.
#define HCI_EV_LE_CONN_COMPLETE 0x01
struct hci_ev_le_conn_complete {
	uint8 status;
	uint16 handle;
	uint8 role;
	uint8 bdaddr_type;
	bdaddr_t bdaddr;
	uint16 interval;
	uint16 latency;
	uint16 supervision_timeout;
	uint8 clk_accurancy;
} __attribute__((packed));

// Argument of the HCISETSCAN ioctl issued in initialize_vhci.
// Note: unlike the wire structures above this one is not packed.
struct hci_dev_req {
	uint16 dev_id;
	uint32 dev_opt;
};

// Request written to /dev/vhci in initialize_vhci to create the virtual device.
struct vhci_vendor_pkt_request {
	uint8 type;
	uint8 opcode;
} __attribute__((packed));

// Packet read back from /dev/vhci in initialize_vhci: a type byte followed by
// either a vendor packet (carrying the device id) or an HCI command header.
struct vhci_pkt {
	uint8 type;
	union {
		struct {
			uint8 opcode;
			uint16 id;
		} __attribute__((packed)) vendor_pkt;
		struct hci_command_hdr command_hdr;
	};
} __attribute__((packed));

// ioctls issued on the HCI control socket in initialize_vhci.
#define HCIDEVUP _IOW('H', 201, int)
#define HCISETSCAN _IOW('H', 221, int)

// fd of the opened /dev/vhci device; -1 until initialize_vhci has set it up.
static int vhci_fd = -1;
  2665  
  2666  static void rfkill_unblock_all()
  2667  {
  2668  	int fd = open("/dev/rfkill", O_WRONLY);
  2669  	if (fd < 0)
  2670  		fail("open /dev/rfkill failed");
  2671  	struct rfkill_event event = {0};
  2672  	event.idx = 0;
  2673  	event.type = RFKILL_TYPE_ALL;
  2674  	event.op = RFKILL_OP_CHANGE_ALL;
  2675  	event.soft = 0;
  2676  	event.hard = 0;
  2677  	if (write(fd, &event, sizeof(event)) < 0)
  2678  		fail("write rfkill event failed");
  2679  	close(fd);
  2680  }
  2681  
  2682  static void hci_send_event_packet(int fd, uint8 evt, void* data, size_t data_len)
  2683  {
  2684  	struct iovec iv[3];
  2685  
  2686  	struct hci_event_hdr hdr;
  2687  	hdr.evt = evt;
  2688  	hdr.plen = data_len;
  2689  
  2690  	uint8 type = HCI_EVENT_PKT;
  2691  
  2692  	iv[0].iov_base = &type;
  2693  	iv[0].iov_len = sizeof(type);
  2694  	iv[1].iov_base = &hdr;
  2695  	iv[1].iov_len = sizeof(hdr);
  2696  	iv[2].iov_base = data;
  2697  	iv[2].iov_len = data_len;
  2698  
  2699  	if (writev(fd, iv, sizeof(iv) / sizeof(struct iovec)) < 0)
  2700  		fail("writev failed");
  2701  }
  2702  
  2703  static void hci_send_event_cmd_complete(int fd, uint16 opcode, void* data, size_t data_len)
  2704  {
  2705  	struct iovec iv[4];
  2706  
  2707  	struct hci_event_hdr hdr;
  2708  	hdr.evt = HCI_EV_CMD_COMPLETE;
  2709  	hdr.plen = sizeof(struct hci_ev_cmd_complete) + data_len;
  2710  
  2711  	struct hci_ev_cmd_complete evt_hdr;
  2712  	evt_hdr.ncmd = 1;
  2713  	evt_hdr.opcode = opcode;
  2714  
  2715  	uint8 type = HCI_EVENT_PKT;
  2716  
  2717  	iv[0].iov_base = &type;
  2718  	iv[0].iov_len = sizeof(type);
  2719  	iv[1].iov_base = &hdr;
  2720  	iv[1].iov_len = sizeof(hdr);
  2721  	iv[2].iov_base = &evt_hdr;
  2722  	iv[2].iov_len = sizeof(evt_hdr);
  2723  	iv[3].iov_base = data;
  2724  	iv[3].iov_len = data_len;
  2725  
  2726  	if (writev(fd, iv, sizeof(iv) / sizeof(struct iovec)) < 0)
  2727  		fail("writev failed");
  2728  }
  2729  
  2730  static bool process_command_pkt(int fd, char* buf, ssize_t buf_size)
  2731  {
  2732  	struct hci_command_hdr* hdr = (struct hci_command_hdr*)buf;
  2733  	if (buf_size < (ssize_t)sizeof(struct hci_command_hdr) ||
  2734  	    hdr->plen != buf_size - sizeof(struct hci_command_hdr))
  2735  		failmsg("process_command_pkt: invalid size", "suze=%zx", buf_size);
  2736  
  2737  	switch (hdr->opcode) {
  2738  	case HCI_OP_WRITE_SCAN_ENABLE: {
  2739  		uint8 status = 0;
  2740  		hci_send_event_cmd_complete(fd, hdr->opcode, &status, sizeof(status));
  2741  		return true;
  2742  	}
  2743  	case HCI_OP_READ_BD_ADDR: {
  2744  		struct hci_rp_read_bd_addr rp = {0};
  2745  		rp.status = 0;
  2746  		memset(&rp.bdaddr, 0xaa, 6);
  2747  		hci_send_event_cmd_complete(fd, hdr->opcode, &rp, sizeof(rp));
  2748  		return false;
  2749  	}
  2750  	case HCI_OP_READ_BUFFER_SIZE: {
  2751  		struct hci_rp_read_buffer_size rp = {0};
  2752  		rp.status = 0;
  2753  		rp.acl_mtu = 1021;
  2754  		rp.sco_mtu = 96;
  2755  		rp.acl_max_pkt = 4;
  2756  		rp.sco_max_pkt = 6;
  2757  		hci_send_event_cmd_complete(fd, hdr->opcode, &rp, sizeof(rp));
  2758  		return false;
  2759  	}
  2760  	}
  2761  
  2762  	char dummy[0xf9] = {0};
  2763  	hci_send_event_cmd_complete(fd, hdr->opcode, dummy, sizeof(dummy));
  2764  	return false;
  2765  }
  2766  
  2767  static void* event_thread(void* arg)
  2768  {
  2769  	while (1) {
  2770  		char buf[1024] = {0};
  2771  		ssize_t buf_size = read(vhci_fd, buf, sizeof(buf));
  2772  		if (buf_size < 0)
  2773  			fail("read failed");
  2774  		debug_dump_data(buf, buf_size);
  2775  		if (buf_size > 0 && buf[0] == HCI_COMMAND_PKT) {
  2776  			if (process_command_pkt(vhci_fd, buf + 1, buf_size - 1))
  2777  				break;
  2778  		}
  2779  	}
  2780  	return NULL;
  2781  }
  2782  
  2783  // Matches hci_handles in sys/linux/dev_vhci.txt.
  2784  #define HCI_HANDLE_1 200
  2785  #define HCI_HANDLE_2 201
  2786  
  2787  #define HCI_PRIMARY 0
  2788  #define HCI_OP_RESET 0x0c03
  2789  
  2790  static void initialize_vhci()
  2791  {
  2792  #if SYZ_EXECUTOR
  2793  	if (!flag_vhci_injection)
  2794  		return;
  2795  #endif
  2796  
  2797  	int hci_sock = socket(AF_BLUETOOTH, SOCK_RAW, BTPROTO_HCI);
  2798  	if (hci_sock < 0)
  2799  		fail("socket(AF_BLUETOOTH, SOCK_RAW, BTPROTO_HCI) failed");
  2800  
  2801  	vhci_fd = open("/dev/vhci", O_RDWR);
  2802  	if (vhci_fd == -1)
  2803  		fail("open /dev/vhci failed");
  2804  
  2805  	// Remap vhci onto higher fd number to hide it from fuzzer and to keep
  2806  	// fd numbers stable regardless of whether vhci is opened or not (also see kMaxFd).
  2807  	const int kVhciFd = 202;
  2808  	if (dup2(vhci_fd, kVhciFd) < 0)
  2809  		fail("dup2(vhci_fd, kVhciFd) failed");
  2810  	close(vhci_fd);
  2811  	vhci_fd = kVhciFd;
  2812  
  2813  	struct vhci_vendor_pkt_request vendor_pkt_req = {HCI_VENDOR_PKT, HCI_PRIMARY};
  2814  	if (write(vhci_fd, &vendor_pkt_req, sizeof(vendor_pkt_req)) != sizeof(vendor_pkt_req))
  2815  		fail("vendor_pkt_req write failed");
  2816  
  2817  	struct vhci_pkt vhci_pkt;
  2818  	if (read(vhci_fd, &vhci_pkt, sizeof(vhci_pkt)) != sizeof(vhci_pkt))
  2819  		fail("vhci_pkt read failed");
  2820  
  2821  	if (vhci_pkt.type == HCI_COMMAND_PKT && vhci_pkt.command_hdr.opcode == HCI_OP_RESET) {
  2822  		char response[1] = {0};
  2823  		hci_send_event_cmd_complete(vhci_fd, HCI_OP_RESET, response, sizeof(response));
  2824  
  2825  		if (read(vhci_fd, &vhci_pkt, sizeof(vhci_pkt)) != sizeof(vhci_pkt))
  2826  			fail("vhci_pkt read failed");
  2827  	}
  2828  
  2829  	if (vhci_pkt.type != HCI_VENDOR_PKT)
  2830  		fail("wrong response packet");
  2831  
  2832  	int dev_id = vhci_pkt.vendor_pkt.id;
  2833  	debug("hci dev id: %x\n", dev_id);
  2834  
  2835  	pthread_t th;
  2836  	if (pthread_create(&th, NULL, event_thread, NULL))
  2837  		fail("pthread_create failed");
  2838  
  2839  	// Bring hci device up
  2840  	int ret = ioctl(hci_sock, HCIDEVUP, dev_id);
  2841  	if (ret) {
  2842  		if (errno == ERFKILL) {
  2843  			rfkill_unblock_all();
  2844  			ret = ioctl(hci_sock, HCIDEVUP, dev_id);
  2845  		}
  2846  
  2847  		if (ret && errno != EALREADY)
  2848  			fail("ioctl(HCIDEVUP) failed");
  2849  	}
  2850  
  2851  	// Activate page scanning mode which is required to fake a connection.
  2852  	struct hci_dev_req dr = {0};
  2853  	dr.dev_id = dev_id;
  2854  	dr.dev_opt = SCAN_PAGE;
  2855  	if (ioctl(hci_sock, HCISETSCAN, &dr))
  2856  		fail("ioctl(HCISETSCAN) failed");
  2857  
  2858  	// Fake a connection with bd address 10:aa:aa:aa:aa:aa.
  2859  	// This is a fixed address used in sys/linux/socket_bluetooth.txt.
  2860  	struct hci_ev_conn_request request;
  2861  	memset(&request, 0, sizeof(request));
  2862  	memset(&request.bdaddr, 0xaa, 6);
  2863  	*(uint8*)&request.bdaddr.b[5] = 0x10;
  2864  	request.link_type = ACL_LINK;
  2865  	hci_send_event_packet(vhci_fd, HCI_EV_CONN_REQUEST, &request, sizeof(request));
  2866  
  2867  	struct hci_ev_conn_complete complete;
  2868  	memset(&complete, 0, sizeof(complete));
  2869  	complete.status = 0;
  2870  	complete.handle = HCI_HANDLE_1;
  2871  	memset(&complete.bdaddr, 0xaa, 6);
  2872  	*(uint8*)&complete.bdaddr.b[5] = 0x10;
  2873  	complete.link_type = ACL_LINK;
  2874  	complete.encr_mode = 0;
  2875  	hci_send_event_packet(vhci_fd, HCI_EV_CONN_COMPLETE, &complete, sizeof(complete));
  2876  
  2877  	struct hci_ev_remote_features features;
  2878  	memset(&features, 0, sizeof(features));
  2879  	features.status = 0;
  2880  	features.handle = HCI_HANDLE_1;
  2881  	hci_send_event_packet(vhci_fd, HCI_EV_REMOTE_FEATURES, &features, sizeof(features));
  2882  
  2883  	// Fake a low-energy connection with bd address 11:aa:aa:aa:aa:aa.
  2884  	// This is a fixed address used in sys/linux/socket_bluetooth.txt.
  2885  	struct {
  2886  		struct hci_ev_le_meta le_meta;
  2887  		struct hci_ev_le_conn_complete le_conn;
  2888  	} le_conn;
  2889  	memset(&le_conn, 0, sizeof(le_conn));
  2890  	le_conn.le_meta.subevent = HCI_EV_LE_CONN_COMPLETE;
  2891  	memset(&le_conn.le_conn.bdaddr, 0xaa, 6);
  2892  	*(uint8*)&le_conn.le_conn.bdaddr.b[5] = 0x11;
  2893  	le_conn.le_conn.role = 1;
  2894  	le_conn.le_conn.handle = HCI_HANDLE_2;
  2895  	hci_send_event_packet(vhci_fd, HCI_EV_LE_META, &le_conn, sizeof(le_conn));
  2896  
  2897  	pthread_join(th, NULL);
  2898  	close(hci_sock);
  2899  }
  2900  #endif
  2901  
  2902  #if SYZ_EXECUTOR || __NR_syz_emit_vhci && SYZ_VHCI_INJECTION
  2903  static long syz_emit_vhci(volatile long a0, volatile long a1)
  2904  {
  2905  	if (vhci_fd < 0)
  2906  		return (uintptr_t)-1;
  2907  
  2908  	char* data = (char*)a0;
  2909  	uint32 length = a1;
  2910  
  2911  	return write(vhci_fd, data, length);
  2912  }
  2913  #endif
  2914  
  2915  #if SYZ_EXECUTOR || __NR_syz_genetlink_get_family_id
  2916  #include <errno.h>
  2917  #include <sys/socket.h>
  2918  
  2919  static long syz_genetlink_get_family_id(volatile long name, volatile long sock_arg)
  2920  {
  2921  	debug("syz_genetlink_get_family_id(%s, %d)\n", (char*)name, (int)sock_arg);
  2922  	int fd = sock_arg;
  2923  	if (fd < 0) {
  2924  		fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
  2925  		if (fd == -1) {
  2926  			debug("syz_genetlink_get_family_id: socket failed: %d\n", errno);
  2927  			return -1;
  2928  		}
  2929  	}
  2930  	struct nlmsg nlmsg_tmp;
  2931  	int ret = netlink_query_family_id(&nlmsg_tmp, fd, (char*)name, false);
  2932  	if ((int)sock_arg < 0)
  2933  		close(fd);
  2934  	if (ret < 0) {
  2935  		debug("syz_genetlink_get_family_id: netlink_query_family_id failed: %d\n", ret);
  2936  		return -1;
  2937  	}
  2938  
  2939  	return ret;
  2940  }
  2941  #endif
  2942  
  2943  #if SYZ_EXECUTOR || __NR_syz_mount_image || __NR_syz_read_part_table
  2944  #include "common_zlib.h"
  2945  #include <errno.h>
  2946  #include <fcntl.h>
  2947  #include <linux/loop.h>
  2948  #include <stdbool.h>
  2949  #include <sys/ioctl.h>
  2950  #include <sys/stat.h>
  2951  #include <sys/types.h>
  2952  
  2953  // Setup the loop device needed for mounting a filesystem image. Takes care of
  2954  // creating and initializing the underlying file backing the loop device and
  2955  // returns the fds to the file and device.
  2956  // Returns 0 on success, -1 otherwise.
  2957  static int setup_loop_device(unsigned char* data, unsigned long size, const char* loopname, int* loopfd_p)
  2958  {
  2959  	int err = 0, loopfd = -1;
  2960  	int memfd = syscall(__NR_memfd_create, "syzkaller", 0);
  2961  	if (memfd == -1) {
  2962  		err = errno;
  2963  		goto error;
  2964  	}
  2965  	if (puff_zlib_to_file(data, size, memfd)) {
  2966  		err = errno;
  2967  		debug("setup_loop_device: could not decompress data: %d\n", errno);
  2968  		goto error_close_memfd;
  2969  	}
  2970  
  2971  	loopfd = open(loopname, O_RDWR);
  2972  	if (loopfd == -1) {
  2973  		err = errno;
  2974  		debug("setup_loop_device: open failed: %d\n", errno);
  2975  		goto error_close_memfd;
  2976  	}
  2977  	if (ioctl(loopfd, LOOP_SET_FD, memfd)) {
  2978  		if (errno != EBUSY) {
  2979  			err = errno;
  2980  			goto error_close_loop;
  2981  		}
  2982  		ioctl(loopfd, LOOP_CLR_FD, 0);
  2983  		usleep(1000);
  2984  		if (ioctl(loopfd, LOOP_SET_FD, memfd)) {
  2985  			err = errno;
  2986  			goto error_close_loop;
  2987  		}
  2988  	}
  2989  
  2990  	close(memfd);
  2991  	*loopfd_p = loopfd;
  2992  	return 0;
  2993  
  2994  error_close_loop:
  2995  	close(loopfd);
  2996  error_close_memfd:
  2997  	close(memfd);
  2998  error:
  2999  	errno = err;
  3000  	return -1;
  3001  }
  3002  
  3003  #if SYZ_EXECUTOR || __NR_syz_mount_image
  3004  
  3005  static void reset_loop_device(const char* loopname)
  3006  {
  3007  	int loopfd = open(loopname, O_RDWR);
  3008  	if (loopfd == -1) {
  3009  		debug("reset_loop_device: open failed: %d\n", errno);
  3010  		return;
  3011  	}
  3012  	if (ioctl(loopfd, LOOP_CLR_FD, 0)) {
  3013  		debug("reset_loop_device: LOOP_CLR_FD failed: %d\n", errno);
  3014  	}
  3015  	close(loopfd);
  3016  }
  3017  
  3018  #endif
  3019  
  3020  #endif
  3021  
  3022  #if SYZ_EXECUTOR || __NR_syz_read_part_table
  3023  // syz_read_part_table(size len[img], img ptr[in, compressed_image])
  3024  static long syz_read_part_table(volatile unsigned long size, volatile long image)
  3025  {
  3026  	unsigned char* data = (unsigned char*)image;
  3027  	int err = 0, res = -1, loopfd = -1;
  3028  	char loopname[64];
  3029  
  3030  	snprintf(loopname, sizeof(loopname), "/dev/loop%llu", procid);
  3031  	if (setup_loop_device(data, size, loopname, &loopfd) == -1)
  3032  		return -1;
  3033  
  3034  	struct loop_info64 info;
  3035  	if (ioctl(loopfd, LOOP_GET_STATUS64, &info)) {
  3036  		err = errno;
  3037  		goto error_clear_loop;
  3038  	}
  3039  #if SYZ_EXECUTOR
  3040  	cover_reset(0);
  3041  #endif
  3042  	info.lo_flags |= LO_FLAGS_PARTSCAN;
  3043  	if (ioctl(loopfd, LOOP_SET_STATUS64, &info)) {
  3044  		err = errno;
  3045  		goto error_clear_loop;
  3046  	}
  3047  	res = 0;
  3048  	// If we managed to parse some partitions, symlink them into our work dir.
  3049  	for (unsigned long i = 1, j = 0; i < 8; i++) {
  3050  		snprintf(loopname, sizeof(loopname), "/dev/loop%llup%d", procid, (int)i);
  3051  		struct stat statbuf;
  3052  		if (stat(loopname, &statbuf) == 0) {
  3053  			char linkname[64];
  3054  			snprintf(linkname, sizeof(linkname), "./file%d", (int)j++);
  3055  			if (symlink(loopname, linkname)) {
  3056  				debug("syz_read_part_table: symlink(%s, %s) failed: %d\n", loopname, linkname, errno);
  3057  			}
  3058  		}
  3059  	}
  3060  error_clear_loop:
  3061  	if (res)
  3062  		ioctl(loopfd, LOOP_CLR_FD, 0);
  3063  	close(loopfd);
  3064  	errno = err;
  3065  	return res;
  3066  }
  3067  #endif
  3068  
  3069  #if SYZ_EXECUTOR || __NR_syz_mount_image
  3070  #include <stddef.h>
  3071  #include <string.h>
  3072  #include <sys/mount.h>
  3073  
  3074  // syz_mount_image(
  3075  // 	fs ptr[in, string[fs]],
  3076  // 	dir ptr[in, filename],
  3077  // 	flags flags[mount_flags],
  3078  // 	opts ptr[in, fs_options],
  3079  // 	chdir bool8,
  3080  // 	size len[img],
  3081  // 	img ptr[in, compressed_image]
  3082  // ) fd_dir
  3083  static long syz_mount_image(
  3084      volatile long fsarg,
  3085      volatile long dir,
  3086      volatile long flags,
  3087      volatile long optsarg,
  3088      volatile long change_dir,
  3089      volatile unsigned long size,
  3090      volatile long image)
  3091  {
  3092  	unsigned char* data = (unsigned char*)image;
  3093  	int res = -1, err = 0, need_loop_device = !!size;
  3094  	char* mount_opts = (char*)optsarg;
  3095  	char* target = (char*)dir;
  3096  	char* fs = (char*)fsarg;
  3097  	char* source = NULL;
  3098  	char loopname[64];
  3099  
  3100  	if (need_loop_device) {
  3101  		int loopfd;
  3102  		// Some filesystems (e.g. FUSE) do not need a backing device or
  3103  		// filesystem image.
  3104  		memset(loopname, 0, sizeof(loopname));
  3105  		snprintf(loopname, sizeof(loopname), "/dev/loop%llu", procid);
  3106  		if (setup_loop_device(data, size, loopname, &loopfd) == -1)
  3107  			return -1;
  3108  		// If BLK_DEV_WRITE_MOUNTED is set, we won't be able to mount()
  3109  		// while holding the loop device fd.
  3110  		close(loopfd);
  3111  		source = loopname;
  3112  	}
  3113  
  3114  	mkdir(target, 0777);
  3115  	char opts[256];
  3116  	memset(opts, 0, sizeof(opts));
  3117  	// Leave some space for the additional options we append below.
  3118  	if (strlen(mount_opts) > (sizeof(opts) - 32)) {
  3119  		debug("ERROR: syz_mount_image parameter optsarg bigger than internal opts\n");
  3120  	}
  3121  	strncpy(opts, mount_opts, sizeof(opts) - 32);
  3122  	if (strcmp(fs, "iso9660") == 0) {
  3123  		flags |= MS_RDONLY;
  3124  	} else if (strncmp(fs, "ext", 3) == 0) {
  3125  		// For ext2/3/4 we have to have errors=continue because the image
  3126  		// can contain errors=panic flag and can legally crash kernel.
  3127  		bool has_remount_ro = false;
  3128  		char* remount_ro_start = strstr(opts, "errors=remount-ro");
  3129  		if (remount_ro_start != NULL) {
  3130  			// syzkaller can sometimes break the options format, so we have to make sure this option can really be parsed.
  3131  			char after = *(remount_ro_start + strlen("errors=remount-ro"));
  3132  			char before = remount_ro_start == opts ? '\0' : *(remount_ro_start - 1);
  3133  			has_remount_ro = ((before == '\0' || before == ',') && (after == '\0' || after == ','));
  3134  		}
  3135  		if (strstr(opts, "errors=panic") || !has_remount_ro)
  3136  			strcat(opts, ",errors=continue");
  3137  	} else if (strcmp(fs, "xfs") == 0) {
  3138  		// For xfs we need nouuid because xfs has a global uuids table
  3139  		// and if two parallel executors mounts fs with the same uuid, second mount fails.
  3140  		strcat(opts, ",nouuid");
  3141  	}
  3142  	debug("syz_mount_image: size=%llu loop='%s' dir='%s' fs='%s' flags=%llu opts='%s'\n", (uint64)size, loopname, target, fs, (uint64)flags, opts);
  3143  #if SYZ_EXECUTOR
  3144  	cover_reset(0);
  3145  #endif
  3146  	res = mount(source, target, fs, flags, opts);
  3147  	if (res == -1) {
  3148  		debug("syz_mount_image > mount error: %d\n", errno);
  3149  		err = errno;
  3150  		goto error_clear_loop;
  3151  	}
  3152  	res = open(target, O_RDONLY | O_DIRECTORY);
  3153  	if (res == -1) {
  3154  		debug("syz_mount_image > open error: %d\n", errno);
  3155  		err = errno;
  3156  		goto error_clear_loop;
  3157  	}
  3158  	if (change_dir) {
  3159  		res = chdir(target);
  3160  		if (res == -1) {
  3161  			debug("syz_mount_image > chdir error: %d\n", errno);
  3162  			err = errno;
  3163  		}
  3164  	}
  3165  
  3166  error_clear_loop:
  3167  	if (need_loop_device)
  3168  		reset_loop_device(loopname);
  3169  	errno = err;
  3170  	return res;
  3171  }
  3172  #endif
  3173  
  3174  #if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu
  3175  // KVM is not yet supported on RISC-V
  3176  #if !GOARCH_riscv64 && !GOARCH_arm
  3177  #include <errno.h>
  3178  #include <fcntl.h>
  3179  #include <linux/kvm.h>
  3180  #include <stdarg.h>
  3181  #include <stddef.h>
  3182  #include <sys/ioctl.h>
  3183  #include <sys/stat.h>
  3184  
  3185  #if GOARCH_amd64
  3186  #include "common_kvm_amd64.h"
  3187  #elif GOARCH_arm64
  3188  #include "common_kvm_arm64.h"
  3189  #elif GOARCH_ppc64 || GOARCH_ppc64le
  3190  #include "common_kvm_ppc64.h"
  3191  #elif !GOARCH_arm
  3192  static volatile long syz_kvm_setup_cpu(volatile long a0, volatile long a1, volatile long a2, volatile long a3, volatile long a4, volatile long a5, volatile long a6, volatile long a7)
  3193  {
  3194  	return 0;
  3195  }
  3196  #endif
  3197  #endif
  3198  #endif
  3199  
  3200  #if (SYZ_EXECUTOR || SYZ_NET_RESET) && SYZ_EXECUTOR_USES_FORK_SERVER
  3201  #include <errno.h>
  3202  #include <net/if.h>
  3203  #include <netinet/in.h>
  3204  #include <string.h>
  3205  #include <sys/socket.h>
  3206  
  3207  #include <linux/net.h>
  3208  
  3209  // checkpoint/reset_net_namespace partially resets net namespace to initial state
  3210  // after each test. Currently it resets only ipv4 netfilter state.
  3211  // Ideally, we just create a new net namespace for each test,
  3212  // however it's too slow (1-1.5 seconds per namespace, not parallelizable).
  3213  
// Linux headers do not compile for C++, so we have to define the structs manually.
// XT_TABLE_SIZE is the size in bytes of the entrytable buffers below;
// XT_MAX_ENTRIES is the largest number of entries checkpoint_iptables accepts.
#define XT_TABLE_SIZE 1536
#define XT_MAX_ENTRIES 10

// Per-entry counters referenced by the *_replace structs.
// NOTE(review): pcnt/bcnt are presumably packet/byte counts -- confirm against the kernel headers.
struct xt_counters {
	uint64 pcnt, bcnt;
};

// Buffer for the IPT_SO_GET_INFO getsockopt (table name in, table info out).
struct ipt_getinfo {
	char name[32];
	unsigned int valid_hooks;
	unsigned int hook_entry[5];
	unsigned int underflow[5];
	unsigned int num_entries;
	unsigned int size;
};

// Buffer for the IPT_SO_GET_ENTRIES getsockopt; entrytable holds up to
// XT_TABLE_SIZE bytes of table entries.
struct ipt_get_entries {
	char name[32];
	unsigned int size;
	uint64 entrytable[XT_TABLE_SIZE / sizeof(uint64)];
};

// Table replacement request; its entrytable bounds the table size that
// checkpoint_iptables accepts.
struct ipt_replace {
	char name[32];
	unsigned int valid_hooks;
	unsigned int num_entries;
	unsigned int size;
	unsigned int hook_entry[5];
	unsigned int underflow[5];
	unsigned int num_counters;
	struct xt_counters* counters;
	uint64 entrytable[XT_TABLE_SIZE / sizeof(uint64)];
};

// Checkpointed state of one iptables table: its name plus the info and
// replace buffers filled in by checkpoint_iptables.
struct ipt_table_desc {
	const char* name;
	struct ipt_getinfo info;
	struct ipt_replace replace;
};

// Tables checkpointed for IPv4.
static struct ipt_table_desc ipv4_tables[] = {
    {.name = "filter"},
    {.name = "nat"},
    {.name = "mangle"},
    {.name = "raw"},
    {.name = "security"},
};

// Tables checkpointed for IPv6.
static struct ipt_table_desc ipv6_tables[] = {
    {.name = "filter"},
    {.name = "nat"},
    {.name = "mangle"},
    {.name = "raw"},
    {.name = "security"},
};

// Socket option numbers for iptables. SET_REPLACE and GET_INFO share a value;
// NOTE(review): presumably because one is a setsockopt and the other a getsockopt -- confirm.
#define IPT_BASE_CTL 64
#define IPT_SO_SET_REPLACE (IPT_BASE_CTL)
#define IPT_SO_GET_INFO (IPT_BASE_CTL)
#define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1)

// arptables counterparts of the ipt_* structs above; they differ in having
// 3 hooks instead of 5.
struct arpt_getinfo {
	char name[32];
	unsigned int valid_hooks;
	unsigned int hook_entry[3];
	unsigned int underflow[3];
	unsigned int num_entries;
	unsigned int size;
};

struct arpt_get_entries {
	char name[32];
	unsigned int size;
	uint64 entrytable[XT_TABLE_SIZE / sizeof(uint64)];
};

struct arpt_replace {
	char name[32];
	unsigned int valid_hooks;
	unsigned int num_entries;
	unsigned int size;
	unsigned int hook_entry[3];
	unsigned int underflow[3];
	unsigned int num_counters;
	struct xt_counters* counters;
	uint64 entrytable[XT_TABLE_SIZE / sizeof(uint64)];
};

// Checkpointed state of one arptables table.
struct arpt_table_desc {
	const char* name;
	struct arpt_getinfo info;
	struct arpt_replace replace;
};

// Tables checkpointed for ARP.
static struct arpt_table_desc arpt_tables[] = {
    {.name = "filter"},
};

// Socket option numbers for arptables (same scheme as the IPT_* ones).
#define ARPT_BASE_CTL 96
#define ARPT_SO_SET_REPLACE (ARPT_BASE_CTL)
#define ARPT_SO_GET_INFO (ARPT_BASE_CTL)
#define ARPT_SO_GET_ENTRIES (ARPT_BASE_CTL + 1)
  3317  
  3318  static void checkpoint_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level)
  3319  {
  3320  	int fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
  3321  	if (fd == -1) {
  3322  		switch (errno) {
  3323  		case EAFNOSUPPORT:
  3324  		case ENOPROTOOPT:
		// ENOENT can be returned if smack lsm is used. Smack tries to apply netlbl to created sockets,
  3326  		// but the fuzzer can manage to remove netlbl entry for SOCK_STREAM/IPPROTO_TCP using
  3327  		// NLBL_MGMT_C_REMOVE, which is unfortunately global (not part of net namespace). In this state
  3328  		// creation of such sockets will fail all the time in all processes (so in some sense the machine
  3329  		// is indeed broken), but ignoring the error is still probably the best option given we allow
  3330  		// the fuzzer to invoke NLBL_MGMT_C_REMOVE in the first place.
  3331  		case ENOENT:
  3332  			return;
  3333  		}
  3334  		failmsg("iptable checkpoint: socket(SOCK_STREAM, IPPROTO_TCP) failed", "family=%d", family);
  3335  	}
  3336  	for (int i = 0; i < num_tables; i++) {
  3337  		struct ipt_table_desc* table = &tables[i];
  3338  		strcpy(table->info.name, table->name);
  3339  		strcpy(table->replace.name, table->name);
  3340  		socklen_t optlen = sizeof(table->info);
  3341  		if (getsockopt(fd, level, IPT_SO_GET_INFO, &table->info, &optlen)) {
  3342  			switch (errno) {
  3343  			case EPERM:
  3344  			case ENOENT:
  3345  			case ENOPROTOOPT:
  3346  				continue;
  3347  			}
  3348  			failmsg("iptable checkpoint: getsockopt(IPT_SO_GET_INFO) failed",
  3349  				"table=%s, family=%d", table->name, family);
  3350  		}
  3351  		debug("iptable checkpoint %s/%d: checkpoint entries=%d hooks=%x size=%d\n",
  3352  		      table->name, family, table->info.num_entries,
  3353  		      table->info.valid_hooks, table->info.size);
  3354  		if (table->info.size > sizeof(table->replace.entrytable))
  3355  			failmsg("iptable checkpoint: table size is too large", "table=%s, family=%d, size=%u",
  3356  				table->name, family, table->info.size);
  3357  		if (table->info.num_entries > XT_MAX_ENTRIES)
  3358  			failmsg("iptable checkpoint: too many counters", "table=%s, family=%d, counters=%d",
  3359  				table->name, family, table->info.num_entries);
  3360  		struct ipt_get_entries entries;
  3361  		memset(&entries, 0, sizeof(entries));
  3362  		strcpy(entries.name, table->name);
  3363  		entries.size = table->info.size;
  3364  		optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size;
  3365  		if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
  3366  			failmsg("iptable checkpoint: getsockopt(IPT_SO_GET_ENTRIES) failed",
  3367  				"table=%s, family=%d", table->name, family);
  3368  		table->replace.valid_hooks = table->info.valid_hooks;
  3369  		table->replace.num_entries = table->info.num_entries;
  3370  		table->replace.size = table->info.size;
  3371  		memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry));
  3372  		memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow));
  3373  		memcpy(table->replace.entrytable, entries.entrytable, table->info.size);
  3374  	}
  3375  	close(fd);
  3376  }
  3377  
  3378  static void reset_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level)
  3379  {
  3380  	int fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
  3381  	if (fd == -1) {
  3382  		switch (errno) {
  3383  		case EAFNOSUPPORT:
  3384  		case ENOPROTOOPT:
  3385  		case ENOENT:
  3386  			return;
  3387  		}
  3388  		failmsg("iptable: socket(SOCK_STREAM, IPPROTO_TCP) failed", "family=%d", family);
  3389  	}
  3390  	for (int i = 0; i < num_tables; i++) {
  3391  		struct ipt_table_desc* table = &tables[i];
  3392  		if (table->info.valid_hooks == 0)
  3393  			continue;
  3394  		struct ipt_getinfo info;
  3395  		memset(&info, 0, sizeof(info));
  3396  		strcpy(info.name, table->name);
  3397  		socklen_t optlen = sizeof(info);
  3398  		if (getsockopt(fd, level, IPT_SO_GET_INFO, &info, &optlen))
  3399  			failmsg("iptable: getsockopt(IPT_SO_GET_INFO) failed",
  3400  				"table=%s, family=%d", table->name, family);
  3401  		if (memcmp(&table->info, &info, sizeof(table->info)) == 0) {
  3402  			struct ipt_get_entries entries;
  3403  			memset(&entries, 0, sizeof(entries));
  3404  			strcpy(entries.name, table->name);
  3405  			entries.size = table->info.size;
  3406  			optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size;
  3407  			if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
  3408  				failmsg("iptable: getsockopt(IPT_SO_GET_ENTRIES) failed",
  3409  					"table=%s, family=%d", table->name, family);
  3410  			if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0)
  3411  				continue;
  3412  		}
  3413  		debug("iptable %s/%d: resetting\n", table->name, family);
  3414  		struct xt_counters counters[XT_MAX_ENTRIES];
  3415  		table->replace.num_counters = info.num_entries;
  3416  		table->replace.counters = counters;
  3417  		optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size;
  3418  		if (setsockopt(fd, level, IPT_SO_SET_REPLACE, &table->replace, optlen))
  3419  			failmsg("iptable: setsockopt(IPT_SO_SET_REPLACE) failed",
  3420  				"table=%s, family=%d", table->name, family);
  3421  	}
  3422  	close(fd);
  3423  }
  3424  
  3425  static void checkpoint_arptables(void)
  3426  {
  3427  	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
  3428  	if (fd == -1) {
  3429  		switch (errno) {
  3430  		case EAFNOSUPPORT:
  3431  		case ENOPROTOOPT:
  3432  		case ENOENT:
  3433  			return;
  3434  		}
  3435  		fail("arptable checkpoint: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP) failed");
  3436  	}
  3437  	for (unsigned i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) {
  3438  		struct arpt_table_desc* table = &arpt_tables[i];
  3439  		strcpy(table->info.name, table->name);
  3440  		strcpy(table->replace.name, table->name);
  3441  		socklen_t optlen = sizeof(table->info);
  3442  		if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &table->info, &optlen)) {
  3443  			switch (errno) {
  3444  			case EPERM:
  3445  			case ENOENT:
  3446  			case ENOPROTOOPT:
  3447  				continue;
  3448  			}
  3449  			failmsg("arptable checkpoint: getsockopt(ARPT_SO_GET_INFO) failed", "table=%s", table->name);
  3450  		}
  3451  		debug("arptable checkpoint %s: entries=%d hooks=%x size=%d\n",
  3452  		      table->name, table->info.num_entries, table->info.valid_hooks, table->info.size);
  3453  		if (table->info.size > sizeof(table->replace.entrytable))
  3454  			failmsg("arptable checkpoint: table size is too large",
  3455  				"table=%s, size=%u", table->name, table->info.size);
  3456  		if (table->info.num_entries > XT_MAX_ENTRIES)
  3457  			failmsg("arptable checkpoint: too many counters",
  3458  				"table=%s, counters=%u", table->name, table->info.num_entries);
  3459  		struct arpt_get_entries entries;
  3460  		memset(&entries, 0, sizeof(entries));
  3461  		strcpy(entries.name, table->name);
  3462  		entries.size = table->info.size;
  3463  		optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size;
  3464  		if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen))
  3465  			failmsg("arptable checkpoint: getsockopt(ARPT_SO_GET_ENTRIES) failed", "table=%s", table->name);
  3466  		table->replace.valid_hooks = table->info.valid_hooks;
  3467  		table->replace.num_entries = table->info.num_entries;
  3468  		table->replace.size = table->info.size;
  3469  		memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry));
  3470  		memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow));
  3471  		memcpy(table->replace.entrytable, entries.entrytable, table->info.size);
  3472  	}
  3473  	close(fd);
  3474  }
  3475  
  3476  static void reset_arptables()
  3477  {
  3478  	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
  3479  	if (fd == -1) {
  3480  		switch (errno) {
  3481  		case EAFNOSUPPORT:
  3482  		case ENOPROTOOPT:
  3483  		case ENOENT:
  3484  			return;
  3485  		}
  3486  		fail("arptable: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
  3487  	}
  3488  	for (unsigned i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) {
  3489  		struct arpt_table_desc* table = &arpt_tables[i];
  3490  		if (table->info.valid_hooks == 0)
  3491  			continue;
  3492  		struct arpt_getinfo info;
  3493  		memset(&info, 0, sizeof(info));
  3494  		strcpy(info.name, table->name);
  3495  		socklen_t optlen = sizeof(info);
  3496  		if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &info, &optlen))
  3497  			failmsg("arptable: getsockopt(ARPT_SO_GET_INFO) failed", "table=%s", table->name);
  3498  		if (memcmp(&table->info, &info, sizeof(table->info)) == 0) {
  3499  			struct arpt_get_entries entries;
  3500  			memset(&entries, 0, sizeof(entries));
  3501  			strcpy(entries.name, table->name);
  3502  			entries.size = table->info.size;
  3503  			optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size;
  3504  			if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen))
  3505  				failmsg("arptable: getsockopt(ARPT_SO_GET_ENTRIES) failed", "table=%s", table->name);
  3506  			if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0)
  3507  				continue;
  3508  			debug("arptable %s: data changed\n", table->name);
  3509  		} else {
  3510  			debug("arptable %s: header changed\n", table->name);
  3511  		}
  3512  		debug("arptable %s: resetting\n", table->name);
  3513  		struct xt_counters counters[XT_MAX_ENTRIES];
  3514  		table->replace.num_counters = info.num_entries;
  3515  		table->replace.counters = counters;
  3516  		optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size;
  3517  		if (setsockopt(fd, SOL_IP, ARPT_SO_SET_REPLACE, &table->replace, optlen))
  3518  			failmsg("arptable: setsockopt(ARPT_SO_SET_REPLACE) failed",
  3519  				"table=%s", table->name);
  3520  	}
  3521  	close(fd);
  3522  }
  3523  
  3524  // ebtables.h is broken too:
  3525  // ebtables.h: In function ‘ebt_entry_target* ebt_get_target(ebt_entry*)’:
  3526  // ebtables.h:197:19: error: invalid conversion from ‘void*’ to ‘ebt_entry_target*’
  3527  
// Local ebtables constants (the system header is not included, see the note above).
// NF_BR_NUMHOOKS sizes ebt_replace.hook_entry; the *_MAXNAMELEN values size the name
// fields of ebt_replace and ebt_entries.
// EBT_SO_SET_ENTRIES is only used with setsockopt() and the EBT_SO_GET_* values only
// with getsockopt(), so the value shared by SET_ENTRIES and GET_INFO is never used
// with the same call. The GET options are numbered consecutively from EBT_BASE_CTL.
#define NF_BR_NUMHOOKS 6
#define EBT_TABLE_MAXNAMELEN 32
#define EBT_CHAIN_MAXNAMELEN 32
#define EBT_BASE_CTL 128
#define EBT_SO_SET_ENTRIES (EBT_BASE_CTL)
#define EBT_SO_GET_INFO (EBT_BASE_CTL)
#define EBT_SO_GET_ENTRIES (EBT_SO_GET_INFO + 1)
#define EBT_SO_GET_INIT_INFO (EBT_SO_GET_ENTRIES + 1)
#define EBT_SO_GET_INIT_ENTRIES (EBT_SO_GET_INIT_INFO + 1)
  3537  
// Argument buffer shared by all ebtables sockopts used below: the EBT_SO_GET_*
// queries as well as setsockopt(EBT_SO_SET_ENTRIES).
struct ebt_replace {
	// Table name; written by the caller before each query.
	char name[EBT_TABLE_MAXNAMELEN];
	// Tested bit by bit (1 << hook) in reset_ebtables(); 0 there means "skip this table".
	unsigned int valid_hooks;
	unsigned int nentries;
	// Size in bytes of the data that entries points to.
	unsigned int entries_size;
	// One pointer per hook; reset_ebtables() rebuilds these for the hooks set in valid_hooks.
	struct ebt_entries* hook_entry[NF_BR_NUMHOOKS];
	// Always set to 0 by the code below.
	unsigned int num_counters;
	struct ebt_counter* counters;
	// Caller-provided buffer for the entry data.
	char* entries;
};
  3548  
// Element type of the ebt_replace.hook_entry pointers. The code in this part of the
// file only does pointer arithmetic with it and never reads the individual fields.
struct ebt_entries {
	unsigned int distinguisher;
	char name[EBT_CHAIN_MAXNAMELEN];
	unsigned int counter_offset;
	int policy;
	unsigned int nentries;
	// Zero-length trailing array, aligned like struct ebt_replace.
	char data[0] __attribute__((aligned(__alignof__(struct ebt_replace))));
};
  3557  
// Per-table checkpoint state for ebtables.
struct ebt_table_desc {
	// Table name used for all sockopt queries.
	const char* name;
	// Header obtained by checkpoint_ebtables(); reset_ebtables() compares it with the live
	// table and submits it with EBT_SO_SET_ENTRIES.
	struct ebt_replace replace;
	// Storage for the checkpointed entry data (replace.entries points here).
	char entrytable[XT_TABLE_SIZE];
};
  3563  
// ebtables tables handled by checkpoint_ebtables()/reset_ebtables().
// Only name is initialized here; replace and entrytable start out zeroed.
static struct ebt_table_desc ebt_tables[] = {
    {.name = "filter"},
    {.name = "nat"},
    {.name = "broute"},
};
  3569  
  3570  static void checkpoint_ebtables(void)
  3571  {
  3572  	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
  3573  	if (fd == -1) {
  3574  		switch (errno) {
  3575  		case EAFNOSUPPORT:
  3576  		case ENOPROTOOPT:
  3577  		case ENOENT:
  3578  			return;
  3579  		}
  3580  		fail("ebtable checkpoint: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
  3581  	}
  3582  	for (size_t i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) {
  3583  		struct ebt_table_desc* table = &ebt_tables[i];
  3584  		strcpy(table->replace.name, table->name);
  3585  		socklen_t optlen = sizeof(table->replace);
  3586  		if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_INFO, &table->replace, &optlen)) {
  3587  			switch (errno) {
  3588  			case EPERM:
  3589  			case ENOENT:
  3590  			case ENOPROTOOPT:
  3591  				continue;
  3592  			}
  3593  			failmsg("ebtable checkpoint: getsockopt(EBT_SO_GET_INIT_INFO) failed",
  3594  				"table=%s", table->name);
  3595  		}
  3596  		debug("ebtable checkpoint %s: entries=%d hooks=%x size=%d\n",
  3597  		      table->name, table->replace.nentries, table->replace.valid_hooks,
  3598  		      table->replace.entries_size);
  3599  		if (table->replace.entries_size > sizeof(table->entrytable))
  3600  			failmsg("ebtable checkpoint: table size is too large", "table=%s, size=%u",
  3601  				table->name, table->replace.entries_size);
  3602  		table->replace.num_counters = 0;
  3603  		table->replace.entries = table->entrytable;
  3604  		optlen = sizeof(table->replace) + table->replace.entries_size;
  3605  		if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_ENTRIES, &table->replace, &optlen))
  3606  			failmsg("ebtable checkpoint: getsockopt(EBT_SO_GET_INIT_ENTRIES) failed",
  3607  				"table=%s", table->name);
  3608  	}
  3609  	close(fd);
  3610  }
  3611  
  3612  static void reset_ebtables()
  3613  {
  3614  	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
  3615  	if (fd == -1) {
  3616  		switch (errno) {
  3617  		case EAFNOSUPPORT:
  3618  		case ENOPROTOOPT:
  3619  		case ENOENT:
  3620  			return;
  3621  		}
  3622  		fail("ebtable: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
  3623  	}
  3624  	for (unsigned i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) {
  3625  		struct ebt_table_desc* table = &ebt_tables[i];
  3626  		if (table->replace.valid_hooks == 0)
  3627  			continue;
  3628  		struct ebt_replace replace;
  3629  		memset(&replace, 0, sizeof(replace));
  3630  		strcpy(replace.name, table->name);
  3631  		socklen_t optlen = sizeof(replace);
  3632  		if (getsockopt(fd, SOL_IP, EBT_SO_GET_INFO, &replace, &optlen))
  3633  			failmsg("ebtable: getsockopt(EBT_SO_GET_INFO)", "table=%s", table->name);
  3634  		replace.num_counters = 0;
  3635  		table->replace.entries = 0;
  3636  		for (unsigned h = 0; h < NF_BR_NUMHOOKS; h++)
  3637  			table->replace.hook_entry[h] = 0;
  3638  		if (memcmp(&table->replace, &replace, sizeof(table->replace)) == 0) {
  3639  			char entrytable[XT_TABLE_SIZE];
  3640  			memset(&entrytable, 0, sizeof(entrytable));
  3641  			replace.entries = entrytable;
  3642  			optlen = sizeof(replace) + replace.entries_size;
  3643  			if (getsockopt(fd, SOL_IP, EBT_SO_GET_ENTRIES, &replace, &optlen))
  3644  				failmsg("ebtable: getsockopt(EBT_SO_GET_ENTRIES) failed", "table=%s", table->name);
  3645  			if (memcmp(table->entrytable, entrytable, replace.entries_size) == 0)
  3646  				continue;
  3647  		}
  3648  		debug("ebtable %s: resetting\n", table->name);
  3649  		// Kernel does not seem to return actual entry points (wat?).
  3650  		for (unsigned j = 0, h = 0; h < NF_BR_NUMHOOKS; h++) {
  3651  			if (table->replace.valid_hooks & (1 << h)) {
  3652  				table->replace.hook_entry[h] = (struct ebt_entries*)table->entrytable + j;
  3653  				j++;
  3654  			}
  3655  		}
  3656  		table->replace.entries = table->entrytable;
  3657  		optlen = sizeof(table->replace) + table->replace.entries_size;
  3658  		if (setsockopt(fd, SOL_IP, EBT_SO_SET_ENTRIES, &table->replace, optlen))
  3659  			failmsg("ebtable: setsockopt(EBT_SO_SET_ENTRIES) failed", "table=%s", table->name);
  3660  	}
  3661  	close(fd);
  3662  }
  3663  
  3664  static void checkpoint_net_namespace(void)
  3665  {
  3666  #if SYZ_EXECUTOR
  3667  	if (!flag_net_reset || flag_sandbox_setuid)
  3668  		return;
  3669  #endif
  3670  	checkpoint_ebtables();
  3671  	checkpoint_arptables();
  3672  	checkpoint_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP);
  3673  	checkpoint_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6);
  3674  }
  3675  
  3676  static void reset_net_namespace(void)
  3677  {
  3678  #if SYZ_EXECUTOR
  3679  	if (!flag_net_reset || flag_sandbox_setuid)
  3680  		return;
  3681  #endif
  3682  	reset_ebtables();
  3683  	reset_arptables();
  3684  	reset_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP);
  3685  	reset_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6);
  3686  }
  3687  #endif
  3688  
  3689  #if SYZ_EXECUTOR || (SYZ_CGROUPS && (SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID))
  3690  #include <fcntl.h>
  3691  #include <string.h>
  3692  #include <sys/mount.h>
  3693  #include <sys/stat.h>
  3694  #include <sys/types.h>
  3695  
  3696  static void mount_cgroups(const char* dir, const char** controllers, int count)
  3697  {
  3698  	if (mkdir(dir, 0777)) {
  3699  		debug("mkdir(%s) failed: %d\n", dir, errno);
  3700  		return;
  3701  	}
  3702  	// First, probe one-by-one to understand what controllers are present.
  3703  	char enabled[128] = {0};
  3704  	int i = 0;
  3705  	for (; i < count; i++) {
  3706  		if (mount("none", dir, "cgroup", 0, controllers[i])) {
  3707  			debug("mount(%s, %s) failed: %d\n", dir, controllers[i], errno);
  3708  			continue;
  3709  		}
  3710  		umount(dir);
  3711  		strcat(enabled, ",");
  3712  		strcat(enabled, controllers[i]);
  3713  	}
  3714  	if (enabled[0] == 0) {
  3715  		if (rmdir(dir) && errno != EBUSY)
  3716  			failmsg("rmdir failed", "dir=%s", dir);
  3717  		return;
  3718  	}
  3719  	// Now mount all at once.
  3720  	if (mount("none", dir, "cgroup", 0, enabled + 1)) {
  3721  		// In systemd/stretch images this is failing with EBUSY
  3722  		// (systemd starts messing with these mounts?),
  3723  		// so we don't fail, but just log the error.
  3724  		debug("mount(%s, %s) failed: %d\n", dir, enabled + 1, errno);
  3725  		if (rmdir(dir) && errno != EBUSY)
  3726  			failmsg("rmdir failed", "dir=%s enabled=%s", dir, enabled);
  3727  	}
  3728  	if (chmod(dir, 0777)) {
  3729  		debug("chmod(%s) failed: %d\n", dir, errno);
  3730  	}
  3731  }
  3732  
  3733  static void mount_cgroups2(const char** controllers, int count)
  3734  {
  3735  	if (mkdir("/syzcgroup/unified", 0777)) {
  3736  		debug("mkdir(/syzcgroup/unified) failed: %d\n", errno);
  3737  		return;
  3738  	}
  3739  	if (mount("none", "/syzcgroup/unified", "cgroup2", 0, NULL)) {
  3740  		debug("mount(cgroup2) failed: %d\n", errno);
  3741  		// For all cases when we don't end up mounting cgroup/cgroup2
  3742  		// in /syzcgroup/{unified,net,cpu}, we need to remove the dir.
  3743  		// Otherwise these will end up as normal dirs and the fuzzer may
  3744  		// create huge files there. These files won't be cleaned up
  3745  		// after tests and may easily consume all disk space.
  3746  		// EBUSY usually means that cgroup is already mounted there
  3747  		// by a previous run of e.g. syz-execprog.
  3748  		if (rmdir("/syzcgroup/unified") && errno != EBUSY)
  3749  			fail("rmdir(/syzcgroup/unified) failed");
  3750  		return;
  3751  	}
  3752  	if (chmod("/syzcgroup/unified", 0777)) {
  3753  		debug("chmod(/syzcgroup/unified) failed: %d\n", errno);
  3754  	}
  3755  	int control = open("/syzcgroup/unified/cgroup.subtree_control", O_WRONLY);
  3756  	if (control == -1)
  3757  		return;
  3758  	int i;
  3759  	for (i = 0; i < count; i++)
  3760  		if (write(control, controllers[i], strlen(controllers[i])) < 0) {
  3761  			debug("write(cgroup.subtree_control, %s) failed: %d\n", controllers[i], errno);
  3762  		}
  3763  	close(control);
  3764  }
  3765  
  3766  static void setup_cgroups()
  3767  {
  3768  	// We want to cover both cgroup and cgroup2.
  3769  	// Each resource controller can be bound to only one of them,
  3770  	// so to cover both we divide all controllers into 3 arbitrary groups.
  3771  	// One group is then bound to cgroup2/unified, and 2 other groups
  3772  	// are bound to 2 cgroup hierarchies.
  3773  	// Note: we need to enable controllers one-by-one for both cgroup and cgroup2.
  3774  	// If we enable all at the same time and one of them fails (b/c of older kernel
  3775  	// or not enabled configs), then all will fail.
  3776  	const char* unified_controllers[] = {"+cpu", "+io", "+pids"};
  3777  	const char* net_controllers[] = {"net", "net_prio", "devices", "blkio", "freezer"};
  3778  	const char* cpu_controllers[] = {"cpuset", "cpuacct", "hugetlb", "rlimit", "memory"};
  3779  	if (mkdir("/syzcgroup", 0777)) {
  3780  		// Can happen due to e.g. read-only file system (EROFS).
  3781  		debug("mkdir(/syzcgroup) failed: %d\n", errno);
  3782  		return;
  3783  	}
  3784  	mount_cgroups2(unified_controllers, sizeof(unified_controllers) / sizeof(unified_controllers[0]));
  3785  	mount_cgroups("/syzcgroup/net", net_controllers, sizeof(net_controllers) / sizeof(net_controllers[0]));
  3786  	mount_cgroups("/syzcgroup/cpu", cpu_controllers, sizeof(cpu_controllers) / sizeof(cpu_controllers[0]));
  3787  	write_file("/syzcgroup/cpu/cgroup.clone_children", "1");
  3788  	write_file("/syzcgroup/cpu/cpuset.memory_pressure_enabled", "1");
  3789  }
  3790  
  3791  #if (SYZ_EXECUTOR || SYZ_REPEAT) && SYZ_EXECUTOR_USES_FORK_SERVER
  3792  static void setup_cgroups_loop()
  3793  {
  3794  #if SYZ_EXECUTOR
  3795  	if (!flag_cgroups)
  3796  		return;
  3797  #endif
  3798  	int pid = getpid();
  3799  	char file[128];
  3800  	char cgroupdir[64];
  3801  	snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid);
  3802  	if (mkdir(cgroupdir, 0777)) {
  3803  		debug("mkdir(%s) failed: %d\n", cgroupdir, errno);
  3804  	}
  3805  	// Restrict number of pids per test process to prevent fork bombs.
  3806  	// We have up to 16 threads + main process + loop.
  3807  	// 32 pids should be enough for everyone.
  3808  	snprintf(file, sizeof(file), "%s/pids.max", cgroupdir);
  3809  	write_file(file, "32");
  3810  	// Setup some v1 groups to make things more interesting.
  3811  	snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
  3812  	write_file(file, "%d", pid);
  3813  	snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid);
  3814  	if (mkdir(cgroupdir, 0777)) {
  3815  		debug("mkdir(%s) failed: %d\n", cgroupdir, errno);
  3816  	}
  3817  	snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
  3818  	write_file(file, "%d", pid);
  3819  	// Restrict memory consumption.
  3820  	// We have some syscalls that inherently consume lots of memory,
  3821  	// e.g. mounting some filesystem images requires at least 128MB
  3822  	// image in memory. We restrict RLIMIT_AS to 200MB. Here we gradually
  3823  	// increase memory limits to make things more interesting.
  3824  	// Also this takes into account KASAN quarantine size.
  3825  	// If the limit is lower than KASAN quarantine size, then it can happen
  3826  	// so that we kill the process, but all of its memory is in quarantine
  3827  	// and is still accounted against memcg. As the result memcg won't
  3828  	// allow to allocate any memory in the parent and in the new test process.
  3829  	// The current limit of 300MB supports up to 9.6GB RAM (quarantine is 1/32).
  3830  	snprintf(file, sizeof(file), "%s/memory.soft_limit_in_bytes", cgroupdir);
  3831  	write_file(file, "%d", 299 << 20);
  3832  	snprintf(file, sizeof(file), "%s/memory.limit_in_bytes", cgroupdir);
  3833  	write_file(file, "%d", 300 << 20);
  3834  	snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid);
  3835  	if (mkdir(cgroupdir, 0777)) {
  3836  		debug("mkdir(%s) failed: %d\n", cgroupdir, errno);
  3837  	}
  3838  	snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir);
  3839  	write_file(file, "%d", pid);
  3840  }
  3841  
  3842  static void setup_cgroups_test()
  3843  {
  3844  #if SYZ_EXECUTOR
  3845  	if (!flag_cgroups)
  3846  		return;
  3847  #endif
  3848  	char cgroupdir[64];
  3849  	snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid);
  3850  	if (symlink(cgroupdir, "./cgroup")) {
  3851  		debug("symlink(%s, ./cgroup) failed: %d\n", cgroupdir, errno);
  3852  	}
  3853  	snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid);
  3854  	if (symlink(cgroupdir, "./cgroup.cpu")) {
  3855  		debug("symlink(%s, ./cgroup.cpu) failed: %d\n", cgroupdir, errno);
  3856  	}
  3857  	snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid);
  3858  	if (symlink(cgroupdir, "./cgroup.net")) {
  3859  		debug("symlink(%s, ./cgroup.net) failed: %d\n", cgroupdir, errno);
  3860  	}
  3861  }
  3862  #endif
  3863  
  3864  #if SYZ_EXECUTOR || SYZ_SANDBOX_NAMESPACE
  3865  static void initialize_cgroups()
  3866  {
  3867  #if SYZ_EXECUTOR
  3868  	if (!flag_cgroups)
  3869  		return;
  3870  #endif
  3871  	if (mkdir("./syz-tmp/newroot/syzcgroup", 0700))
  3872  		fail("mkdir failed");
  3873  	if (mkdir("./syz-tmp/newroot/syzcgroup/unified", 0700))
  3874  		fail("mkdir failed");
  3875  	if (mkdir("./syz-tmp/newroot/syzcgroup/cpu", 0700))
  3876  		fail("mkdir failed");
  3877  	if (mkdir("./syz-tmp/newroot/syzcgroup/net", 0700))
  3878  		fail("mkdir failed");
  3879  	unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE;
  3880  	if (mount("/syzcgroup/unified", "./syz-tmp/newroot/syzcgroup/unified", NULL, bind_mount_flags, NULL)) {
  3881  		debug("mount(cgroup2, MS_BIND) failed: %d\n", errno);
  3882  	}
  3883  	if (mount("/syzcgroup/cpu", "./syz-tmp/newroot/syzcgroup/cpu", NULL, bind_mount_flags, NULL)) {
  3884  		debug("mount(cgroup/cpu, MS_BIND) failed: %d\n", errno);
  3885  	}
  3886  	if (mount("/syzcgroup/net", "./syz-tmp/newroot/syzcgroup/net", NULL, bind_mount_flags, NULL)) {
  3887  		debug("mount(cgroup/net, MS_BIND) failed: %d\n", errno);
  3888  	}
  3889  }
  3890  #endif
  3891  #endif
  3892  
  3893  #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID
  3894  #include <errno.h>
  3895  #include <sys/mount.h>
  3896  #include <sys/stat.h>
  3897  #include <unistd.h>
  3898  
  3899  static void setup_common()
  3900  {
  3901  	if (mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0)) {
  3902  		debug("mount(fusectl) failed: %d\n", errno);
  3903  	}
  3904  }
  3905  
  3906  static void setup_binderfs()
  3907  {
  3908  	// NOTE: this function must be called after chroot.
  3909  	// Bind an instance of binderfs specific just to this executor - it will
  3910  	// only be visible in its mount namespace and will help isolate binder
  3911  	// devices during fuzzing.
  3912  	// These commands will just silently fail if binderfs is not supported.
  3913  	// Ideally it should have been added as a separate feature (with lots of
  3914  	// minor changes throughout the code base), but it seems to be an overkill
  3915  	// for just 2 simple lines of code.
  3916  	if (mkdir("/dev/binderfs", 0777)) {
  3917  		debug("mkdir(/dev/binderfs) failed: %d\n", errno);
  3918  	}
  3919  
  3920  	if (mount("binder", "/dev/binderfs", "binder", 0, NULL)) {
  3921  		debug("mount of binder at /dev/binderfs failed: %d\n", errno);
  3922  	}
  3923  #if !SYZ_EXECUTOR && !SYZ_USE_TMP_DIR
  3924  	// Do a local symlink right away.
  3925  	if (symlink("/dev/binderfs", "./binderfs")) {
  3926  		debug("symlink(/dev/binderfs, ./binderfs) failed: %d\n", errno);
  3927  	}
  3928  #endif
  3929  }
  3930  
  3931  #include <sched.h>
  3932  #include <sys/prctl.h>
  3933  #include <sys/resource.h>
  3934  #include <sys/time.h>
  3935  #include <sys/wait.h>
  3936  
// Forward declaration; loop() is defined elsewhere in the file.
static void loop();
  3938  
  3939  static void sandbox_common()
  3940  {
  3941  	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  3942  	setsid();
  3943  
  3944  #if SYZ_EXECUTOR || __NR_syz_init_net_socket || SYZ_DEVLINK_PCI || __NR_syz_socket_connect_nvme_tcp
  3945  	int netns = open("/proc/self/ns/net", O_RDONLY);
  3946  	if (netns == -1)
  3947  		fail("open(/proc/self/ns/net) failed");
  3948  	if (dup2(netns, kInitNetNsFd) < 0)
  3949  		fail("dup2(netns, kInitNetNsFd) failed");
  3950  	close(netns);
  3951  #endif
  3952  
  3953  	struct rlimit rlim;
  3954  #if SYZ_EXECUTOR
  3955  	rlim.rlim_cur = rlim.rlim_max = (200 << 20) +
  3956  					(kMaxThreads * kCoverSize + kExtraCoverSize) * sizeof(void*);
  3957  #else
  3958  	rlim.rlim_cur = rlim.rlim_max = (200 << 20);
  3959  #endif
  3960  	setrlimit(RLIMIT_AS, &rlim);
  3961  	rlim.rlim_cur = rlim.rlim_max = 32 << 20;
  3962  	setrlimit(RLIMIT_MEMLOCK, &rlim);
  3963  	rlim.rlim_cur = rlim.rlim_max = 136 << 20;
  3964  	setrlimit(RLIMIT_FSIZE, &rlim);
  3965  	rlim.rlim_cur = rlim.rlim_max = 1 << 20;
  3966  	setrlimit(RLIMIT_STACK, &rlim);
  3967  	// Note: core size is also restricted by RLIMIT_FSIZE.
  3968  	rlim.rlim_cur = rlim.rlim_max = 128 << 20;
  3969  	setrlimit(RLIMIT_CORE, &rlim);
  3970  	rlim.rlim_cur = rlim.rlim_max = 256; // see kMaxFd
  3971  	setrlimit(RLIMIT_NOFILE, &rlim);
  3972  
  3973  	// CLONE_NEWNS/NEWCGROUP cause EINVAL on some systems,
  3974  	// so we do them separately of clone in do_sandbox_namespace.
  3975  	if (unshare(CLONE_NEWNS)) {
  3976  		debug("unshare(CLONE_NEWNS): %d\n", errno);
  3977  	}
  3978  	if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
  3979  		debug("mount(\"/\", MS_REC | MS_PRIVATE): %d\n", errno);
  3980  	}
  3981  	if (unshare(CLONE_NEWIPC)) {
  3982  		debug("unshare(CLONE_NEWIPC): %d\n", errno);
  3983  	}
  3984  	if (unshare(0x02000000)) {
  3985  		debug("unshare(CLONE_NEWCGROUP): %d\n", errno);
  3986  	}
  3987  	if (unshare(CLONE_NEWUTS)) {
  3988  		debug("unshare(CLONE_NEWUTS): %d\n", errno);
  3989  	}
  3990  	if (unshare(CLONE_SYSVSEM)) {
  3991  		debug("unshare(CLONE_SYSVSEM): %d\n", errno);
  3992  	}
  3993  	// These sysctl's restrict ipc resource usage (by default it's possible
  3994  	// to eat all system memory by creating e.g. lots of large sem sets).
  3995  	// These sysctl's are per-namespace, so we need to set them inside
  3996  	// of the test ipc namespace (after CLONE_NEWIPC).
  3997  	typedef struct {
  3998  		const char* name;
  3999  		const char* value;
  4000  	} sysctl_t;
  4001  	static const sysctl_t sysctls[] = {
  4002  	    {"/proc/sys/kernel/shmmax", "16777216"},
  4003  	    {"/proc/sys/kernel/shmall", "536870912"},
  4004  	    {"/proc/sys/kernel/shmmni", "1024"},
  4005  	    {"/proc/sys/kernel/msgmax", "8192"},
  4006  	    {"/proc/sys/kernel/msgmni", "1024"},
  4007  	    {"/proc/sys/kernel/msgmnb", "1024"},
  4008  	    {"/proc/sys/kernel/sem", "1024 1048576 500 1024"},
  4009  	};
  4010  	unsigned i;
  4011  	for (i = 0; i < sizeof(sysctls) / sizeof(sysctls[0]); i++)
  4012  		write_file(sysctls[i].name, sysctls[i].value);
  4013  }
  4014  #endif
  4015  
  4016  #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE
  4017  static int wait_for_loop(int pid)
  4018  {
  4019  	if (pid < 0)
  4020  		fail("sandbox fork failed");
  4021  	debug("spawned loop pid %d\n", pid);
  4022  	int status = 0;
  4023  	while (waitpid(-1, &status, __WALL) != pid) {
  4024  	}
  4025  	return WEXITSTATUS(status);
  4026  }
  4027  #endif
  4028  
  4029  #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID
  4030  #include <linux/capability.h>
  4031  
  4032  static void drop_caps(void)
  4033  {
  4034  	struct __user_cap_header_struct cap_hdr = {};
  4035  	struct __user_cap_data_struct cap_data[2] = {};
  4036  	cap_hdr.version = _LINUX_CAPABILITY_VERSION_3;
  4037  	cap_hdr.pid = getpid();
  4038  	if (syscall(SYS_capget, &cap_hdr, &cap_data))
  4039  		fail("capget failed");
  4040  	// Drop CAP_SYS_PTRACE so that test processes can't attach to parent processes.
  4041  	// Previously it lead to hangs because the loop process stopped due to SIGSTOP.
  4042  	// Note that a process can always ptrace its direct children, which is enough for testing purposes.
  4043  	//
  4044  	// A process with CAP_SYS_NICE can bring kernel down by asking for too high SCHED_DEADLINE priority,
  4045  	// as the result rcu and other system services that use kernel threads will stop functioning.
  4046  	// Some parameters for SCHED_DEADLINE should be OK, but we don't have means to enforce
  4047  	// values of indirect syscall arguments. Peter Zijlstra proposed sysctl_deadline_period_{min,max}
  4048  	// which could be used to enfore safe limits without droppping CAP_SYS_NICE, but we don't have it yet.
  4049  	// See the following bug for details:
  4050  	// https://groups.google.com/forum/#!topic/syzkaller-bugs/G6Wl_PKPIWI
  4051  	const int drop = (1 << CAP_SYS_PTRACE) | (1 << CAP_SYS_NICE);
  4052  	cap_data[0].effective &= ~drop;
  4053  	cap_data[0].permitted &= ~drop;
  4054  	cap_data[0].inheritable &= ~drop;
  4055  	if (syscall(SYS_capset, &cap_hdr, &cap_data))
  4056  		fail("capset failed");
  4057  }
  4058  #endif
  4059  
  4060  #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE
  4061  #include <sched.h>
  4062  #include <sys/types.h>
  4063  
// Entry point for the "none" sandbox.
// The parent forks and then only waits for the child, returning what
// wait_for_loop returns. The child performs the setup steps below in order,
// runs loop() and exits with status 1 if loop() ever returns.
static int do_sandbox_none(void)
{
	// CLONE_NEWPID takes effect for the first child of the current process,
	// so we do it before fork to make the loop "init" process of the namespace.
	// We ought to do fail here, but sandbox=none is used in pkg/ipc tests
	// and they are usually run under non-root.
	// Also since debug is stripped by pkg/csource, we need to do {}
	// even though we generally don't do {} around single statements.
	if (unshare(CLONE_NEWPID)) {
		debug("unshare(CLONE_NEWPID): %d\n", errno);
	}
	int pid = fork();
	// Parent (pid > 0) and fork failure (pid < 0) both go to wait_for_loop,
	// which treats a negative pid as fatal.
	if (pid != 0)
		return wait_for_loop(pid);

	// Child process from here on.
	setup_common();
#if SYZ_EXECUTOR || SYZ_VHCI_INJECTION
	initialize_vhci();
#endif
	sandbox_common();
	drop_caps();
#if SYZ_EXECUTOR || SYZ_NET_DEVICES
	initialize_netdevices_init();
#endif
	// Failure to create the net namespace is tolerated here (only logged).
	if (unshare(CLONE_NEWNET)) {
		debug("unshare(CLONE_NEWNET): %d\n", errno);
	}
	// Enable access to IPPROTO_ICMP sockets, must be done after CLONE_NEWNET.
	write_file("/proc/sys/net/ipv4/ping_group_range", "0 65535");
#if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
	initialize_devlink_pci();
#endif
#if SYZ_EXECUTOR || SYZ_NET_INJECTION
	initialize_tun();
#endif
#if SYZ_EXECUTOR || SYZ_NET_DEVICES
	initialize_netdevices();
#endif
#if SYZ_EXECUTOR || SYZ_WIFI
	initialize_wifi_devices();
#endif
	setup_binderfs();
	loop();
	// Reached only if loop() returns.
	doexit(1);
}
  4109  #endif
  4110  
  4111  #if SYZ_EXECUTOR || SYZ_SANDBOX_SETUID
  4112  #include <grp.h>
  4113  #include <sched.h>
  4114  #include <sys/prctl.h>
  4115  
  4116  #define SYZ_HAVE_SANDBOX_SETUID 1
// Entry point for the "setuid" sandbox.
// The parent forks and waits for the child via wait_for_loop. The child does
// the device/network setup while still running with its original credentials,
// then switches to uid/gid 65534 and runs loop(). It exits with status 1 if
// loop() ever returns.
static int do_sandbox_setuid(void)
{
	// Failure is only logged; the braces are kept because the body is a lone debug call.
	if (unshare(CLONE_NEWPID)) {
		debug("unshare(CLONE_NEWPID): %d\n", errno);
	}
	int pid = fork();
	// Parent (pid > 0) and fork failure (pid < 0) both go to wait_for_loop.
	if (pid != 0)
		return wait_for_loop(pid);

	// Child process from here on.
	setup_common();
#if SYZ_EXECUTOR || SYZ_VHCI_INJECTION
	initialize_vhci();
#endif
	sandbox_common();
#if SYZ_EXECUTOR || SYZ_NET_DEVICES
	initialize_netdevices_init();
#endif
	if (unshare(CLONE_NEWNET)) {
		debug("unshare(CLONE_NEWNET): %d\n", errno);
	}
#if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
	initialize_devlink_pci();
#endif
#if SYZ_EXECUTOR || SYZ_NET_INJECTION
	initialize_tun();
#endif
#if SYZ_EXECUTOR || SYZ_NET_DEVICES
	initialize_netdevices();
#endif
#if SYZ_EXECUTOR || SYZ_WIFI
	initialize_wifi_devices();
#endif
	setup_binderfs();

	// Drop supplementary groups first, then the group ids, then the user ids.
	// Any failure is fatal.
	const int nobody = 65534;
	if (setgroups(0, NULL))
		fail("failed to setgroups");
	if (syscall(SYS_setresgid, nobody, nobody, nobody))
		fail("failed to setresgid");
	if (syscall(SYS_setresuid, nobody, nobody, nobody))
		fail("failed to setresuid");

	// setresuid and setresgid clear the parent-death signal.
	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
	// This is required to open /proc/self/ files.
	// Otherwise they are owned by root and we can't open them after setuid.
	// See task_dump_owner function in kernel.
	prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);

	loop();
	// Reached only if loop() returns.
	doexit(1);
}
  4169  #endif
  4170  
  4171  #if SYZ_EXECUTOR || SYZ_SANDBOX_NAMESPACE
  4172  #include <sched.h>
  4173  #include <sys/mman.h>
  4174  #include <sys/mount.h>
  4175  
  4176  static int real_uid;
  4177  static int real_gid;
  4178  __attribute__((aligned(64 << 10))) static char sandbox_stack[1 << 20];
  4179  
// Body of the process created by clone() in do_sandbox_namespace.
// Maps uid/gid 0 of the new user namespace to real_uid/real_gid, sets up
// networking in a fresh net namespace, builds a private root under ./syz-tmp,
// switches into it, drops capabilities and runs loop(). `arg` is unused.
// Exits with status 1 if loop() ever returns.
static int namespace_sandbox_proc(void* arg)
{
	sandbox_common();

	// /proc/self/setgroups is not present on some systems, ignore error.
	write_file("/proc/self/setgroups", "deny");
	if (!write_file("/proc/self/uid_map", "0 %d 1\n", real_uid))
		fail("write of /proc/self/uid_map failed");
	if (!write_file("/proc/self/gid_map", "0 %d 1\n", real_gid))
		fail("write of /proc/self/gid_map failed");

#if SYZ_EXECUTOR || SYZ_NET_DEVICES
	initialize_netdevices_init();
#endif
	// CLONE_NEWNET must always happen before tun setup,
	// because we want the tun device in the test namespace.
	if (unshare(CLONE_NEWNET))
		fail("unshare(CLONE_NEWNET)");
	// Enable access to IPPROTO_ICMP sockets, must be done after CLONE_NEWNET.
	write_file("/proc/sys/net/ipv4/ping_group_range", "0 65535");
#if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
	initialize_devlink_pci();
#endif
#if SYZ_EXECUTOR || SYZ_NET_INJECTION
	// We setup tun here as it needs to be in the test net namespace,
	// which in turn needs to be in the test user namespace.
	// However, IFF_NAPI_FRAGS will fail as we are not root already.
	// TODO: we should create tun in the init net namespace and use setns
	// to move it to the target namespace.
	initialize_tun();
#endif
#if SYZ_EXECUTOR || SYZ_NET_DEVICES
	initialize_netdevices();
#endif
#if SYZ_EXECUTOR || SYZ_WIFI
	initialize_wifi_devices();
#endif

	// Build the new root: a tmpfs at ./syz-tmp with ./syz-tmp/newroot holding
	// dev, proc, selinux and sys.
	if (mkdir("./syz-tmp", 0777))
		fail("mkdir(syz-tmp) failed");
	if (mount("", "./syz-tmp", "tmpfs", 0, NULL))
		fail("mount(tmpfs) failed");
	if (mkdir("./syz-tmp/newroot", 0777))
		fail("mkdir failed");
	if (mkdir("./syz-tmp/newroot/dev", 0700))
		fail("mkdir failed");
	unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE;
	if (mount("/dev", "./syz-tmp/newroot/dev", NULL, bind_mount_flags, NULL))
		fail("mount(dev) failed");
	if (mkdir("./syz-tmp/newroot/proc", 0700))
		fail("mkdir failed");
	if (mount(NULL, "./syz-tmp/newroot/proc", "proc", 0, NULL))
		fail("mount(proc) failed");
	if (mkdir("./syz-tmp/newroot/selinux", 0700))
		fail("mkdir failed");
	// selinux mount used to be at /selinux, but then moved to /sys/fs/selinux.
	// ENOENT from both locations is tolerated; any other error is fatal.
	const char* selinux_path = "./syz-tmp/newroot/selinux";
	if (mount("/selinux", selinux_path, NULL, bind_mount_flags, NULL)) {
		if (errno != ENOENT)
			fail("mount(/selinux) failed");
		if (mount("/sys/fs/selinux", selinux_path, NULL, bind_mount_flags, NULL) && errno != ENOENT)
			fail("mount(/sys/fs/selinux) failed");
	}
	if (mkdir("./syz-tmp/newroot/sys", 0700))
		fail("mkdir failed");
	if (mount("/sys", "./syz-tmp/newroot/sys", 0, bind_mount_flags, NULL))
		fail("mount(sysfs) failed");
#if SYZ_EXECUTOR || SYZ_CGROUPS
	initialize_cgroups();
#endif
	// Prefer pivot_root; if it fails, just chdir into ./syz-tmp and rely on
	// the chroot below.
	if (mkdir("./syz-tmp/pivot", 0777))
		fail("mkdir failed");
	if (syscall(SYS_pivot_root, "./syz-tmp", "./syz-tmp/pivot")) {
		debug("pivot_root failed\n");
		if (chdir("./syz-tmp"))
			fail("chdir failed");
	} else {
		debug("pivot_root OK\n");
		if (chdir("/"))
			fail("chdir failed");
		// The old root is now under ./pivot; detach it.
		if (umount2("./pivot", MNT_DETACH))
			fail("umount failed");
	}
	if (chroot("./newroot"))
		fail("chroot failed");
	if (chdir("/"))
		fail("chdir failed");
	setup_binderfs();
	drop_caps();

	loop();
	// Reached only if loop() returns.
	doexit(1);
}
  4273  
  4274  #define SYZ_HAVE_SANDBOX_NAMESPACE 1
  4275  static int do_sandbox_namespace(void)
  4276  {
  4277  	setup_common();
  4278  #if SYZ_EXECUTOR || SYZ_VHCI_INJECTION
  4279  	// HCIDEVUP requires CAP_ADMIN, so this needs to happen early.
  4280  	initialize_vhci();
  4281  #endif
  4282  	real_uid = getuid();
  4283  	real_gid = getgid();
  4284  	mprotect(sandbox_stack, 4096, PROT_NONE); // to catch stack underflows
  4285  	int pid = clone(namespace_sandbox_proc, &sandbox_stack[sizeof(sandbox_stack) - 64],
  4286  			CLONE_NEWUSER | CLONE_NEWPID, 0);
  4287  	return wait_for_loop(pid);
  4288  }
  4289  #endif
  4290  
  4291  #if SYZ_EXECUTOR || SYZ_SANDBOX_ANDROID
  4292  // seccomp only supported for Arm, Arm64, X86, and X86_64 archs
  4293  #if GOARCH_arm || GOARCH_arm64 || GOARCH_386 || GOARCH_amd64
  4294  #include <assert.h>
  4295  #include <errno.h>
  4296  #include <linux/audit.h>
  4297  #include <linux/filter.h>
  4298  #include <linux/seccomp.h>
  4299  #include <stddef.h>
  4300  #include <stdlib.h>
  4301  #include <sys/prctl.h>
  4302  #include <sys/syscall.h>
  4303  
  4304  #include "android/android_seccomp.h"
  4305  
  4306  #if GOARCH_amd64 || GOARCH_386
  4307  // Syz-executor is linked against glibc when fuzzing runs on Cuttlefish x86-x64.
  4308  // However Android blocks calls into mkdir, rmdir, symlink which causes
  4309  // syz-executor to crash. When fuzzing runs on Android device this issue
  4310  // is not observed, because syz-executor is linked against Bionic. Under
  4311  // the hood Bionic invokes mkdirat, unlinkat and symlinkat, which are
  4312  // allowed by seccomp-bpf.
  4313  // This issue may exist not only in Android, but also in Linux in general
  4314  // where seccomp filtering is enforced.
  4315  //
  4316  // This trick makes linker believe it matched the correct version of mkdir,
  4317  // rmdir, symlink. So now behavior is the same across ARM and non-ARM builds.
// Replacement for mkdir that goes through mkdirat relative to the current
// working directory (AT_FDCWD).
inline int mkdir(const char* path, mode_t mode)
{
	return mkdirat(AT_FDCWD, path, mode);
}
  4322  
// Replacement for rmdir that goes through unlinkat with AT_REMOVEDIR,
// relative to the current working directory (AT_FDCWD).
inline int rmdir(const char* path)
{
	return unlinkat(AT_FDCWD, path, AT_REMOVEDIR);
}
  4327  
// Replacement for symlink that goes through symlinkat; new_path is resolved
// relative to the current working directory (AT_FDCWD).
inline int symlink(const char* old_path, const char* new_path)
{
	return symlinkat(old_path, AT_FDCWD, new_path);
}
  4332  #endif
  4333  
  4334  #endif
  4335  #include <fcntl.h> // open(2)
  4336  #include <grp.h> // setgroups
  4337  #include <sys/xattr.h> // setxattr, getxattr
  4338  
  4339  #define AID_NET_BT_ADMIN 3001
  4340  #define AID_NET_BT 3002
  4341  #define AID_INET 3003
  4342  #define AID_EVERYBODY 9997
  4343  #define AID_APP 10000
  4344  
  4345  #define UNTRUSTED_APP_UID (AID_APP + 999)
  4346  #define UNTRUSTED_APP_GID (AID_APP + 999)
  4347  
  4348  #define SYSTEM_UID 1000
  4349  #define SYSTEM_GID 1000
  4350  
  4351  const char* const SELINUX_CONTEXT_UNTRUSTED_APP = "u:r:untrusted_app:s0:c512,c768";
  4352  const char* const SELINUX_LABEL_APP_DATA_FILE = "u:object_r:app_data_file:s0:c512,c768";
  4353  const char* const SELINUX_CONTEXT_FILE = "/proc/thread-self/attr/current";
  4354  const char* const SELINUX_XATTR_NAME = "security.selinux";
  4355  
  4356  const gid_t UNTRUSTED_APP_GROUPS[] = {UNTRUSTED_APP_GID, AID_NET_BT_ADMIN, AID_NET_BT, AID_INET, AID_EVERYBODY};
  4357  const size_t UNTRUSTED_APP_NUM_GROUPS = sizeof(UNTRUSTED_APP_GROUPS) / sizeof(UNTRUSTED_APP_GROUPS[0]);
  4358  
  4359  const gid_t SYSTEM_GROUPS[] = {SYSTEM_GID, AID_NET_BT_ADMIN, AID_NET_BT, AID_INET, AID_EVERYBODY};
  4360  const size_t SYSTEM_NUM_GROUPS = sizeof(SYSTEM_GROUPS) / sizeof(SYSTEM_GROUPS[0]);
  4361  
  4362  // Similar to libselinux getcon(3), but:
  4363  // - No library dependency
  4364  // - No dynamic memory allocation
  4365  // - Uses fail() instead of returning an error code
  4366  static void getcon(char* context, size_t context_size)
  4367  {
  4368  	int fd = open(SELINUX_CONTEXT_FILE, O_RDONLY);
  4369  	if (fd < 0)
  4370  		fail("getcon: couldn't open context file");
  4371  
  4372  	ssize_t nread = read(fd, context, context_size);
  4373  
  4374  	close(fd);
  4375  
  4376  	if (nread <= 0)
  4377  		fail("getcon: failed to read context file");
  4378  
  4379  	// The contents of the context file MAY end with a newline
  4380  	// and MAY not have a null terminator.  Handle this here.
  4381  	if (context[nread - 1] == '\n')
  4382  		context[nread - 1] = '\0';
  4383  }
  4384  
  4385  // Similar to libselinux setcon(3), but:
  4386  // - No library dependency
  4387  // - No dynamic memory allocation
  4388  // - Uses fail() instead of returning an error code
  4389  static void setcon(const char* context)
  4390  {
  4391  	char new_context[512];
  4392  
  4393  	// Attempt to write the new context
  4394  	int fd = open(SELINUX_CONTEXT_FILE, O_WRONLY);
  4395  
  4396  	if (fd < 0)
  4397  		fail("setcon: could not open context file");
  4398  
  4399  	ssize_t bytes_written = write(fd, context, strlen(context));
  4400  
  4401  	// N.B.: We cannot reuse this file descriptor, since the target SELinux context
  4402  	//       may not be able to read from it.
  4403  	close(fd);
  4404  
  4405  	if (bytes_written != (ssize_t)strlen(context))
  4406  		failmsg("setcon: could not write entire context", "wrote=%zi, expected=%zu", bytes_written, strlen(context));
  4407  
  4408  	// Validate the transition by checking the context
  4409  	getcon(new_context, sizeof(new_context));
  4410  
  4411  	if (strcmp(context, new_context) != 0)
  4412  		failmsg("setcon: failed to change", "want=%s, context=%s", context, new_context);
  4413  }
  4414  
  4415  // Similar to libselinux setfilecon(3), but:
  4416  // - No library dependency
  4417  // - No dynamic memory allocation
  4418  // - Uses fail() instead of returning an error code
  4419  static void setfilecon(const char* path, const char* context)
  4420  {
  4421  	char new_context[512];
  4422  
  4423  	if (setxattr(path, SELINUX_XATTR_NAME, context, strlen(context) + 1, 0) != 0)
  4424  		fail("setfilecon: setxattr failed");
  4425  	if (getxattr(path, SELINUX_XATTR_NAME, new_context, sizeof(new_context)) < 0)
  4426  		fail("setfilecon: getxattr failed");
  4427  	if (strcmp(context, new_context) != 0)
  4428  		failmsg("setfilecon: could not set context", "want=%s, got=%s", context, new_context);
  4429  }
  4430  
  4431  #define SYZ_HAVE_SANDBOX_ANDROID 1
  4432  
  4433  static int do_sandbox_android(uint64 sandbox_arg)
  4434  {
  4435  	setup_common();
  4436  #if SYZ_EXECUTOR || SYZ_VHCI_INJECTION
  4437  	initialize_vhci();
  4438  #endif
  4439  	sandbox_common();
  4440  	drop_caps();
  4441  
  4442  #if SYZ_EXECUTOR || SYZ_NET_DEVICES
  4443  	initialize_netdevices_init();
  4444  #endif
  4445  	// CLONE_NEWNET must always happen before tun setup, because we want the tun
  4446  	// device in the test namespace. If we don't do this, executor will crash with
  4447  	// SYZFATAL: executor NUM failed NUM times: executor NUM: EOF
  4448  	if (unshare(CLONE_NEWNET)) {
  4449  		debug("unshare(CLONE_NEWNET): %d\n", errno);
  4450  	}
  4451  	// Enable access to IPPROTO_ICMP sockets, must be done after CLONE_NEWNET.
  4452  	write_file("/proc/sys/net/ipv4/ping_group_range", "0 65535");
  4453  #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI
  4454  	initialize_devlink_pci();
  4455  #endif
  4456  #if SYZ_EXECUTOR || SYZ_NET_INJECTION
  4457  	initialize_tun();
  4458  #endif
  4459  #if SYZ_EXECUTOR || SYZ_NET_DEVICES
  4460  	initialize_netdevices();
  4461  #endif
  4462  	uid_t uid = UNTRUSTED_APP_UID;
  4463  	size_t num_groups = UNTRUSTED_APP_NUM_GROUPS;
  4464  	const gid_t* groups = UNTRUSTED_APP_GROUPS;
  4465  	gid_t gid = UNTRUSTED_APP_GID;
  4466  	debug("executor received sandbox_arg=%llu\n", sandbox_arg);
  4467  	if (sandbox_arg == 1) {
  4468  		uid = SYSTEM_UID;
  4469  		num_groups = SYSTEM_NUM_GROUPS;
  4470  		groups = SYSTEM_GROUPS;
  4471  		gid = SYSTEM_GID;
  4472  
  4473  		debug("fuzzing under SYSTEM account\n");
  4474  	}
  4475  	if (chown(".", uid, uid) != 0)
  4476  		failmsg("do_sandbox_android: chmod failed", "sandbox_arg=%llu", sandbox_arg);
  4477  
  4478  	if (setgroups(num_groups, groups) != 0)
  4479  		failmsg("do_sandbox_android: setgroups failed", "sandbox_arg=%llu", sandbox_arg);
  4480  
  4481  	if (setresgid(gid, gid, gid) != 0)
  4482  		failmsg("do_sandbox_android: setresgid failed", "sandbox_arg=%llu", sandbox_arg);
  4483  
  4484  	setup_binderfs();
  4485  
  4486  #if GOARCH_arm || GOARCH_arm64 || GOARCH_386 || GOARCH_amd64
  4487  	// Will fail() if anything fails.
  4488  	// Must be called when the new process still has CAP_SYS_ADMIN, in this case,
  4489  	// before changing uid from 0, which clears capabilities.
  4490  	int account = SCFS_RestrictedApp;
  4491  	if (sandbox_arg == 1)
  4492  		account = SCFS_SystemAccount;
  4493  	set_app_seccomp_filter(account);
  4494  #endif
  4495  
  4496  	if (setresuid(uid, uid, uid) != 0)
  4497  		failmsg("do_sandbox_android: setresuid failed", "sandbox_arg=%llu", sandbox_arg);
  4498  
  4499  	// setresuid and setresgid clear the parent-death signal.
  4500  	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  4501  
  4502  	setfilecon(".", SELINUX_LABEL_APP_DATA_FILE);
  4503  	if (uid == UNTRUSTED_APP_UID)
  4504  		setcon(SELINUX_CONTEXT_UNTRUSTED_APP);
  4505  
  4506  	loop();
  4507  	doexit(1);
  4508  }
  4509  #endif
  4510  
  4511  #if SYZ_EXECUTOR || SYZ_REPEAT && SYZ_USE_TMP_DIR
  4512  #include <dirent.h>
  4513  #include <errno.h>
  4514  #include <string.h>
  4515  #include <sys/ioctl.h>
  4516  #include <sys/mount.h>
  4517  
  4518  #define FS_IOC_SETFLAGS _IOW('f', 2, long)
  4519  
  4520  // One does not simply remove a directory.
  4521  // There can be mounts, so we need to try to umount.
  4522  // Moreover, a mount can be mounted several times, so we need to try to umount in a loop.
  4523  // Moreover, after umount a dir can become non-empty again, so we need another loop.
  4524  // Moreover, a mount can be re-mounted as read-only and then we will fail to make a dir empty.
// Recursively deletes `dir` and everything below it.
// Mounts found on `dir` or on its entries are unmounted first (skipped when
// the android sandbox is in use). Unrecoverable errors terminate the process
// through the exitf helper; EROFS on unlink/rmdir is tolerated and leaves the
// entry in place.
static void remove_dir(const char* dir)
{
	// Counts full re-scans of the directory triggered by ENOTEMPTY (at most 100).
	int iter = 0;
	DIR* dp = 0;
retry:
#if SYZ_EXECUTOR || !SYZ_SANDBOX_ANDROID
	// Starting from v6.9, it does no longer make sense to use MNT_DETACH, because
	// a loop device may only be reused in RW mode if no mounted filesystem keeps a
	// reference to it. So we have to umount them synchronously.
	// MNT_FORCE should hopefully prevent hangs for filesystems that may require a complex cleanup.
	const int umount_flags = MNT_FORCE | UMOUNT_NOFOLLOW;
#if SYZ_EXECUTOR
	if (!flag_sandbox_android)
#endif
		// Peel off every mount stacked on dir.
		while (umount2(dir, umount_flags) == 0) {
			debug("umount(%s)\n", dir);
		}
#endif
	dp = opendir(dir);
	if (dp == NULL) {
		if (errno == EMFILE) {
			// This happens when the test process casts prlimit(NOFILE) on us.
			// Ideally we somehow prevent test processes from messing with parent processes.
			// But full sandboxing is expensive, so let's ignore this error for now.
			exitf("opendir(%s) failed due to NOFILE, exiting", dir);
		}
		exitf("opendir(%s) failed", dir);
	}
	struct dirent* ep = 0;
	while ((ep = readdir(dp))) {
		if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0)
			continue;
		char filename[FILENAME_MAX];
		snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name);
		// If it's 9p mount with broken transport, lstat will fail.
		// So try to umount first.
#if SYZ_EXECUTOR || !SYZ_SANDBOX_ANDROID
#if SYZ_EXECUTOR
		if (!flag_sandbox_android)
#endif
			while (umount2(filename, umount_flags) == 0) {
				debug("umount(%s)\n", filename);
			}
#endif
		struct stat st;
		if (lstat(filename, &st))
			exitf("lstat(%s) failed", filename);
		// Directories are handled by recursion; everything else is unlinked below.
		if (S_ISDIR(st.st_mode)) {
			remove_dir(filename);
			continue;
		}
		int i;
		for (i = 0;; i++) {
			if (unlink(filename) == 0)
				break;
			if (errno == EPERM) {
				// Try to reset FS_XFLAG_IMMUTABLE.
				// NOTE(review): this retry path is not limited by the i > 100 check
				// below; confirm it cannot spin if unlink keeps returning EPERM.
				int fd = open(filename, O_RDONLY);
				if (fd != -1) {
					long flags = 0;
					if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) {
						debug("reset FS_XFLAG_IMMUTABLE\n");
					}
					close(fd);
					continue;
				}
			}
			if (errno == EROFS) {
				debug("ignoring EROFS\n");
				break;
			}
			// Only EBUSY is retried, and only a bounded number of times.
			if (errno != EBUSY || i > 100)
				exitf("unlink(%s) failed", filename);
#if SYZ_EXECUTOR || !SYZ_SANDBOX_ANDROID
#if SYZ_EXECUTOR
			if (!flag_sandbox_android) {
#endif
				debug("umount(%s)\n", filename);
				if (umount2(filename, umount_flags))
					exitf("umount(%s) failed", filename);
#if SYZ_EXECUTOR
			}
#endif
#endif
		}
	}
	closedir(dp);
	// Now remove the directory itself, retrying recoverable errors up to 100 times.
	for (int i = 0;; i++) {
		if (rmdir(dir) == 0)
			break;
		if (i < 100) {
			if (errno == EPERM) {
				// Try to reset FS_XFLAG_IMMUTABLE.
				int fd = open(dir, O_RDONLY);
				if (fd != -1) {
					long flags = 0;
					if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) {
						debug("reset FS_XFLAG_IMMUTABLE\n");
					}
					close(fd);
					continue;
				}
			}
			if (errno == EROFS) {
				debug("ignoring EROFS\n");
				break;
			}
			if (errno == EBUSY) {
#if SYZ_EXECUTOR || !SYZ_SANDBOX_ANDROID
#if SYZ_EXECUTOR
				if (!flag_sandbox_android) {
#endif
					debug("umount(%s)\n", dir);
					if (umount2(dir, umount_flags))
						exitf("umount(%s) failed", dir);
#if SYZ_EXECUTOR
				}
#endif
#endif
				continue;
			}
			// The directory got new entries after it was emptied: start over.
			if (errno == ENOTEMPTY) {
				if (iter < 100) {
					iter++;
					goto retry;
				}
			}
		}
		exitf("rmdir(%s) failed", dir);
	}
}
  4656  #endif
  4657  
  4658  #if SYZ_EXECUTOR || SYZ_FAULT
  4659  #include <fcntl.h>
  4660  #include <string.h>
  4661  #include <sys/stat.h>
  4662  #include <sys/types.h>
  4663  
  4664  static int inject_fault(int nth)
  4665  {
  4666  	int fd;
  4667  	fd = open("/proc/thread-self/fail-nth", O_RDWR);
  4668  	// We treat errors here as temporal/non-critical because we see
  4669  	// occasional ENOENT/EACCES errors returned. It seems that fuzzer
  4670  	// somehow gets its hands to it.
  4671  	if (fd == -1)
  4672  		exitf("failed to open /proc/thread-self/fail-nth");
  4673  	char buf[16];
  4674  	sprintf(buf, "%d", nth);
  4675  	if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf))
  4676  		exitf("failed to write /proc/thread-self/fail-nth");
  4677  	return fd;
  4678  }
  4679  #endif
  4680  
  4681  #if SYZ_EXECUTOR
  4682  static int fault_injected(int fail_fd)
  4683  {
  4684  	char buf[16];
  4685  	int n = read(fail_fd, buf, sizeof(buf) - 1);
  4686  	if (n <= 0)
  4687  		exitf("failed to read /proc/thread-self/fail-nth");
  4688  	int res = n == 2 && buf[0] == '0' && buf[1] == '\n';
  4689  	buf[0] = '0';
  4690  	if (write(fail_fd, buf, 1) != 1)
  4691  		exitf("failed to write /proc/thread-self/fail-nth");
  4692  	close(fail_fd);
  4693  	return res;
  4694  }
  4695  #endif
  4696  
  4697  #if (SYZ_EXECUTOR || SYZ_REPEAT) && SYZ_EXECUTOR_USES_FORK_SERVER
  4698  #include <dirent.h>
  4699  #include <errno.h>
  4700  #include <fcntl.h>
  4701  #include <signal.h>
  4702  #include <string.h>
  4703  #include <sys/stat.h>
  4704  #include <sys/types.h>
  4705  #include <sys/wait.h>
  4706  
  4707  static void kill_and_wait(int pid, int* status)
  4708  {
  4709  	kill(-pid, SIGKILL);
  4710  	kill(pid, SIGKILL);
  4711  	// First, give it up to 100 ms to surrender.
  4712  	for (int i = 0; i < 100; i++) {
  4713  		if (waitpid(-1, status, WNOHANG | __WALL) == pid)
  4714  			return;
  4715  		usleep(1000);
  4716  	}
  4717  	// Now, try to abort fuse connections as they cause deadlocks,
  4718  	// see Documentation/filesystems/fuse.txt for details.
  4719  	// There is no good way to figure out the right connections
  4720  	// provided that the process could use unshare(CLONE_NEWNS),
  4721  	// so we abort all.
  4722  	debug("kill is not working\n");
  4723  	DIR* dir = opendir("/sys/fs/fuse/connections");
  4724  	if (dir) {
  4725  		for (;;) {
  4726  			struct dirent* ent = readdir(dir);
  4727  			if (!ent)
  4728  				break;
  4729  			if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
  4730  				continue;
  4731  			char abort[300];
  4732  			snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort", ent->d_name);
  4733  			int fd = open(abort, O_WRONLY);
  4734  			if (fd == -1) {
  4735  				debug("failed to open %s: %d\n", abort, errno);
  4736  				continue;
  4737  			}
  4738  			debug("aborting fuse conn %s\n", ent->d_name);
  4739  			if (write(fd, abort, 1) < 0) {
  4740  				debug("failed to abort: %d\n", errno);
  4741  			}
  4742  			close(fd);
  4743  		}
  4744  		closedir(dir);
  4745  	} else {
  4746  		debug("failed to open /sys/fs/fuse/connections: %d\n", errno);
  4747  	}
  4748  	// Now, just wait, no other options.
  4749  	while (waitpid(-1, status, __WALL) != pid) {
  4750  	}
  4751  }
  4752  #endif
  4753  
  4754  #if (SYZ_EXECUTOR || SYZ_REPEAT && (SYZ_CGROUPS || SYZ_NET_RESET)) && SYZ_EXECUTOR_USES_FORK_SERVER
  4755  #include <fcntl.h>
  4756  #include <sys/ioctl.h>
  4757  #include <sys/stat.h>
  4758  #include <sys/types.h>
  4759  #include <unistd.h>
  4760  
  4761  #define SYZ_HAVE_SETUP_LOOP 1
// Calls the loop-level setup helpers that are compiled in:
// setup_cgroups_loop (executor or SYZ_CGROUPS builds) and
// checkpoint_net_namespace (executor or SYZ_NET_RESET builds).
static void setup_loop()
{
#if SYZ_EXECUTOR || SYZ_CGROUPS
	setup_cgroups_loop();
#endif
#if SYZ_EXECUTOR || SYZ_NET_RESET
	checkpoint_net_namespace();
#endif
}
  4771  #endif
  4772  
  4773  #if (SYZ_EXECUTOR || SYZ_REPEAT && (SYZ_NET_RESET || __NR_syz_mount_image || __NR_syz_read_part_table)) && SYZ_EXECUTOR_USES_FORK_SERVER
  4774  #define SYZ_HAVE_RESET_LOOP 1
  4775  static void reset_loop()
  4776  {
  4777  #if SYZ_EXECUTOR || __NR_syz_mount_image || __NR_syz_read_part_table
  4778  	char buf[64];
  4779  	snprintf(buf, sizeof(buf), "/dev/loop%llu", procid);
  4780  	int loopfd = open(buf, O_RDWR);
  4781  	if (loopfd != -1) {
  4782  		ioctl(loopfd, LOOP_CLR_FD, 0);
  4783  		close(loopfd);
  4784  	}
  4785  #endif
  4786  #if SYZ_EXECUTOR || SYZ_NET_RESET
  4787  	reset_net_namespace();
  4788  #endif
  4789  }
  4790  #endif
  4791  
  4792  #if (SYZ_EXECUTOR || SYZ_REPEAT) && SYZ_EXECUTOR_USES_FORK_SERVER
  4793  #include <sys/prctl.h>
  4794  #include <unistd.h>
  4795  
  4796  #define SYZ_HAVE_SETUP_TEST 1
  4797  static void setup_test()
  4798  {
  4799  	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  4800  	setpgrp();
  4801  #if SYZ_EXECUTOR || SYZ_CGROUPS
  4802  	setup_cgroups_test();
  4803  #endif
  4804  	// It's the leaf test process we want to be always killed first.
  4805  	write_file("/proc/self/oom_score_adj", "1000");
  4806  #if SYZ_EXECUTOR || SYZ_NET_INJECTION
  4807  	// Read all remaining packets from tun to better
  4808  	// isolate consequently executing programs.
  4809  	flush_tun();
  4810  #endif
  4811  #if SYZ_EXECUTOR || SYZ_USE_TMP_DIR
  4812  	// Add a binderfs symlink to the tmp folder.
  4813  	if (symlink("/dev/binderfs", "./binderfs")) {
  4814  		debug("symlink(/dev/binderfs, ./binderfs) failed: %d", errno);
  4815  	}
  4816  #endif
  4817  }
  4818  #endif
  4819  
  4820  #if SYZ_EXECUTOR || SYZ_CLOSE_FDS
  4821  #define SYZ_HAVE_CLOSE_FDS 1
  4822  static void close_fds()
  4823  {
  4824  #if SYZ_EXECUTOR
  4825  	if (!flag_close_fds)
  4826  		return;
  4827  #endif
  4828  	// Keeping a 9p transport pipe open will hang the proccess dead,
  4829  	// so close all opened file descriptors.
  4830  	// Also close all USB emulation descriptors to trigger exit from USB
  4831  	// event loop to collect coverage.
  4832  	for (int fd = 3; fd < MAX_FDS; fd++)
  4833  		close(fd);
  4834  }
  4835  #endif
  4836  
  4837  #if SYZ_EXECUTOR || SYZ_FAULT
  4838  #include <errno.h>
  4839  
  4840  static void setup_fault()
  4841  {
  4842  	int fd = open("/proc/self/make-it-fail", O_WRONLY);
  4843  	if (fd == -1)
  4844  		fail("CONFIG_FAULT_INJECTION is not enabled");
  4845  	close(fd);
  4846  
  4847  	fd = open("/proc/thread-self/fail-nth", O_WRONLY);
  4848  	if (fd == -1)
  4849  		fail("kernel does not have systematic fault injection support");
  4850  	close(fd);
  4851  
  4852  	static struct {
  4853  		const char* file;
  4854  		const char* val;
  4855  		bool fatal;
  4856  	} files[] = {
  4857  	    {"/sys/kernel/debug/failslab/ignore-gfp-wait", "N", true},
  4858  	    // These are enabled by separate configs (e.g. CONFIG_FAIL_FUTEX)
  4859  	    // and we did not check all of them in host.checkFaultInjection, so we ignore errors.
  4860  	    {"/sys/kernel/debug/fail_futex/ignore-private", "N", false},
  4861  	    {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem", "N", false},
  4862  	    {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-wait", "N", false},
  4863  	    {"/sys/kernel/debug/fail_page_alloc/min-order", "0", false},
  4864  	};
  4865  	unsigned i;
  4866  	for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
  4867  		if (!write_file(files[i].file, files[i].val)) {
  4868  			debug("failed to write %s: %d\n", files[i].file, errno);
  4869  			if (files[i].fatal)
  4870  				failmsg("failed to write fault injection file", "file=%s", files[i].file);
  4871  		}
  4872  	}
  4873  }
  4874  #endif
  4875  
  4876  #if SYZ_EXECUTOR || SYZ_LEAK
  4877  #include <fcntl.h>
  4878  #include <stdio.h>
  4879  #include <string.h>
  4880  #include <sys/stat.h>
  4881  #include <sys/types.h>
  4882  
  4883  #define KMEMLEAK_FILE "/sys/kernel/debug/kmemleak"
  4884  
// Prepares kmemleak for leak checking by writing a sequence of commands to
// KMEMLEAK_FILE: "scan=off", "scan", (5 second pause), "scan", "clear".
// Any failed write is fatal.
static void setup_leak()
{
	// Per the kernel kmemleak documentation, "scan=off" stops the automatic
	// scanning thread.
	if (!write_file(KMEMLEAK_FILE, "scan=off")) {
		// EBUSY gets a dedicated message pointing at the kernel config options.
		if (errno == EBUSY)
			fail("KMEMLEAK disabled: increase CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE"
			     " or unset CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF");
		fail("failed to write(kmemleak, \"scan=off\")");
	}
	// Flush boot leaks.
	if (!write_file(KMEMLEAK_FILE, "scan"))
		fail("failed to write(kmemleak, \"scan\")");
	sleep(5); // account for MSECS_MIN_AGE
	if (!write_file(KMEMLEAK_FILE, "scan"))
		fail("failed to write(kmemleak, \"scan\")");
	// Discard what the two scans above found.
	if (!write_file(KMEMLEAK_FILE, "clear"))
		fail("failed to write(kmemleak, \"clear\")");
}
  4902  
  4903  #define SYZ_HAVE_LEAK_CHECK 1
  4904  #if SYZ_EXECUTOR
  4905  static void check_leaks(char** frames, int nframes)
  4906  #else
  4907  static void check_leaks(void)
  4908  #endif
  4909  {
  4910  	int fd = open(KMEMLEAK_FILE, O_RDWR);
  4911  	if (fd == -1)
  4912  		fail("failed to open(kmemleak)");
  4913  	// KMEMLEAK has false positives. To mitigate most of them, it checksums
  4914  	// potentially leaked objects, and reports them only on the next scan
  4915  	// iff the checksum does not change. Because of that we do the following
  4916  	// intricate dance:
  4917  	// Scan, sleep, scan again. At this point we can get some leaks.
  4918  	// If there are leaks, we sleep and scan again, this can remove
  4919  	// false leaks. Then, read kmemleak again. If we get leaks now, then
  4920  	// hopefully these are true positives during the previous testing cycle.
  4921  	uint64 start = current_time_ms();
  4922  	if (write(fd, "scan", 4) != 4)
  4923  		fail("failed to write(kmemleak, \"scan\")");
  4924  	sleep(1);
  4925  	// Account for MSECS_MIN_AGE
  4926  	// (1 second less because scanning will take at least a second).
  4927  	while (current_time_ms() - start < 4 * 1000)
  4928  		sleep(1);
  4929  	if (write(fd, "scan", 4) != 4)
  4930  		fail("failed to write(kmemleak, \"scan\")");
  4931  	static char buf[128 << 10];
  4932  	ssize_t n = read(fd, buf, sizeof(buf) - 1);
  4933  	if (n < 0)
  4934  		fail("failed to read(kmemleak)");
  4935  	int nleaks = 0;
  4936  	if (n != 0) {
  4937  		sleep(1);
  4938  		if (write(fd, "scan", 4) != 4)
  4939  			fail("failed to write(kmemleak, \"scan\")");
  4940  		if (lseek(fd, 0, SEEK_SET) < 0)
  4941  			fail("failed to lseek(kmemleak)");
  4942  		n = read(fd, buf, sizeof(buf) - 1);
  4943  		if (n < 0)
  4944  			fail("failed to read(kmemleak)");
  4945  		buf[n] = 0;
  4946  		char* pos = buf;
  4947  		char* end = buf + n;
  4948  		while (pos < end) {
  4949  			char* next = strstr(pos + 1, "unreferenced object");
  4950  			if (!next)
  4951  				next = end;
  4952  			char prev = *next;
  4953  			*next = 0;
  4954  #if SYZ_EXECUTOR
  4955  			int f;
  4956  			for (f = 0; f < nframes; f++) {
  4957  				if (strstr(pos, frames[f]))
  4958  					break;
  4959  			}
  4960  			if (f != nframes) {
  4961  				*next = prev;
  4962  				pos = next;
  4963  				continue;
  4964  			}
  4965  #endif
  4966  			// BUG in output should be recognized by manager.
  4967  			fprintf(stderr, "BUG: memory leak\n%s\n", pos);
  4968  			*next = prev;
  4969  			pos = next;
  4970  			nleaks++;
  4971  		}
  4972  	}
  4973  	if (write(fd, "clear", 5) != 5)
  4974  		fail("failed to write(kmemleak, \"clear\")");
  4975  	close(fd);
  4976  	if (nleaks)
  4977  		doexit(1);
  4978  }
  4979  #endif
  4980  
  4981  #if SYZ_EXECUTOR || SYZ_BINFMT_MISC
  4982  #include <fcntl.h>
  4983  #include <sys/mount.h>
  4984  #include <sys/stat.h>
  4985  #include <sys/types.h>
  4986  
  4987  static void setup_binfmt_misc()
  4988  {
  4989  	if (mount(0, "/proc/sys/fs/binfmt_misc", "binfmt_misc", 0, 0)) {
  4990  		debug("mount(binfmt_misc) failed: %d\n", errno);
  4991  		return;
  4992  	}
  4993  	if (!write_file("/proc/sys/fs/binfmt_misc/register", ":syz0:M:0:\x01::./file0:") ||
  4994  	    !write_file("/proc/sys/fs/binfmt_misc/register", ":syz1:M:1:\x02::./file0:POC"))
  4995  		fail("write(/proc/sys/fs/binfmt_misc/register) failed");
  4996  }
  4997  #endif
  4998  
  4999  #if SYZ_EXECUTOR || SYZ_KCSAN
  5000  #define KCSAN_DEBUGFS_FILE "/sys/kernel/debug/kcsan"
  5001  
  5002  static void setup_kcsan()
  5003  {
  5004  	if (!write_file(KCSAN_DEBUGFS_FILE, "on"))
  5005  		fail("write(/sys/kernel/debug/kcsan, on) failed");
  5006  }
  5007  
  5008  #if SYZ_EXECUTOR // currently only used by executor
  5009  static void setup_kcsan_filterlist(char** frames, int nframes, bool suppress)
  5010  {
  5011  	int fd = open(KCSAN_DEBUGFS_FILE, O_WRONLY);
  5012  	if (fd == -1)
  5013  		fail("failed to open kcsan debugfs file");
  5014  
  5015  	printf("%s KCSAN reports in functions: ",
  5016  	       suppress ? "suppressing" : "only showing");
  5017  	if (!suppress)
  5018  		dprintf(fd, "whitelist\n");
  5019  	for (int i = 0; i < nframes; ++i) {
  5020  		printf("'%s' ", frames[i]);
  5021  		dprintf(fd, "!%s\n", frames[i]);
  5022  	}
  5023  	printf("\n");
  5024  
  5025  	close(fd);
  5026  }
  5027  
  5028  #define SYZ_HAVE_KCSAN 1
  5029  #endif
  5030  #endif
  5031  
  5032  #if SYZ_EXECUTOR || SYZ_USB
  5033  static void setup_usb()
  5034  {
  5035  	if (chmod("/dev/raw-gadget", 0666))
  5036  		fail("failed to chmod /dev/raw-gadget");
  5037  }
  5038  #endif
  5039  
  5040  #if SYZ_EXECUTOR || SYZ_SYSCTL
  5041  #include <errno.h>
  5042  #include <stdio.h>
  5043  #include <string.h>
  5044  
  5045  static void setup_sysctl()
  5046  {
  5047  	char mypid[32];
  5048  	snprintf(mypid, sizeof(mypid), "%d", getpid());
  5049  
  5050  	// TODO: consider moving all sysctl's into CMDLINE config later.
  5051  	// Kernel has support for setting sysctl's via command line since 3db978d480e28 (v5.8).
  5052  	struct {
  5053  		const char* name;
  5054  		const char* data;
  5055  	} files[] = {
  5056  #if GOARCH_amd64 || GOARCH_386
  5057  		// nmi_check_duration() prints "INFO: NMI handler took too long" on slow debug kernels.
  5058  		// It happens a lot in qemu, and the messages are frequently corrupted
  5059  		// (intermixed with other kernel output as they are printed from NMI)
  5060  		// and are not matched against the suppression in pkg/report.
  5061  		// This write prevents these messages from being printed.
  5062  		{"/sys/kernel/debug/x86/nmi_longest_ns", "10000000000"},
  5063  #endif
  5064  		{"/proc/sys/kernel/hung_task_check_interval_secs", "20"},
  5065  		// bpf_jit_kallsyms and disabling bpf_jit_harden are required
  5066  		// for unwinding through bpf functions.
  5067  		{"/proc/sys/net/core/bpf_jit_kallsyms", "1"},
  5068  		{"/proc/sys/net/core/bpf_jit_harden", "0"},
  5069  		// This is to provide more useful info in crash reports.
  5070  		{"/proc/sys/kernel/kptr_restrict", "0"},
  5071  		{"/proc/sys/kernel/softlockup_all_cpu_backtrace", "1"},
  5072  		// This is to restrict effects of recursive exponential mounts, for details see
  5073  		// "mnt: Add a per mount namespace limit on the number of mounts" commit.
  5074  		{"/proc/sys/fs/mount-max", "100"},
  5075  		// Dumping all tasks to console can take too long.
  5076  		{"/proc/sys/vm/oom_dump_tasks", "0"},
  5077  		// Executor hits lots of SIGSEGVs, no point in logging them.
  5078  		{"/proc/sys/debug/exception-trace", "0"},
  5079  		{"/proc/sys/kernel/printk", "7 4 1 3"},
  5080  		// Faster gc (1 second) is intended to make tests more repeatable.
  5081  		{"/proc/sys/kernel/keys/gc_delay", "1"},
  5082  		// We always want to prefer killing the allocating test process rather than somebody else
  5083  		// (sshd or another random test process).
  5084  		{"/proc/sys/vm/oom_kill_allocating_task", "1"},
  5085  		// This blocks some of the ways the fuzzer can trigger a reboot.
  5086  		// ctrl-alt-del=0 tells kernel to signal cad_pid instead of rebooting
  5087  		// and setting cad_pid to the current pid (transient "syz-executor setup") makes it a no-op.
  5088  		// For context see: https://groups.google.com/g/syzkaller-bugs/c/WqOY4TiRnFg/m/6P9u8lWZAQAJ
  5089  		{"/proc/sys/kernel/ctrl-alt-del", "0"},
  5090  		{"/proc/sys/kernel/cad_pid", mypid},
  5091  	};
  5092  	for (size_t i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
  5093  		if (!write_file(files[i].name, files[i].data))
  5094  			printf("write to %s failed: %s\n", files[i].name, strerror(errno));
  5095  	}
  5096  }
  5097  #endif
  5098  
  5099  #if SYZ_EXECUTOR || SYZ_802154
  5100  #include <net/if.h>
  5101  #include <string.h>
  5102  #include <sys/socket.h>
  5103  #include <sys/types.h>
  5104  
  5105  #define NL802154_CMD_SET_SHORT_ADDR 11
  5106  #define NL802154_ATTR_IFINDEX 3
  5107  #define NL802154_ATTR_SHORT_ADDR 10
  5108  
  5109  static void setup_802154()
  5110  {
  5111  	int sock_route = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
  5112  	if (sock_route == -1)
  5113  		fail("socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE) failed");
  5114  	int sock_generic = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
  5115  	if (sock_generic < 0)
  5116  		fail("socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC) failed");
  5117  	int nl802154_family_id = netlink_query_family_id(&nlmsg, sock_generic, "nl802154", true);
  5118  	for (int i = 0; i < 2; i++) {
  5119  		// wpan0/1 are created by CONFIG_IEEE802154_HWSIM.
  5120  		// sys/linux/socket_ieee802154.txt knowns about these names and consts.
  5121  		char devname[] = "wpan0";
  5122  		devname[strlen(devname) - 1] += i;
  5123  		uint64 hwaddr = 0xaaaaaaaaaaaa0002 + (i << 8);
  5124  		uint16 shortaddr = 0xaaa0 + i;
  5125  		int ifindex = if_nametoindex(devname);
  5126  		struct genlmsghdr genlhdr;
  5127  		memset(&genlhdr, 0, sizeof(genlhdr));
  5128  		genlhdr.cmd = NL802154_CMD_SET_SHORT_ADDR;
  5129  		netlink_init(&nlmsg, nl802154_family_id, 0, &genlhdr, sizeof(genlhdr));
  5130  		netlink_attr(&nlmsg, NL802154_ATTR_IFINDEX, &ifindex, sizeof(ifindex));
  5131  		netlink_attr(&nlmsg, NL802154_ATTR_SHORT_ADDR, &shortaddr, sizeof(shortaddr));
  5132  		int err = netlink_send(&nlmsg, sock_generic);
  5133  		if (err < 0)
  5134  			fail("NL802154_CMD_SET_SHORT_ADDR failed");
  5135  		netlink_device_change(&nlmsg, sock_route, devname, true, 0, &hwaddr, sizeof(hwaddr), 0);
  5136  		if (i == 0) {
  5137  			netlink_add_device_impl(&nlmsg, "lowpan", "lowpan0", false);
  5138  			netlink_done(&nlmsg);
  5139  			netlink_attr(&nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex));
  5140  			int err = netlink_send(&nlmsg, sock_route);
  5141  			if (err < 0)
  5142  				fail("netlink: adding device lowpan0 type lowpan link wpan0");
  5143  		}
  5144  	}
  5145  	close(sock_route);
  5146  	close(sock_generic);
  5147  }
  5148  #endif
  5149  
  5150  #if GOARCH_s390x
  5151  #include <sys/mman.h>
  5152  // Ugly way to work around gcc's "error: function called through a non-compatible type".
  5153  // Simply casting via (void*) inline does not work b/c gcc sees through a chain of casts.
  5154  // The macro is used in generated C code.
  5155  #define CAST(f) ({void* p = (void*)f; p; })
  5156  #endif
  5157  
  5158  #if SYZ_EXECUTOR || __NR_syz_fuse_handle_req
  5159  #include <fcntl.h>
  5160  #include <stddef.h>
  5161  #include <stdio.h>
  5162  #include <sys/stat.h>
  5163  #include <sys/types.h>
  5164  
  5165  // From linux/fuse.h
  5166  #define FUSE_MIN_READ_BUFFER 8192
  5167  
// From linux/fuse.h
// Request opcodes carried in fuse_in_header.opcode. syz_fuse_handle_req() switches
// on these values to choose which fuzzed response to send back.
// Note that the numbering is not contiguous: 7 and 19 are not assigned in this list.
enum fuse_opcode {
	FUSE_LOOKUP = 1,
	FUSE_FORGET = 2, // no reply
	FUSE_GETATTR = 3,
	FUSE_SETATTR = 4,
	FUSE_READLINK = 5,
	FUSE_SYMLINK = 6,
	FUSE_MKNOD = 8,
	FUSE_MKDIR = 9,
	FUSE_UNLINK = 10,
	FUSE_RMDIR = 11,
	FUSE_RENAME = 12,
	FUSE_LINK = 13,
	FUSE_OPEN = 14,
	FUSE_READ = 15,
	FUSE_WRITE = 16,
	FUSE_STATFS = 17,
	FUSE_RELEASE = 18,
	FUSE_FSYNC = 20,
	FUSE_SETXATTR = 21,
	FUSE_GETXATTR = 22,
	FUSE_LISTXATTR = 23,
	FUSE_REMOVEXATTR = 24,
	FUSE_FLUSH = 25,
	FUSE_INIT = 26,
	FUSE_OPENDIR = 27,
	FUSE_READDIR = 28,
	FUSE_RELEASEDIR = 29,
	FUSE_FSYNCDIR = 30,
	FUSE_GETLK = 31,
	FUSE_SETLK = 32,
	FUSE_SETLKW = 33,
	FUSE_ACCESS = 34,
	FUSE_CREATE = 35,
	FUSE_INTERRUPT = 36,
	FUSE_BMAP = 37,
	FUSE_DESTROY = 38,
	FUSE_IOCTL = 39,
	FUSE_POLL = 40,
	FUSE_NOTIFY_REPLY = 41,
	FUSE_BATCH_FORGET = 42,
	FUSE_FALLOCATE = 43,
	FUSE_READDIRPLUS = 44,
	FUSE_RENAME2 = 45,
	FUSE_LSEEK = 46,
	FUSE_COPY_FILE_RANGE = 47,
	FUSE_SETUPMAPPING = 48,
	FUSE_REMOVEMAPPING = 49,

	// CUSE specific operations
	CUSE_INIT = 4096,

	// Reserved opcodes: helpful to detect structure endian-ness
	CUSE_INIT_BSWAP_RESERVED = 1048576, // CUSE_INIT << 8
	FUSE_INIT_BSWAP_RESERVED = 436207616, // FUSE_INIT << 24
};
  5225  
// From linux/fuse.h
// Header at the start of a request read from /dev/fuse.
// syz_fuse_handle_req() overlays this struct on the beginning of its read buffer.
struct fuse_in_header {
	// Message length; compared against the number of bytes read to detect truncation.
	uint32 len;
	// enum fuse_opcode value; selects which fuzzed response is sent back.
	uint32 opcode;
	// Copied into the reply header by fuse_send_response() to link reply and request.
	uint64 unique;
	// The remaining fields are not read by the code in this file.
	uint64 nodeid;
	uint32 uid;
	uint32 gid;
	uint32 pid;
	uint32 padding;
};
  5237  
// From linux/fuse.h
// Header at the start of every response written to /dev/fuse.
struct fuse_out_header {
	// Number of bytes fuse_send_response() writes, starting at this header.
	uint32 len;
	// This is actually a int32_t but *_t variants fail to compile inside
	// the executor (it appends an additional _t for some reason) and int32
	// does not exist. Since we don't touch this field, defining it as
	// unsigned should not cause any problems.
	uint32 error;
	// Overwritten by fuse_send_response() with the unique value of the request.
	uint64 unique;
};
  5248  
// Struct shared between syz_fuse_handle_req() and the fuzzer. Used to provide
// a fuzzed response for each request type.
// Each member points at a response that starts with a fuse_out_header; the comments
// below list the opcodes for which syz_fuse_handle_req() picks that member.
// A NULL member makes the corresponding request fail with -1 instead of being answered.
struct syz_fuse_req_out {
	// FUSE_INIT; also reused (with len reset to the bare header size) for opcodes
	// that carry no reply data, e.g. FUSE_UNLINK, FUSE_RELEASE, FUSE_FLUSH.
	struct fuse_out_header* init;
	struct fuse_out_header* lseek; // FUSE_LSEEK
	struct fuse_out_header* bmap; // FUSE_BMAP
	struct fuse_out_header* poll; // FUSE_POLL
	struct fuse_out_header* getxattr; // FUSE_GETXATTR, FUSE_LISTXATTR
	struct fuse_out_header* lk; // FUSE_GETLK
	struct fuse_out_header* statfs; // FUSE_STATFS
	struct fuse_out_header* write; // FUSE_WRITE, FUSE_COPY_FILE_RANGE
	struct fuse_out_header* read; // FUSE_READ
	struct fuse_out_header* open; // FUSE_OPEN, FUSE_OPENDIR
	struct fuse_out_header* attr; // FUSE_GETATTR, FUSE_SETATTR
	// FUSE_LOOKUP, FUSE_SYMLINK, FUSE_LINK, FUSE_MKNOD, FUSE_MKDIR
	struct fuse_out_header* entry;
	struct fuse_out_header* dirent; // FUSE_READDIR
	struct fuse_out_header* direntplus; // FUSE_READDIRPLUS
	struct fuse_out_header* create_open; // FUSE_CREATE
	struct fuse_out_header* ioctl; // FUSE_IOCTL
};
  5269  
  5270  // Link the reponse to the request and send it to /dev/fuse.
  5271  static int fuse_send_response(int fd,
  5272  			      const struct fuse_in_header* in_hdr,
  5273  			      struct fuse_out_header* out_hdr)
  5274  {
  5275  	if (!out_hdr) {
  5276  		debug("fuse_send_response: received a NULL out_hdr\n");
  5277  		return -1;
  5278  	}
  5279  
  5280  	out_hdr->unique = in_hdr->unique;
  5281  	if (write(fd, out_hdr, out_hdr->len) == -1) {
  5282  		debug("fuse_send_response > write failed: %d\n", errno);
  5283  		return -1;
  5284  	}
  5285  
  5286  	return 0;
  5287  }
  5288  
  5289  // This function reads a request from /dev/fuse and tries to pick the correct
  5290  // response from the input struct syz_fuse_req_out (a3). Responses are still
  5291  // generated by the fuzzer.
  5292  static volatile long syz_fuse_handle_req(volatile long a0, // /dev/fuse fd.
  5293  					 volatile long a1, // Read buffer.
  5294  					 volatile long a2, // Buffer len.
  5295  					 volatile long a3) // syz_fuse_req_out.
  5296  {
  5297  	struct syz_fuse_req_out* req_out = (struct syz_fuse_req_out*)a3;
  5298  	struct fuse_out_header* out_hdr = NULL;
  5299  	char* buf = (char*)a1;
  5300  	int buf_len = (int)a2;
  5301  	int fd = (int)a0;
  5302  
  5303  	if (!req_out) {
  5304  		debug("syz_fuse_handle_req: received a NULL syz_fuse_req_out\n");
  5305  		return -1;
  5306  	}
  5307  	if (buf_len < FUSE_MIN_READ_BUFFER) {
  5308  		debug("FUSE requires the read buffer to be at least %u\n", FUSE_MIN_READ_BUFFER);
  5309  		return -1;
  5310  	}
  5311  
  5312  	int ret = read(fd, buf, buf_len);
  5313  	if (ret == -1) {
  5314  		debug("syz_fuse_handle_req > read failed: %d\n", errno);
  5315  		return -1;
  5316  	}
  5317  	// Safe to do because ret > 0 (!= -1) and < FUSE_MIN_READ_BUFFER (= 8192).
  5318  	if ((size_t)ret < sizeof(struct fuse_in_header)) {
  5319  		debug("syz_fuse_handle_req: received a truncated FUSE header\n");
  5320  		return -1;
  5321  	}
  5322  
  5323  	const struct fuse_in_header* in_hdr = (const struct fuse_in_header*)buf;
  5324  	debug("syz_fuse_handle_req: received opcode %d\n", in_hdr->opcode);
  5325  	if (in_hdr->len > (uint32)ret) {
  5326  		debug("syz_fuse_handle_req: received a truncated message\n");
  5327  		return -1;
  5328  	}
  5329  
  5330  	switch (in_hdr->opcode) {
  5331  	case FUSE_GETATTR:
  5332  	case FUSE_SETATTR:
  5333  		out_hdr = req_out->attr;
  5334  		break;
  5335  	case FUSE_LOOKUP:
  5336  	case FUSE_SYMLINK:
  5337  	case FUSE_LINK:
  5338  	case FUSE_MKNOD:
  5339  	case FUSE_MKDIR:
  5340  		out_hdr = req_out->entry;
  5341  		break;
  5342  	case FUSE_OPEN:
  5343  	case FUSE_OPENDIR:
  5344  		out_hdr = req_out->open;
  5345  		break;
  5346  	case FUSE_STATFS:
  5347  		out_hdr = req_out->statfs;
  5348  		break;
  5349  	case FUSE_RMDIR:
  5350  	case FUSE_RENAME:
  5351  	case FUSE_RENAME2:
  5352  	case FUSE_FALLOCATE:
  5353  	case FUSE_SETXATTR:
  5354  	case FUSE_REMOVEXATTR:
  5355  	case FUSE_FSYNCDIR:
  5356  	case FUSE_FSYNC:
  5357  	case FUSE_SETLKW:
  5358  	case FUSE_SETLK:
  5359  	case FUSE_ACCESS:
  5360  	case FUSE_FLUSH:
  5361  	case FUSE_RELEASE:
  5362  	case FUSE_RELEASEDIR:
  5363  	case FUSE_UNLINK:
  5364  	case FUSE_DESTROY:
  5365  		// These opcodes do not have any reply data. Hence, we pick
  5366  		// another response and only use the shared header.
  5367  		out_hdr = req_out->init;
  5368  		if (!out_hdr) {
  5369  			debug("syz_fuse_handle_req: received a NULL out_hdr\n");
  5370  			return -1;
  5371  		}
  5372  		out_hdr->len = sizeof(struct fuse_out_header);
  5373  		break;
  5374  	case FUSE_READ:
  5375  		out_hdr = req_out->read;
  5376  		break;
  5377  	case FUSE_READDIR:
  5378  		out_hdr = req_out->dirent;
  5379  		break;
  5380  	case FUSE_READDIRPLUS:
  5381  		out_hdr = req_out->direntplus;
  5382  		break;
  5383  	case FUSE_INIT:
  5384  		out_hdr = req_out->init;
  5385  		break;
  5386  	case FUSE_LSEEK:
  5387  		out_hdr = req_out->lseek;
  5388  		break;
  5389  	case FUSE_GETLK:
  5390  		out_hdr = req_out->lk;
  5391  		break;
  5392  	case FUSE_BMAP:
  5393  		out_hdr = req_out->bmap;
  5394  		break;
  5395  	case FUSE_POLL:
  5396  		out_hdr = req_out->poll;
  5397  		break;
  5398  	case FUSE_GETXATTR:
  5399  	case FUSE_LISTXATTR:
  5400  		out_hdr = req_out->getxattr;
  5401  		break;
  5402  	case FUSE_WRITE:
  5403  	case FUSE_COPY_FILE_RANGE:
  5404  		out_hdr = req_out->write;
  5405  		break;
  5406  	case FUSE_FORGET:
  5407  	case FUSE_BATCH_FORGET:
  5408  		// FUSE_FORGET and FUSE_BATCH_FORGET expect no reply.
  5409  		return 0;
  5410  	case FUSE_CREATE:
  5411  		out_hdr = req_out->create_open;
  5412  		break;
  5413  	case FUSE_IOCTL:
  5414  		out_hdr = req_out->ioctl;
  5415  		break;
  5416  	default:
  5417  		debug("syz_fuse_handle_req: unknown FUSE opcode\n");
  5418  		return -1;
  5419  	}
  5420  
  5421  	return fuse_send_response(fd, in_hdr, out_hdr);
  5422  }
  5423  #endif
  5424  
  5425  #if SYZ_EXECUTOR || __NR_syz_80211_inject_frame
  5426  #include <errno.h>
  5427  #include <linux/genetlink.h>
  5428  #include <linux/if_ether.h>
  5429  #include <linux/nl80211.h>
  5430  #include <net/if.h>
  5431  #include <sys/ioctl.h>
  5432  
  5433  // This pseudo syscall performs 802.11 frame injection.
  5434  //
  5435  // Its current implementation performs the injection by means of mac80211_hwsim.
  5436  // The procedure consists of the following steps:
  5437  // 1. Open a netlink socket
  5438  // 2. Register as an application responsible for wireless medium simulation by executing
  5439  //    HWSIM_CMD_REGISTER. This is a prerequisite for the following step. After HWSIM_CMD_REGISTER
  5440  //    is executed, mac80211_hwsim stops simulating a perfect medium.
  5441  //    It is also important to note that this command registers a specific socket, not a netlink port.
  5442  // 3. Inject a frame to the required interface by executing HWSIM_CMD_FRAME.
  5443  // 4. Close the socket. mac80211_hwsim will detect this and return to perfect medium simulation.
  5444  //
  5445  // Note that we cannot (should not) open a socket, register it once and then use it for frame injection
  5446  // throughout the lifetime of a proc. When some socket is registered, mac80211_hwsim does not broadcast
  5447  // frames to all interfaces itself. As we do not perform this activity either, a permanently registered
  5448  // socket will disrupt normal network operation.
  5449  
  5450  #define HWSIM_ATTR_RX_RATE 5
  5451  #define HWSIM_ATTR_SIGNAL 6
  5452  #define HWSIM_ATTR_ADDR_RECEIVER 1
  5453  #define HWSIM_ATTR_FRAME 3
  5454  
  5455  #define WIFI_MAX_INJECT_LEN 2048
  5456  
  5457  static int hwsim_register_socket(struct nlmsg* nlmsg, int sock, int hwsim_family)
  5458  {
  5459  	struct genlmsghdr genlhdr;
  5460  	memset(&genlhdr, 0, sizeof(genlhdr));
  5461  	genlhdr.cmd = HWSIM_CMD_REGISTER;
  5462  	netlink_init(nlmsg, hwsim_family, 0, &genlhdr, sizeof(genlhdr));
  5463  	int err = netlink_send_ext(nlmsg, sock, 0, NULL, false);
  5464  	if (err < 0) {
  5465  		debug("hwsim_register_device failed: %s\n", strerror(errno));
  5466  	}
  5467  	return err;
  5468  }
  5469  
  5470  static int hwsim_inject_frame(struct nlmsg* nlmsg, int sock, int hwsim_family, uint8* mac_addr, uint8* data, int len)
  5471  {
  5472  	struct genlmsghdr genlhdr;
  5473  	uint32 rx_rate = WIFI_DEFAULT_RX_RATE;
  5474  	uint32 signal = WIFI_DEFAULT_SIGNAL;
  5475  
  5476  	memset(&genlhdr, 0, sizeof(genlhdr));
  5477  	genlhdr.cmd = HWSIM_CMD_FRAME;
  5478  	netlink_init(nlmsg, hwsim_family, 0, &genlhdr, sizeof(genlhdr));
  5479  	netlink_attr(nlmsg, HWSIM_ATTR_RX_RATE, &rx_rate, sizeof(rx_rate));
  5480  	netlink_attr(nlmsg, HWSIM_ATTR_SIGNAL, &signal, sizeof(signal));
  5481  	netlink_attr(nlmsg, HWSIM_ATTR_ADDR_RECEIVER, mac_addr, ETH_ALEN);
  5482  	netlink_attr(nlmsg, HWSIM_ATTR_FRAME, data, len);
  5483  	int err = netlink_send_ext(nlmsg, sock, 0, NULL, false);
  5484  	if (err < 0) {
  5485  		debug("hwsim_inject_frame failed: %s\n", strerror(errno));
  5486  	}
  5487  	return err;
  5488  }
  5489  
  5490  static long syz_80211_inject_frame(volatile long a0, volatile long a1, volatile long a2)
  5491  {
  5492  	uint8* mac_addr = (uint8*)a0;
  5493  	uint8* buf = (uint8*)a1;
  5494  	int buf_len = (int)a2;
  5495  	struct nlmsg tmp_msg;
  5496  
  5497  	if (buf_len < 0 || buf_len > WIFI_MAX_INJECT_LEN) {
  5498  		debug("syz_80211_inject_frame: wrong buffer size %d\n", buf_len);
  5499  		return -1;
  5500  	}
  5501  
  5502  	int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
  5503  	if (sock < 0) {
  5504  		debug("syz_80211_inject_frame: socket creation failed, errno %d\n", errno);
  5505  		return -1;
  5506  	}
  5507  
  5508  	int hwsim_family_id = netlink_query_family_id(&tmp_msg, sock, "MAC80211_HWSIM", false);
  5509  	int ret = hwsim_register_socket(&tmp_msg, sock, hwsim_family_id);
  5510  	if (ret < 0) {
  5511  		debug("syz_80211_inject_frame: failed to register socket, ret %d\n", ret);
  5512  		close(sock);
  5513  		return -1;
  5514  	}
  5515  
  5516  	ret = hwsim_inject_frame(&tmp_msg, sock, hwsim_family_id, mac_addr, buf, buf_len);
  5517  	close(sock);
  5518  	if (ret < 0) {
  5519  		debug("syz_80211_inject_frame: failed to inject message, ret %d\n", ret);
  5520  		return -1;
  5521  	}
  5522  
  5523  	return 0;
  5524  }
  5525  
  5526  #endif
  5527  
  5528  #if SYZ_EXECUTOR || __NR_syz_80211_join_ibss
  5529  
  5530  #define WIFI_MAX_SSID_LEN 32
  5531  
  5532  #define WIFI_JOIN_IBSS_NO_SCAN 0
  5533  #define WIFI_JOIN_IBSS_BG_SCAN 1
  5534  #define WIFI_JOIN_IBSS_BG_NO_SCAN 2
  5535  
  5536  static long syz_80211_join_ibss(volatile long a0, volatile long a1, volatile long a2, volatile long a3)
  5537  {
  5538  	char* interface = (char*)a0;
  5539  	uint8* ssid = (uint8*)a1;
  5540  	int ssid_len = (int)a2;
  5541  	int mode = (int)a3; // This parameter essentially determines whether it will perform a scan
  5542  
  5543  	struct nlmsg tmp_msg;
  5544  	uint8 bssid[ETH_ALEN] = WIFI_IBSS_BSSID;
  5545  
  5546  	if (ssid_len < 0 || ssid_len > WIFI_MAX_SSID_LEN) {
  5547  		debug("syz_80211_join_ibss: invalid ssid len %d\n", ssid_len);
  5548  		return -1;
  5549  	}
  5550  
  5551  	if (mode < 0 || mode > WIFI_JOIN_IBSS_BG_NO_SCAN) {
  5552  		debug("syz_80211_join_ibss: invalid mode %d\n", mode);
  5553  		return -1;
  5554  	}
  5555  
  5556  	int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
  5557  	if (sock < 0) {
  5558  		debug("syz_80211_join_ibss: socket creation failed, errno %d\n", errno);
  5559  		return -1;
  5560  	}
  5561  
  5562  	int nl80211_family_id = netlink_query_family_id(&tmp_msg, sock, "nl80211", false);
  5563  	struct join_ibss_props ibss_props = {
  5564  	    .wiphy_freq = WIFI_DEFAULT_FREQUENCY,
  5565  	    .wiphy_freq_fixed = (mode == WIFI_JOIN_IBSS_NO_SCAN || mode == WIFI_JOIN_IBSS_BG_NO_SCAN),
  5566  	    .mac = bssid,
  5567  	    .ssid = ssid,
  5568  	    .ssid_len = ssid_len};
  5569  
  5570  	int ret = nl80211_setup_ibss_interface(&tmp_msg, sock, nl80211_family_id, interface, &ibss_props, false);
  5571  	close(sock);
  5572  	if (ret < 0) {
  5573  		debug("syz_80211_join_ibss: failed set up IBSS network for %.32s\n", interface);
  5574  		return -1;
  5575  	}
  5576  
  5577  	if (mode == WIFI_JOIN_IBSS_NO_SCAN) {
  5578  		ret = await_ifla_operstate(&tmp_msg, interface, IF_OPER_UP, false);
  5579  		if (ret < 0) {
  5580  			debug("syz_80211_join_ibss: await_ifla_operstate failed for %.32s, ret %d\n", interface, ret);
  5581  			return -1;
  5582  		}
  5583  	}
  5584  
  5585  	return 0;
  5586  }
  5587  
  5588  #endif
  5589  
  5590  #if SYZ_EXECUTOR || __NR_syz_clone || __NR_syz_clone3
  5591  #if SYZ_EXECUTOR
  5592  // The slowdown multiplier is already taken into account.
  5593  #define USLEEP_FORKED_CHILD (3 * syscall_timeout_ms * 1000)
  5594  #else
  5595  #define USLEEP_FORKED_CHILD (3 * /*{{{BASE_CALL_TIMEOUT_MS}}}*/ *1000)
  5596  #endif
  5597  
  5598  static long handle_clone_ret(long ret)
  5599  {
  5600  	if (ret != 0) {
  5601  #if SYZ_EXECUTOR || SYZ_HANDLE_SEGV
  5602  		__atomic_store_n(&clone_ongoing, 0, __ATOMIC_RELAXED);
  5603  #endif
  5604  		return ret;
  5605  	}
  5606  	// Exit if we're in the child process - not all kernels provide the proper means
  5607  	// to prevent fork-bombs.
  5608  	// But first sleep for some time. This will hopefully foster IPC fuzzing.
  5609  	usleep(USLEEP_FORKED_CHILD);
  5610  	// Note that exit_group is a bad choice here because if we created just a thread, then
  5611  	// the whole process will be killed. A plain exit will work fine in any case.
  5612  	syscall(__NR_exit, 0);
  5613  	while (1) {
  5614  	}
  5615  }
  5616  #endif
  5617  
  5618  #if SYZ_EXECUTOR || __NR_syz_clone
  5619  #include <sched.h>
  5620  
  5621  // syz_clone is mostly needed on kernels which do not suport clone3.
  5622  static long syz_clone(volatile long flags, volatile long stack, volatile long stack_len,
  5623  		      volatile long ptid, volatile long ctid, volatile long tls)
  5624  {
  5625  	// ABI requires 16-byte stack alignment.
  5626  	long sp = (stack + stack_len) & ~15;
  5627  #if SYZ_EXECUTOR || SYZ_HANDLE_SEGV
  5628  	__atomic_store_n(&clone_ongoing, 1, __ATOMIC_RELAXED);
  5629  #endif
  5630  	// Clear the CLONE_VM flag. Otherwise it'll very likely corrupt syz-executor.
  5631  	long ret = (long)syscall(__NR_clone, flags & ~CLONE_VM, sp, ptid, ctid, tls);
  5632  	return handle_clone_ret(ret);
  5633  }
  5634  #endif
  5635  
  5636  #if SYZ_EXECUTOR || __NR_syz_clone3
  5637  #include <linux/sched.h>
  5638  #include <sched.h>
  5639  
  5640  #define MAX_CLONE_ARGS_BYTES 256
  5641  static long syz_clone3(volatile long a0, volatile long a1)
  5642  {
  5643  	unsigned long copy_size = a1;
  5644  	if (copy_size < sizeof(uint64) || copy_size > MAX_CLONE_ARGS_BYTES)
  5645  		return -1;
  5646  	// The structure may have different sizes on different kernel versions, so copy it as raw bytes.
  5647  	char clone_args[MAX_CLONE_ARGS_BYTES];
  5648  	memcpy(&clone_args, (void*)a0, copy_size);
  5649  
  5650  	// As in syz_clone, clear the CLONE_VM flag. Flags are in the first 8-byte integer field.
  5651  	uint64* flags = (uint64*)&clone_args;
  5652  	*flags &= ~CLONE_VM;
  5653  #if SYZ_EXECUTOR || SYZ_HANDLE_SEGV
  5654  	__atomic_store_n(&clone_ongoing, 1, __ATOMIC_RELAXED);
  5655  #endif
  5656  	return handle_clone_ret((long)syscall(__NR_clone3, &clone_args, copy_size));
  5657  }
  5658  
  5659  #endif
  5660  
  5661  #if SYZ_EXECUTOR || __NR_syz_pkey_set
  5662  // syz_pkey_set(key pkey, val flags[pkey_flags])
  5663  static long syz_pkey_set(volatile long pkey, volatile long val)
  5664  {
  5665  #if GOARCH_amd64 || GOARCH_386
  5666  	uint32 eax = 0;
  5667  	uint32 ecx = 0;
  5668  	asm volatile("rdpkru"
  5669  		     : "=a"(eax)
  5670  		     : "c"(ecx)
  5671  		     : "edx");
  5672  	// PKRU register contains 2 bits per key.
  5673  	// Max number of keys is 16.
  5674  	// Clear old bits for the key:
  5675  	eax &= ~(3 << ((pkey % 16) * 2));
  5676  	// Set new bits for the key:
  5677  	eax |= (val & 3) << ((pkey % 16) * 2);
  5678  	uint32 edx = 0;
  5679  	asm volatile("wrpkru" ::"a"(eax), "c"(ecx), "d"(edx));
  5680  #endif
  5681  	return 0;
  5682  }
  5683  #endif
  5684  
  5685  #if SYZ_EXECUTOR || SYZ_SWAP
  5686  #include <fcntl.h>
  5687  #include <linux/falloc.h>
  5688  #include <stdio.h>
  5689  #include <string.h>
  5690  #include <sys/stat.h>
  5691  #include <sys/swap.h>
  5692  #include <sys/types.h>
  5693  
  5694  #define SWAP_FILE "./swap-file"
  5695  #define SWAP_FILE_SIZE (128 * 1000 * 1000) // 128 MB.
  5696  
  5697  static void setup_swap()
  5698  {
  5699  	// The call must be idempotent, so first disable swap and remove the swap file.
  5700  	swapoff(SWAP_FILE);
  5701  	unlink(SWAP_FILE);
  5702  	// Zero-fill the file.
  5703  	int fd = open(SWAP_FILE, O_CREAT | O_WRONLY | O_CLOEXEC, 0600);
  5704  	if (fd == -1)
  5705  		failmsg("swap file open failed", "file: %s", SWAP_FILE);
  5706  	// We cannot do ftruncate -- swapon complains about this. Do fallocate instead.
  5707  	fallocate(fd, FALLOC_FL_ZERO_RANGE, 0, SWAP_FILE_SIZE);
  5708  	close(fd);
  5709  	// Set up the swap file.
  5710  	char cmdline[64];
  5711  	sprintf(cmdline, "mkswap %s", SWAP_FILE);
  5712  	if (runcmdline(cmdline))
  5713  		fail("mkswap failed");
  5714  	if (swapon(SWAP_FILE, SWAP_FLAG_PREFER) == 1)
  5715  		failmsg("swapon failed", "file: %s", SWAP_FILE);
  5716  }
  5717  
  5718  #endif
  5719  
  5720  #if SYZ_EXECUTOR || __NR_syz_pidfd_open
  5721  #include <sys/syscall.h>
  5722  
  5723  // TODO: long-term we should improve our sandboxing rules since there are also
  5724  // many other opportunities for a fuzzer process to access what it shouldn't.
  5725  // Here we only shut down one of the recently discovered ways.
  5726  static long syz_pidfd_open(volatile long pid, volatile long flags)
  5727  {
  5728  	if (pid == 1) {
  5729  		// Under a PID namespace, pid=1 is the parent process.
  5730  		// We don't want a forked child to mangle parent syz-executor's fds.
  5731  		pid = 0;
  5732  	}
  5733  	return syscall(__NR_pidfd_open, pid, flags);
  5734  }
  5735  
  5736  #endif