github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/executor/common_linux.h (about) 1 // Copyright 2016 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 // This file is shared between executor and csource package. 5 6 #include <stdlib.h> 7 #include <sys/syscall.h> 8 #include <sys/types.h> 9 #include <unistd.h> 10 11 #if SYZ_EXECUTOR 12 const int kExtraCoverSize = 1024 << 10; 13 struct cover_t; 14 static void cover_reset(cover_t* cov); 15 #endif 16 17 #if SYZ_EXECUTOR || SYZ_THREADED 18 #include <linux/futex.h> 19 #include <pthread.h> 20 21 typedef struct { 22 int state; 23 } event_t; 24 25 static void event_init(event_t* ev) 26 { 27 ev->state = 0; 28 } 29 30 static void event_reset(event_t* ev) 31 { 32 ev->state = 0; 33 } 34 35 static void event_set(event_t* ev) 36 { 37 if (ev->state) 38 exitf("event already set"); 39 __atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE); 40 syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1000000); 41 } 42 43 static void event_wait(event_t* ev) 44 { 45 while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE)) 46 syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0); 47 } 48 49 static int event_isset(event_t* ev) 50 { 51 return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE); 52 } 53 54 static int event_timedwait(event_t* ev, uint64 timeout) 55 { 56 uint64 start = current_time_ms(); 57 uint64 now = start; 58 for (;;) { 59 uint64 remain = timeout - (now - start); 60 struct timespec ts; 61 ts.tv_sec = remain / 1000; 62 ts.tv_nsec = (remain % 1000) * 1000 * 1000; 63 syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts); 64 if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE)) 65 return 1; 66 now = current_time_ms(); 67 if (now - start > timeout) 68 return 0; 69 } 70 } 71 #endif 72 73 #if SYZ_EXECUTOR || SYZ_REPEAT || SYZ_NET_INJECTION || SYZ_FAULT || SYZ_SANDBOX_NONE || \ 74 
SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID || \ 75 SYZ_FAULT || SYZ_LEAK || SYZ_BINFMT_MISC || SYZ_SYSCTL || \ 76 ((__NR_syz_usb_connect || __NR_syz_usb_connect_ath9k) && USB_DEBUG) || \ 77 __NR_syz_usbip_server_init 78 #include <errno.h> 79 #include <fcntl.h> 80 #include <stdarg.h> 81 #include <stdbool.h> 82 #include <string.h> 83 #include <sys/stat.h> 84 #include <sys/types.h> 85 86 static bool write_file(const char* file, const char* what, ...) 87 { 88 char buf[1024]; 89 va_list args; 90 va_start(args, what); 91 vsnprintf(buf, sizeof(buf), what, args); 92 va_end(args); 93 buf[sizeof(buf) - 1] = 0; 94 int len = strlen(buf); 95 96 int fd = open(file, O_WRONLY | O_CLOEXEC); 97 if (fd == -1) 98 return false; 99 if (write(fd, buf, len) != len) { 100 int err = errno; 101 close(fd); 102 debug("write(%s) failed: %d\n", file, err); 103 errno = err; 104 return false; 105 } 106 close(fd); 107 return true; 108 } 109 #endif 110 111 #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION || SYZ_DEVLINK_PCI || SYZ_WIFI || SYZ_802154 || \ 112 __NR_syz_genetlink_get_family_id || __NR_syz_80211_inject_frame || __NR_syz_80211_join_ibss || SYZ_NIC_VF 113 #include <arpa/inet.h> 114 #include <errno.h> 115 #include <net/if.h> 116 #include <netinet/in.h> 117 #include <stdbool.h> 118 #include <string.h> 119 #include <sys/socket.h> 120 #include <sys/types.h> 121 122 #include <linux/genetlink.h> 123 #include <linux/if_addr.h> 124 #include <linux/if_link.h> 125 #include <linux/in6.h> 126 #include <linux/neighbour.h> 127 #include <linux/net.h> 128 #include <linux/netlink.h> 129 #include <linux/rtnetlink.h> 130 #include <linux/veth.h> 131 132 struct nlmsg { 133 char* pos; 134 int nesting; 135 struct nlattr* nested[8]; 136 char buf[4096]; 137 }; 138 139 static void netlink_init(struct nlmsg* nlmsg, int typ, int flags, 140 const void* data, int size) 141 { 142 memset(nlmsg, 0, sizeof(*nlmsg)); 143 struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg->buf; 144 hdr->nlmsg_type 
= typ; 145 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags; 146 memcpy(hdr + 1, data, size); 147 nlmsg->pos = (char*)(hdr + 1) + NLMSG_ALIGN(size); 148 } 149 150 static void netlink_attr(struct nlmsg* nlmsg, int typ, 151 const void* data, int size) 152 { 153 struct nlattr* attr = (struct nlattr*)nlmsg->pos; 154 attr->nla_len = sizeof(*attr) + size; 155 attr->nla_type = typ; 156 if (size > 0) 157 memcpy(attr + 1, data, size); 158 nlmsg->pos += NLMSG_ALIGN(attr->nla_len); 159 } 160 161 #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_802154 162 static void netlink_nest(struct nlmsg* nlmsg, int typ) 163 { 164 struct nlattr* attr = (struct nlattr*)nlmsg->pos; 165 attr->nla_type = typ; 166 nlmsg->pos += sizeof(*attr); 167 nlmsg->nested[nlmsg->nesting++] = attr; 168 } 169 170 static void netlink_done(struct nlmsg* nlmsg) 171 { 172 struct nlattr* attr = nlmsg->nested[--nlmsg->nesting]; 173 attr->nla_len = nlmsg->pos - (char*)attr; 174 } 175 176 #if SYZ_EXECUTOR || SYZ_NIC_VF 177 #include <ifaddrs.h> 178 #include <linux/ethtool.h> 179 #include <linux/sockios.h> 180 #include <sys/ioctl.h> 181 182 struct vf_intf { 183 char pass_thru_intf[IFNAMSIZ]; 184 int ppid; // used by Child 185 }; 186 187 static struct vf_intf vf_intf; 188 189 static void find_vf_interface(void) 190 { 191 #if SYZ_EXECUTOR 192 if (!flag_nic_vf) 193 return; 194 #endif 195 struct ifaddrs* addresses = NULL; 196 int pid = getpid(); 197 int ret = 0; 198 199 memset(&vf_intf, 0, sizeof(struct vf_intf)); 200 201 debug("Checking for VF pass-thru interface.\n"); 202 if (getifaddrs(&addresses) == -1) { 203 debug("%s: getifaddrs() failed.\n", __func__); 204 return; 205 } 206 207 int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP); 208 209 if (fd < 0) { 210 debug("%s: socket() failed.\n", __func__); 211 return; 212 } 213 struct ifreq ifr; 214 struct ethtool_drvinfo drvinfo; 215 struct ifaddrs* address = addresses; 216 217 while (address) { 218 debug("ifa_name: %s\n", address->ifa_name); 219 memset(&ifr, 0, sizeof(struct 
ifreq)); 220 strcpy(ifr.ifr_name, address->ifa_name); 221 memset(&drvinfo, 0, sizeof(struct ethtool_drvinfo)); 222 drvinfo.cmd = ETHTOOL_GDRVINFO; 223 ifr.ifr_data = (caddr_t)&drvinfo; 224 ret = ioctl(fd, SIOCETHTOOL, &ifr); 225 226 if (ret < 0) { 227 debug("%s: ioctl() failed.\n", __func__); 228 } else if (strlen(drvinfo.bus_info)) { 229 debug("bus_info: %s, strlen(drvinfo.bus_info)=%zu\n", 230 drvinfo.bus_info, strlen(drvinfo.bus_info)); 231 if (strcmp(drvinfo.bus_info, "0000:00:11.0") == 0) { 232 if (strlen(address->ifa_name) < IFNAMSIZ) { 233 strncpy(vf_intf.pass_thru_intf, 234 address->ifa_name, IFNAMSIZ); 235 vf_intf.ppid = pid; 236 } else { 237 debug("%s: %d strlen(%s) >= IFNAMSIZ.\n", 238 __func__, pid, address->ifa_name); 239 } 240 break; 241 } 242 } 243 address = address->ifa_next; 244 } 245 freeifaddrs(addresses); 246 if (!vf_intf.ppid) { 247 memset(&vf_intf, 0, sizeof(struct vf_intf)); 248 debug("%s: %d could not find VF pass-thru interface.\n", __func__, pid); 249 return; 250 } 251 debug("%s: %d found VF pass-thru interface %s\n", 252 __func__, pid, vf_intf.pass_thru_intf); 253 } 254 #endif // SYZ_NIC_VF 255 256 #endif 257 258 static int netlink_send_ext(struct nlmsg* nlmsg, int sock, 259 uint16 reply_type, int* reply_len, bool dofail) 260 { 261 #if SYZ_EXECUTOR 262 if (in_execute_one && dofail) { 263 // We can expect different sorts of breakages during fuzzing, 264 // we should not kill the whole process because of them. 
265 failmsg("invalid netlink_send_ext arguments", "dofail is true during syscall execution"); 266 } 267 #endif 268 if (nlmsg->pos > nlmsg->buf + sizeof(nlmsg->buf) || nlmsg->nesting) 269 fail("nlmsg overflow/bad nesting"); 270 struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg->buf; 271 hdr->nlmsg_len = nlmsg->pos - nlmsg->buf; 272 struct sockaddr_nl addr; 273 memset(&addr, 0, sizeof(addr)); 274 addr.nl_family = AF_NETLINK; 275 ssize_t n = sendto(sock, nlmsg->buf, hdr->nlmsg_len, 0, (struct sockaddr*)&addr, sizeof(addr)); 276 if (n != (ssize_t)hdr->nlmsg_len) { 277 if (dofail) 278 failmsg("netlink_send_ext: short netlink write", "wrote=%zd, want=%d", n, hdr->nlmsg_len); 279 debug("netlink_send_ext: short netlink write: %zd/%d errno=%d\n", n, hdr->nlmsg_len, errno); 280 return -1; 281 } 282 n = recv(sock, nlmsg->buf, sizeof(nlmsg->buf), 0); 283 if (reply_len) 284 *reply_len = 0; 285 if (n < 0) { 286 if (dofail) 287 fail("netlink_send_ext: netlink read failed"); 288 debug("netlink_send_ext: netlink read failed: errno=%d\n", errno); 289 return -1; 290 } 291 if (n < (ssize_t)sizeof(struct nlmsghdr)) { 292 errno = EINVAL; 293 if (dofail) 294 failmsg("netlink_send_ext: short netlink read", "read=%zd", n); 295 debug("netlink_send_ext: short netlink read: %zd\n", n); 296 return -1; 297 } 298 if (hdr->nlmsg_type == NLMSG_DONE) 299 return 0; 300 if (reply_len && hdr->nlmsg_type == reply_type) { 301 *reply_len = n; 302 return 0; 303 } 304 if (n < (ssize_t)(sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr))) { 305 errno = EINVAL; 306 if (dofail) 307 failmsg("netlink_send_ext: short netlink read", "read=%zd", n); 308 debug("netlink_send_ext: short netlink read: %zd\n", n); 309 return -1; 310 } 311 if (hdr->nlmsg_type != NLMSG_ERROR) { 312 errno = EINVAL; 313 if (dofail) 314 failmsg("netlink_send_ext: bad netlink ack type", "type=%d", hdr->nlmsg_type); 315 debug("netlink_send_ext: short netlink ack: %d\n", hdr->nlmsg_type); 316 return -1; 317 } 318 errno = -((struct nlmsgerr*)(hdr + 
1))->error; 319 return -errno; 320 } 321 322 #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION || SYZ_DEVLINK_PCI || SYZ_WIFI || SYZ_802154 323 static int netlink_send(struct nlmsg* nlmsg, int sock) 324 { 325 return netlink_send_ext(nlmsg, sock, 0, NULL, true); 326 } 327 #endif 328 329 static int netlink_query_family_id(struct nlmsg* nlmsg, int sock, const char* family_name, bool dofail) 330 { 331 struct genlmsghdr genlhdr; 332 memset(&genlhdr, 0, sizeof(genlhdr)); 333 genlhdr.cmd = CTRL_CMD_GETFAMILY; 334 netlink_init(nlmsg, GENL_ID_CTRL, 0, &genlhdr, sizeof(genlhdr)); 335 netlink_attr(nlmsg, CTRL_ATTR_FAMILY_NAME, family_name, strnlen(family_name, GENL_NAMSIZ - 1) + 1); 336 int n = 0; 337 int err = netlink_send_ext(nlmsg, sock, GENL_ID_CTRL, &n, dofail); 338 if (err < 0) { 339 debug("netlink: failed to get family id for %.*s: %s\n", GENL_NAMSIZ, family_name, strerror(errno)); 340 return -1; 341 } 342 uint16 id = 0; 343 struct nlattr* attr = (struct nlattr*)(nlmsg->buf + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(genlhdr))); 344 for (; (char*)attr < nlmsg->buf + n; attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) { 345 if (attr->nla_type == CTRL_ATTR_FAMILY_ID) { 346 id = *(uint16*)(attr + 1); 347 break; 348 } 349 } 350 if (!id) { 351 debug("netlink: failed to parse family id for %.*s\n", GENL_NAMSIZ, family_name); 352 errno = EINVAL; 353 return -1; 354 } 355 recv(sock, nlmsg->buf, sizeof(nlmsg->buf), 0); // recv ack 356 357 return id; 358 } 359 360 #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_DEVLINK_PCI 361 static int netlink_next_msg(struct nlmsg* nlmsg, unsigned int offset, 362 unsigned int total_len) 363 { 364 struct nlmsghdr* hdr = (struct nlmsghdr*)(nlmsg->buf + offset); 365 366 if (offset == total_len || offset + hdr->nlmsg_len > total_len) 367 return -1; 368 return hdr->nlmsg_len; 369 } 370 #endif 371 372 #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_802154 373 374 // Force few TX and RX queues per interface to avoid creating 2 sysfs entries 
375 // per CPU per interface which takes a long time on machines with many cores. 376 static unsigned int queue_count = 2; 377 378 static void netlink_add_device_impl(struct nlmsg* nlmsg, const char* type, 379 const char* name, bool up) 380 { 381 struct ifinfomsg hdr; 382 memset(&hdr, 0, sizeof(hdr)); 383 if (up) 384 hdr.ifi_flags = hdr.ifi_change = IFF_UP; 385 netlink_init(nlmsg, RTM_NEWLINK, NLM_F_EXCL | NLM_F_CREATE, &hdr, sizeof(hdr)); 386 if (name) 387 netlink_attr(nlmsg, IFLA_IFNAME, name, strlen(name)); 388 389 netlink_attr(nlmsg, IFLA_NUM_TX_QUEUES, &queue_count, sizeof(queue_count)); 390 netlink_attr(nlmsg, IFLA_NUM_RX_QUEUES, &queue_count, sizeof(queue_count)); 391 392 netlink_nest(nlmsg, IFLA_LINKINFO); 393 netlink_attr(nlmsg, IFLA_INFO_KIND, type, strlen(type)); 394 } 395 #endif 396 397 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 398 static void netlink_add_device(struct nlmsg* nlmsg, int sock, const char* type, 399 const char* name) 400 { 401 netlink_add_device_impl(nlmsg, type, name, false); 402 netlink_done(nlmsg); 403 int err = netlink_send(nlmsg, sock); 404 if (err < 0) { 405 debug("netlink: adding device %s type %s: %s\n", name, type, strerror(errno)); 406 } 407 } 408 409 static void netlink_add_veth(struct nlmsg* nlmsg, int sock, const char* name, 410 const char* peer) 411 { 412 netlink_add_device_impl(nlmsg, "veth", name, false); 413 netlink_nest(nlmsg, IFLA_INFO_DATA); 414 netlink_nest(nlmsg, VETH_INFO_PEER); 415 nlmsg->pos += sizeof(struct ifinfomsg); 416 netlink_attr(nlmsg, IFLA_IFNAME, peer, strlen(peer)); 417 netlink_attr(nlmsg, IFLA_NUM_TX_QUEUES, &queue_count, sizeof(queue_count)); 418 netlink_attr(nlmsg, IFLA_NUM_RX_QUEUES, &queue_count, sizeof(queue_count)); 419 netlink_done(nlmsg); 420 netlink_done(nlmsg); 421 netlink_done(nlmsg); 422 int err = netlink_send(nlmsg, sock); 423 if (err < 0) { 424 debug("netlink: adding device %s type veth peer %s: %s\n", name, peer, strerror(errno)); 425 } 426 } 427 428 static void netlink_add_xfrm(struct nlmsg* 
nlmsg, int sock, const char* name) 429 { 430 netlink_add_device_impl(nlmsg, "xfrm", name, true); 431 netlink_nest(nlmsg, IFLA_INFO_DATA); 432 int if_id = 1; 433 // This is IFLA_XFRM_IF_ID attr which is not present in older kernel headers. 434 netlink_attr(nlmsg, 2, &if_id, sizeof(if_id)); 435 netlink_done(nlmsg); 436 netlink_done(nlmsg); 437 int err = netlink_send(nlmsg, sock); 438 if (err < 0) { 439 debug("netlink: adding device %s type xfrm if_id %d: %s\n", name, if_id, strerror(errno)); 440 } 441 } 442 443 static void netlink_add_hsr(struct nlmsg* nlmsg, int sock, const char* name, 444 const char* slave1, const char* slave2) 445 { 446 netlink_add_device_impl(nlmsg, "hsr", name, false); 447 netlink_nest(nlmsg, IFLA_INFO_DATA); 448 int ifindex1 = if_nametoindex(slave1); 449 netlink_attr(nlmsg, IFLA_HSR_SLAVE1, &ifindex1, sizeof(ifindex1)); 450 int ifindex2 = if_nametoindex(slave2); 451 netlink_attr(nlmsg, IFLA_HSR_SLAVE2, &ifindex2, sizeof(ifindex2)); 452 netlink_done(nlmsg); 453 netlink_done(nlmsg); 454 int err = netlink_send(nlmsg, sock); 455 if (err < 0) { 456 debug("netlink: adding device %s type hsr slave1 %s slave2 %s: %s\n", name, slave1, slave2, strerror(errno)); 457 } 458 } 459 460 static void netlink_add_linked(struct nlmsg* nlmsg, int sock, const char* type, const char* name, const char* link) 461 { 462 netlink_add_device_impl(nlmsg, type, name, false); 463 netlink_done(nlmsg); 464 int ifindex = if_nametoindex(link); 465 netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex)); 466 int err = netlink_send(nlmsg, sock); 467 if (err < 0) { 468 debug("netlink: adding device %s type %s link %s: %s\n", name, type, link, strerror(errno)); 469 } 470 } 471 472 static void netlink_add_vlan(struct nlmsg* nlmsg, int sock, const char* name, const char* link, uint16 id, uint16 proto) 473 { 474 netlink_add_device_impl(nlmsg, "vlan", name, false); 475 netlink_nest(nlmsg, IFLA_INFO_DATA); 476 netlink_attr(nlmsg, IFLA_VLAN_ID, &id, sizeof(id)); 477 netlink_attr(nlmsg, 
IFLA_VLAN_PROTOCOL, &proto, sizeof(proto)); 478 netlink_done(nlmsg); 479 netlink_done(nlmsg); 480 int ifindex = if_nametoindex(link); 481 netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex)); 482 int err = netlink_send(nlmsg, sock); 483 if (err < 0) { 484 debug("netlink: add %s type vlan link %s id %d: %s\n", name, link, id, strerror(errno)); 485 } 486 } 487 488 static void netlink_add_macvlan(struct nlmsg* nlmsg, int sock, const char* name, const char* link) 489 { 490 netlink_add_device_impl(nlmsg, "macvlan", name, false); 491 netlink_nest(nlmsg, IFLA_INFO_DATA); 492 uint32 mode = MACVLAN_MODE_BRIDGE; 493 netlink_attr(nlmsg, IFLA_MACVLAN_MODE, &mode, sizeof(mode)); 494 netlink_done(nlmsg); 495 netlink_done(nlmsg); 496 int ifindex = if_nametoindex(link); 497 netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex)); 498 int err = netlink_send(nlmsg, sock); 499 if (err < 0) { 500 debug("netlink: add %s type macvlan link %s mode %d: %s\n", name, link, mode, strerror(errno)); 501 } 502 } 503 504 static void netlink_add_geneve(struct nlmsg* nlmsg, int sock, const char* name, uint32 vni, struct in_addr* addr4, struct in6_addr* addr6) 505 { 506 netlink_add_device_impl(nlmsg, "geneve", name, false); 507 netlink_nest(nlmsg, IFLA_INFO_DATA); 508 netlink_attr(nlmsg, IFLA_GENEVE_ID, &vni, sizeof(vni)); 509 if (addr4) 510 netlink_attr(nlmsg, IFLA_GENEVE_REMOTE, addr4, sizeof(*addr4)); 511 if (addr6) 512 netlink_attr(nlmsg, IFLA_GENEVE_REMOTE6, addr6, sizeof(*addr6)); 513 netlink_done(nlmsg); 514 netlink_done(nlmsg); 515 int err = netlink_send(nlmsg, sock); 516 if (err < 0) { 517 debug("netlink: add %s type geneve vni %u: %s\n", name, vni, strerror(errno)); 518 } 519 } 520 521 #define IFLA_IPVLAN_FLAGS 2 522 #define IPVLAN_MODE_L3S 2 523 #undef IPVLAN_F_VEPA 524 #define IPVLAN_F_VEPA 2 525 526 static void netlink_add_ipvlan(struct nlmsg* nlmsg, int sock, const char* name, const char* link, uint16 mode, uint16 flags) 527 { 528 netlink_add_device_impl(nlmsg, "ipvlan", name, 
false); 529 netlink_nest(nlmsg, IFLA_INFO_DATA); 530 netlink_attr(nlmsg, IFLA_IPVLAN_MODE, &mode, sizeof(mode)); 531 netlink_attr(nlmsg, IFLA_IPVLAN_FLAGS, &flags, sizeof(flags)); 532 netlink_done(nlmsg); 533 netlink_done(nlmsg); 534 int ifindex = if_nametoindex(link); 535 netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex)); 536 int err = netlink_send(nlmsg, sock); 537 if (err < 0) { 538 debug("netlink: add %s type ipvlan link %s mode %d: %s\n", name, link, mode, strerror(errno)); 539 } 540 } 541 #endif 542 543 #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION || SYZ_DEVLINK_PCI || SYZ_802154 544 static void netlink_device_change(struct nlmsg* nlmsg, int sock, const char* name, bool up, 545 const char* master, const void* mac, int macsize, 546 const char* new_name) 547 { 548 struct ifinfomsg hdr; 549 memset(&hdr, 0, sizeof(hdr)); 550 if (up) 551 hdr.ifi_flags = hdr.ifi_change = IFF_UP; 552 hdr.ifi_index = if_nametoindex(name); 553 netlink_init(nlmsg, RTM_NEWLINK, 0, &hdr, sizeof(hdr)); 554 if (new_name) 555 netlink_attr(nlmsg, IFLA_IFNAME, new_name, strlen(new_name)); 556 if (master) { 557 int ifindex = if_nametoindex(master); 558 netlink_attr(nlmsg, IFLA_MASTER, &ifindex, sizeof(ifindex)); 559 } 560 if (macsize) 561 netlink_attr(nlmsg, IFLA_ADDRESS, mac, macsize); 562 int err = netlink_send(nlmsg, sock); 563 if (err < 0) { 564 debug("netlink: device %s up master %s: %s\n", name, master ? master : "NULL", strerror(errno)); 565 } 566 } 567 #endif 568 569 #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION 570 static int netlink_add_addr(struct nlmsg* nlmsg, int sock, const char* dev, 571 const void* addr, int addrsize) 572 { 573 struct ifaddrmsg hdr; 574 memset(&hdr, 0, sizeof(hdr)); 575 hdr.ifa_family = addrsize == 4 ? AF_INET : AF_INET6; 576 hdr.ifa_prefixlen = addrsize == 4 ? 
24 : 120; 577 hdr.ifa_scope = RT_SCOPE_UNIVERSE; 578 hdr.ifa_index = if_nametoindex(dev); 579 netlink_init(nlmsg, RTM_NEWADDR, NLM_F_CREATE | NLM_F_REPLACE, &hdr, sizeof(hdr)); 580 netlink_attr(nlmsg, IFA_LOCAL, addr, addrsize); 581 netlink_attr(nlmsg, IFA_ADDRESS, addr, addrsize); 582 return netlink_send(nlmsg, sock); 583 } 584 585 static void netlink_add_addr4(struct nlmsg* nlmsg, int sock, 586 const char* dev, const char* addr) 587 { 588 struct in_addr in_addr; 589 inet_pton(AF_INET, addr, &in_addr); 590 int err = netlink_add_addr(nlmsg, sock, dev, &in_addr, sizeof(in_addr)); 591 if (err < 0) { 592 debug("netlink: add addr %s dev %s: %s\n", addr, dev, strerror(errno)); 593 } 594 } 595 596 static void netlink_add_addr6(struct nlmsg* nlmsg, int sock, 597 const char* dev, const char* addr) 598 { 599 struct in6_addr in6_addr; 600 inet_pton(AF_INET6, addr, &in6_addr); 601 int err = netlink_add_addr(nlmsg, sock, dev, &in6_addr, sizeof(in6_addr)); 602 if (err < 0) { 603 debug("netlink: add addr %s dev %s: %s\n", addr, dev, strerror(errno)); 604 } 605 } 606 #endif 607 608 #if SYZ_EXECUTOR || SYZ_NET_INJECTION 609 static void netlink_add_neigh(struct nlmsg* nlmsg, int sock, const char* name, 610 const void* addr, int addrsize, const void* mac, int macsize) 611 { 612 struct ndmsg hdr; 613 memset(&hdr, 0, sizeof(hdr)); 614 hdr.ndm_family = addrsize == 4 ? 
AF_INET : AF_INET6; 615 hdr.ndm_ifindex = if_nametoindex(name); 616 hdr.ndm_state = NUD_PERMANENT; 617 netlink_init(nlmsg, RTM_NEWNEIGH, NLM_F_EXCL | NLM_F_CREATE, &hdr, sizeof(hdr)); 618 netlink_attr(nlmsg, NDA_DST, addr, addrsize); 619 netlink_attr(nlmsg, NDA_LLADDR, mac, macsize); 620 int err = netlink_send(nlmsg, sock); 621 if (err < 0) { 622 debug("netlink: add neigh %s addr %d lladdr %d: %s\n", name, addrsize, macsize, strerror(errno)); 623 } 624 } 625 #endif 626 #endif 627 628 #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION || SYZ_DEVLINK_PCI || SYZ_WIFI || SYZ_802154 629 static struct nlmsg nlmsg; 630 #endif 631 632 #if SYZ_EXECUTOR || SYZ_NET_INJECTION 633 #include <arpa/inet.h> 634 #include <errno.h> 635 #include <fcntl.h> 636 #include <net/if.h> 637 #include <net/if_arp.h> 638 #include <stdarg.h> 639 #include <stdbool.h> 640 #include <sys/ioctl.h> 641 #include <sys/stat.h> 642 643 #include <linux/if_ether.h> 644 #include <linux/if_tun.h> 645 #include <linux/ip.h> 646 #include <linux/tcp.h> 647 648 static int tunfd = -1; 649 650 #define TUN_IFACE "syz_tun" 651 #define LOCAL_MAC 0xaaaaaaaaaaaa 652 #define REMOTE_MAC 0xaaaaaaaaaabb 653 #define LOCAL_IPV4 "172.20.20.170" 654 #define REMOTE_IPV4 "172.20.20.187" 655 #define LOCAL_IPV6 "fe80::aa" 656 #define REMOTE_IPV6 "fe80::bb" 657 658 #ifndef IFF_NAPI 659 #define IFF_NAPI 0x0010 660 #endif 661 #if ENABLE_NAPI_FRAGS 662 static int tun_frags_enabled; 663 #ifndef IFF_NAPI_FRAGS 664 #define IFF_NAPI_FRAGS 0x0020 665 #endif 666 #endif 667 668 static void initialize_tun(void) 669 { 670 #if SYZ_EXECUTOR 671 if (!flag_net_injection) 672 return; 673 #endif 674 tunfd = open("/dev/net/tun", O_RDWR | O_NONBLOCK); 675 if (tunfd == -1) { 676 #if SYZ_EXECUTOR 677 fail("tun: can't open /dev/net/tun"); 678 #else 679 printf("tun: can't open /dev/net/tun: please enable CONFIG_TUN=y\n"); 680 printf("otherwise fuzzing or reproducing might not work as intended\n"); 681 return; 682 #endif 683 } 684 // Remap tun onto 
higher fd number to hide it from fuzzer and to keep 685 // fd numbers stable regardless of whether tun is opened or not (also see kMaxFd). 686 const int kTunFd = 200; 687 if (dup2(tunfd, kTunFd) < 0) 688 fail("dup2(tunfd, kTunFd) failed"); 689 close(tunfd); 690 tunfd = kTunFd; 691 692 struct ifreq ifr; 693 memset(&ifr, 0, sizeof(ifr)); 694 strncpy(ifr.ifr_name, TUN_IFACE, IFNAMSIZ); 695 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 696 // Note: SYZ_ENABLE_NAPI_FRAGS is never enabled. This is code is only for reference 697 // in case we figure out how IFF_NAPI_FRAGS works. With IFF_NAPI_FRAGS packets 698 // don't reach destinations and bail out in udp_gro_receive (see #1594). 699 // Also IFF_NAPI_FRAGS does not work with sandbox_namespace (see comment there). 700 #if ENABLE_NAPI_FRAGS 701 ifr.ifr_flags |= IFF_NAPI | IFF_NAPI_FRAGS; 702 #endif 703 if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0) { 704 #if ENABLE_NAPI_FRAGS 705 // IFF_NAPI_FRAGS requires root, so try without it. 706 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 707 if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0) 708 #endif 709 fail("tun: ioctl(TUNSETIFF) failed"); 710 } 711 #if ENABLE_NAPI_FRAGS 712 // If IFF_NAPI_FRAGS is not supported it will be silently dropped, 713 // so query the effective flags. 714 if (ioctl(tunfd, TUNGETIFF, (void*)&ifr) < 0) 715 fail("tun: ioctl(TUNGETIFF) failed"); 716 tun_frags_enabled = (ifr.ifr_flags & IFF_NAPI_FRAGS) != 0; 717 debug("tun_frags_enabled=%d\n", tun_frags_enabled); 718 #endif 719 720 // Disable IPv6 DAD, otherwise the address remains unusable until DAD completes. 721 // Don't panic because this is an optional config. 722 char sysctl[64]; 723 sprintf(sysctl, "/proc/sys/net/ipv6/conf/%s/accept_dad", TUN_IFACE); 724 write_file(sysctl, "0"); 725 // Disable IPv6 router solicitation to prevent IPv6 spam. 726 // Don't panic because this is an optional config. 
727 sprintf(sysctl, "/proc/sys/net/ipv6/conf/%s/router_solicitations", TUN_IFACE); 728 write_file(sysctl, "0"); 729 // There seems to be no way to disable IPv6 MTD to prevent more IPv6 spam. 730 731 int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 732 if (sock == -1) 733 fail("socket(AF_NETLINK) failed"); 734 735 netlink_add_addr4(&nlmsg, sock, TUN_IFACE, LOCAL_IPV4); 736 netlink_add_addr6(&nlmsg, sock, TUN_IFACE, LOCAL_IPV6); 737 uint64 macaddr = REMOTE_MAC; 738 struct in_addr in_addr; 739 inet_pton(AF_INET, REMOTE_IPV4, &in_addr); 740 netlink_add_neigh(&nlmsg, sock, TUN_IFACE, &in_addr, sizeof(in_addr), &macaddr, ETH_ALEN); 741 struct in6_addr in6_addr; 742 inet_pton(AF_INET6, REMOTE_IPV6, &in6_addr); 743 netlink_add_neigh(&nlmsg, sock, TUN_IFACE, &in6_addr, sizeof(in6_addr), &macaddr, ETH_ALEN); 744 macaddr = LOCAL_MAC; 745 netlink_device_change(&nlmsg, sock, TUN_IFACE, true, 0, &macaddr, ETH_ALEN, NULL); 746 close(sock); 747 } 748 #endif 749 750 #if SYZ_EXECUTOR || __NR_syz_init_net_socket || SYZ_DEVLINK_PCI || __NR_syz_socket_connect_nvme_tcp 751 const int kInitNetNsFd = 201; // see kMaxFd 752 #endif 753 754 #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI || SYZ_NET_DEVICES 755 756 #include <linux/genetlink.h> 757 #include <stdbool.h> 758 759 #define DEVLINK_FAMILY_NAME "devlink" 760 761 #define DEVLINK_CMD_PORT_GET 5 762 #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI 763 #define DEVLINK_CMD_RELOAD 37 764 #endif 765 #define DEVLINK_ATTR_BUS_NAME 1 766 #define DEVLINK_ATTR_DEV_NAME 2 767 #define DEVLINK_ATTR_NETDEV_NAME 7 768 #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI 769 #define DEVLINK_ATTR_NETNS_FD 138 770 #endif 771 772 #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI 773 static void netlink_devlink_netns_move(const char* bus_name, const char* dev_name, int netns_fd) 774 { 775 struct genlmsghdr genlhdr; 776 int sock; 777 int id, err; 778 779 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 780 if (sock == -1) 781 fail("socket(AF_NETLINK) failed"); 782 783 id = 
netlink_query_family_id(&nlmsg, sock, DEVLINK_FAMILY_NAME, true); 784 if (id == -1) 785 goto error; 786 787 memset(&genlhdr, 0, sizeof(genlhdr)); 788 genlhdr.cmd = DEVLINK_CMD_RELOAD; 789 netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr)); 790 netlink_attr(&nlmsg, DEVLINK_ATTR_BUS_NAME, bus_name, strlen(bus_name) + 1); 791 netlink_attr(&nlmsg, DEVLINK_ATTR_DEV_NAME, dev_name, strlen(dev_name) + 1); 792 netlink_attr(&nlmsg, DEVLINK_ATTR_NETNS_FD, &netns_fd, sizeof(netns_fd)); 793 err = netlink_send(&nlmsg, sock); 794 if (err < 0) { 795 debug("netlink: failed to move devlink instance %s/%s into network namespace: %s\n", 796 bus_name, dev_name, strerror(errno)); 797 } 798 error: 799 close(sock); 800 } 801 #endif 802 803 static struct nlmsg nlmsg2; 804 805 static void initialize_devlink_ports(const char* bus_name, const char* dev_name, 806 const char* netdev_prefix) 807 { 808 struct genlmsghdr genlhdr; 809 int len, total_len, id, err, offset; 810 uint16 netdev_index; 811 812 int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 813 if (sock == -1) 814 fail("socket(AF_NETLINK) failed"); 815 816 int rtsock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 817 if (rtsock == -1) 818 fail("socket(AF_NETLINK) failed"); 819 820 id = netlink_query_family_id(&nlmsg, sock, DEVLINK_FAMILY_NAME, true); 821 if (id == -1) 822 goto error; 823 824 memset(&genlhdr, 0, sizeof(genlhdr)); 825 genlhdr.cmd = DEVLINK_CMD_PORT_GET; 826 netlink_init(&nlmsg, id, NLM_F_DUMP, &genlhdr, sizeof(genlhdr)); 827 netlink_attr(&nlmsg, DEVLINK_ATTR_BUS_NAME, bus_name, strlen(bus_name) + 1); 828 netlink_attr(&nlmsg, DEVLINK_ATTR_DEV_NAME, dev_name, strlen(dev_name) + 1); 829 830 err = netlink_send_ext(&nlmsg, sock, id, &total_len, true); 831 if (err < 0) { 832 debug("netlink: failed to get port get reply: %s\n", strerror(errno)); 833 goto error; 834 } 835 836 offset = 0; 837 netdev_index = 0; 838 while ((len = netlink_next_msg(&nlmsg, offset, total_len)) != -1) { 839 struct nlattr* attr = (struct 
nlattr*)(nlmsg.buf + offset + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(genlhdr))); 840 for (; (char*)attr < nlmsg.buf + offset + len; attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) { 841 if (attr->nla_type == DEVLINK_ATTR_NETDEV_NAME) { 842 char* port_name; 843 char netdev_name[IFNAMSIZ]; 844 port_name = (char*)(attr + 1); 845 snprintf(netdev_name, sizeof(netdev_name), "%s%d", netdev_prefix, netdev_index); 846 netlink_device_change(&nlmsg2, rtsock, port_name, true, 0, 0, 0, netdev_name); 847 break; 848 } 849 } 850 offset += len; 851 netdev_index++; 852 } 853 error: 854 close(rtsock); 855 close(sock); 856 } 857 858 #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI 859 #include <fcntl.h> 860 #include <sched.h> 861 862 static void initialize_devlink_pci(void) 863 { 864 #if SYZ_EXECUTOR 865 if (!flag_devlink_pci) 866 return; 867 #endif 868 int netns = open("/proc/self/ns/net", O_RDONLY); 869 if (netns == -1) 870 fail("open(/proc/self/ns/net) failed"); 871 int ret = setns(kInitNetNsFd, 0); 872 if (ret == -1) 873 fail("set_ns(init_netns_fd) failed"); 874 netlink_devlink_netns_move("pci", "0000:00:10.0", netns); 875 ret = setns(netns, 0); 876 if (ret == -1) 877 fail("set_ns(this_netns_fd) failed"); 878 close(netns); 879 880 initialize_devlink_ports("pci", "0000:00:10.0", "netpci"); 881 } 882 #endif 883 #endif 884 885 #if SYZ_EXECUTOR || SYZ_WIFI || __NR_syz_80211_inject_frame || __NR_syz_80211_join_ibss 886 887 #define WIFI_INITIAL_DEVICE_COUNT 2 888 #define WIFI_MAC_BASE \ 889 { \ 890 0x08, 0x02, 0x11, 0x00, 0x00, 0x00} 891 #define WIFI_IBSS_BSSID \ 892 { \ 893 0x50, 0x50, 0x50, 0x50, 0x50, 0x50} 894 #define WIFI_IBSS_SSID \ 895 { \ 896 0x10, 0x10, 0x10, 0x10, 0x10, 0x10} 897 #define WIFI_DEFAULT_FREQUENCY 2412 898 #define WIFI_DEFAULT_SIGNAL 0 899 #define WIFI_DEFAULT_RX_RATE 1 900 901 // consts from drivers/net/wireless/mac80211_hwsim.h 902 #define HWSIM_CMD_REGISTER 1 903 #define HWSIM_CMD_FRAME 2 904 #define HWSIM_CMD_NEW_RADIO 4 905 #define 
HWSIM_ATTR_SUPPORT_P2P_DEVICE 14 906 #define HWSIM_ATTR_PERM_ADDR 22 907 908 #endif 909 910 #if SYZ_EXECUTOR || SYZ_WIFI || __NR_syz_80211_join_ibss 911 #include <linux/genetlink.h> 912 #include <linux/if_ether.h> 913 #include <linux/nl80211.h> 914 #include <linux/rtnetlink.h> 915 #include <net/if.h> 916 #include <stdbool.h> 917 #include <sys/ioctl.h> 918 919 // From linux/if.h, but we cannot include the file as it conflicts with net/if.h 920 #define IF_OPER_UP 6 921 922 // IBSS parameters for nl80211_join_ibss 923 struct join_ibss_props { 924 int wiphy_freq; 925 bool wiphy_freq_fixed; 926 uint8* mac; 927 uint8* ssid; 928 int ssid_len; 929 }; 930 931 static int set_interface_state(const char* interface_name, int on) 932 { 933 struct ifreq ifr; 934 int sock = socket(AF_INET, SOCK_DGRAM, 0); 935 if (sock < 0) { 936 debug("set_interface_state: failed to open socket, errno %d\n", errno); 937 return -1; 938 } 939 940 memset(&ifr, 0, sizeof(ifr)); 941 strcpy(ifr.ifr_name, interface_name); 942 int ret = ioctl(sock, SIOCGIFFLAGS, &ifr); 943 if (ret < 0) { 944 debug("set_interface_state: failed to execute SIOCGIFFLAGS, ret %d\n", ret); 945 close(sock); 946 return -1; 947 } 948 949 if (on) 950 ifr.ifr_flags |= IFF_UP; 951 else 952 ifr.ifr_flags &= ~IFF_UP; 953 954 ret = ioctl(sock, SIOCSIFFLAGS, &ifr); 955 close(sock); 956 if (ret < 0) { 957 debug("set_interface_state: failed to execute SIOCSIFFLAGS, ret %d\n", ret); 958 return -1; 959 } 960 return 0; 961 } 962 963 static int nl80211_set_interface(struct nlmsg* nlmsg, int sock, int nl80211_family, uint32 ifindex, 964 uint32 iftype, bool dofail) 965 { 966 struct genlmsghdr genlhdr; 967 968 memset(&genlhdr, 0, sizeof(genlhdr)); 969 genlhdr.cmd = NL80211_CMD_SET_INTERFACE; 970 netlink_init(nlmsg, nl80211_family, 0, &genlhdr, sizeof(genlhdr)); 971 netlink_attr(nlmsg, NL80211_ATTR_IFINDEX, &ifindex, sizeof(ifindex)); 972 netlink_attr(nlmsg, NL80211_ATTR_IFTYPE, &iftype, sizeof(iftype)); 973 int err = netlink_send_ext(nlmsg, sock, 
0, NULL, dofail); 974 if (err < 0) { 975 debug("nl80211_set_interface failed: %s\n", strerror(errno)); 976 } 977 return err; 978 } 979 980 static int nl80211_join_ibss(struct nlmsg* nlmsg, int sock, int nl80211_family, uint32 ifindex, 981 struct join_ibss_props* props, bool dofail) 982 { 983 struct genlmsghdr genlhdr; 984 985 memset(&genlhdr, 0, sizeof(genlhdr)); 986 genlhdr.cmd = NL80211_CMD_JOIN_IBSS; 987 netlink_init(nlmsg, nl80211_family, 0, &genlhdr, sizeof(genlhdr)); 988 netlink_attr(nlmsg, NL80211_ATTR_IFINDEX, &ifindex, sizeof(ifindex)); 989 netlink_attr(nlmsg, NL80211_ATTR_SSID, props->ssid, props->ssid_len); 990 netlink_attr(nlmsg, NL80211_ATTR_WIPHY_FREQ, &(props->wiphy_freq), sizeof(props->wiphy_freq)); 991 if (props->mac) 992 netlink_attr(nlmsg, NL80211_ATTR_MAC, props->mac, ETH_ALEN); 993 if (props->wiphy_freq_fixed) 994 netlink_attr(nlmsg, NL80211_ATTR_FREQ_FIXED, NULL, 0); 995 int err = netlink_send_ext(nlmsg, sock, 0, NULL, dofail); 996 if (err < 0) { 997 debug("nl80211_join_ibss failed: %s\n", strerror(errno)); 998 } 999 return err; 1000 } 1001 1002 static int get_ifla_operstate(struct nlmsg* nlmsg, int ifindex, bool dofail) 1003 { 1004 struct ifinfomsg info; 1005 memset(&info, 0, sizeof(info)); 1006 info.ifi_family = AF_UNSPEC; 1007 info.ifi_index = ifindex; 1008 1009 int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 1010 if (sock == -1) { 1011 debug("get_ifla_operstate: socket failed: %d\n", errno); 1012 return -1; 1013 } 1014 1015 netlink_init(nlmsg, RTM_GETLINK, 0, &info, sizeof(info)); 1016 int n; 1017 int err = netlink_send_ext(nlmsg, sock, RTM_NEWLINK, &n, dofail); 1018 close(sock); 1019 1020 if (err) { 1021 debug("get_ifla_operstate: failed to query: %s\n", strerror(errno)); 1022 return -1; 1023 } 1024 1025 struct rtattr* attr = IFLA_RTA(NLMSG_DATA(nlmsg->buf)); 1026 for (; RTA_OK(attr, n); attr = RTA_NEXT(attr, n)) { 1027 if (attr->rta_type == IFLA_OPERSTATE) 1028 return *((int32_t*)RTA_DATA(attr)); 1029 } 1030 1031 return -1; 1032 
} 1033 1034 static int await_ifla_operstate(struct nlmsg* nlmsg, char* interface, int operstate, bool dofail) 1035 { 1036 int ifindex = if_nametoindex(interface); 1037 while (true) { 1038 usleep(1000); // 1 ms 1039 int ret = get_ifla_operstate(nlmsg, ifindex, dofail); 1040 if (ret < 0) 1041 return ret; 1042 if (ret == operstate) 1043 return 0; 1044 } 1045 return 0; 1046 } 1047 1048 static int nl80211_setup_ibss_interface(struct nlmsg* nlmsg, int sock, int nl80211_family_id, char* interface, 1049 struct join_ibss_props* ibss_props, bool dofail) 1050 { 1051 int ifindex = if_nametoindex(interface); 1052 if (ifindex == 0) { 1053 debug("nl80211_setup_ibss_interface: if_nametoindex failed for %.32s, ret 0\n", interface); 1054 return -1; 1055 } 1056 1057 int ret = nl80211_set_interface(nlmsg, sock, nl80211_family_id, ifindex, NL80211_IFTYPE_ADHOC, dofail); 1058 if (ret < 0) { 1059 debug("nl80211_setup_ibss_interface: nl80211_set_interface failed for %.32s, ret %d\n", interface, ret); 1060 return -1; 1061 } 1062 1063 ret = set_interface_state(interface, 1); 1064 if (ret < 0) { 1065 debug("nl80211_setup_ibss_interface: set_interface_state failed for %.32s, ret %d\n", interface, ret); 1066 return -1; 1067 } 1068 1069 ret = nl80211_join_ibss(nlmsg, sock, nl80211_family_id, ifindex, ibss_props, dofail); 1070 if (ret < 0) { 1071 debug("nl80211_setup_ibss_interface: nl80211_join_ibss failed for %.32s, ret %d\n", interface, ret); 1072 return -1; 1073 } 1074 1075 return 0; 1076 } 1077 #endif 1078 1079 #if SYZ_EXECUTOR || SYZ_WIFI 1080 #include <fcntl.h> 1081 #include <linux/rfkill.h> 1082 #include <sys/stat.h> 1083 #include <sys/types.h> 1084 1085 static int hwsim80211_create_device(struct nlmsg* nlmsg, int sock, int hwsim_family, uint8 mac_addr[ETH_ALEN]) 1086 { 1087 struct genlmsghdr genlhdr; 1088 memset(&genlhdr, 0, sizeof(genlhdr)); 1089 genlhdr.cmd = HWSIM_CMD_NEW_RADIO; 1090 netlink_init(nlmsg, hwsim_family, 0, &genlhdr, sizeof(genlhdr)); 1091 netlink_attr(nlmsg, 
HWSIM_ATTR_SUPPORT_P2P_DEVICE, NULL, 0); 1092 netlink_attr(nlmsg, HWSIM_ATTR_PERM_ADDR, mac_addr, ETH_ALEN); 1093 int err = netlink_send(nlmsg, sock); 1094 if (err < 0) { 1095 debug("hwsim80211_create_device failed: %s\n", strerror(errno)); 1096 } 1097 return err; 1098 } 1099 1100 static void initialize_wifi_devices(void) 1101 { 1102 // Set up virtual wifi devices and join them into an IBSS network. 1103 // An IBSS network is created here in order to put these devices in an operable state right from 1104 // the beginning. It has the following positive effects. 1105 // 1. Frame injection becomes possible from the very start. 1106 // 2. A number of nl80211 commands expect their target wireless interface to be in an operable state. 1107 // 3. Simplification of reproducer generation - in many cases the reproducer will not have to spend time 1108 // selecting system calls that set up the environment. 1109 // 1110 // IBSS network was chosen as the simplest network type to begin with. 1111 1112 #if SYZ_EXECUTOR 1113 if (!flag_wifi) 1114 return; 1115 #endif 1116 int rfkill = open("/dev/rfkill", O_RDWR); 1117 if (rfkill == -1) 1118 fail("open(/dev/rfkill) failed"); 1119 struct rfkill_event event = {0}; 1120 event.type = RFKILL_TYPE_ALL; 1121 event.op = RFKILL_OP_CHANGE_ALL; 1122 if (write(rfkill, &event, sizeof(event)) != (ssize_t)(sizeof(event))) 1123 fail("write(/dev/rfkill) failed"); 1124 close(rfkill); 1125 1126 uint8 mac_addr[6] = WIFI_MAC_BASE; 1127 int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 1128 if (sock < 0) 1129 fail("initialize_wifi_devices: failed to create socket"); 1130 int hwsim_family_id = netlink_query_family_id(&nlmsg, sock, "MAC80211_HWSIM", true); 1131 int nl80211_family_id = netlink_query_family_id(&nlmsg, sock, "nl80211", true); 1132 if (hwsim_family_id < 0 || nl80211_family_id < 0) 1133 fail("netlink_query_family_id failed"); 1134 uint8 ssid[] = WIFI_IBSS_SSID; 1135 uint8 bssid[] = WIFI_IBSS_BSSID; 1136 struct join_ibss_props ibss_props 
= { 1137 .wiphy_freq = WIFI_DEFAULT_FREQUENCY, .wiphy_freq_fixed = true, .mac = bssid, .ssid = ssid, .ssid_len = sizeof(ssid)}; 1138 1139 for (int device_id = 0; device_id < WIFI_INITIAL_DEVICE_COUNT; device_id++) { 1140 // Virtual wifi devices will have consequtive mac addresses 1141 mac_addr[5] = device_id; 1142 int ret = hwsim80211_create_device(&nlmsg, sock, hwsim_family_id, mac_addr); 1143 if (ret < 0) 1144 failmsg("initialize_wifi_devices: failed to create device", "device=%d", device_id); 1145 1146 // For each device, unless HWSIM_ATTR_NO_VIF is passed, a network interface is created 1147 // automatically. Such interfaces are named "wlan0", "wlan1" and so on. 1148 char interface[6] = "wlan0"; 1149 interface[4] += device_id; 1150 1151 if (nl80211_setup_ibss_interface(&nlmsg, sock, nl80211_family_id, interface, &ibss_props, true) < 0) 1152 failmsg("initialize_wifi_devices: failed set up IBSS network", "device=%d", device_id); 1153 } 1154 1155 // Wait for all devices to join the IBSS network 1156 for (int device_id = 0; device_id < WIFI_INITIAL_DEVICE_COUNT; device_id++) { 1157 char interface[6] = "wlan0"; 1158 interface[4] += device_id; 1159 int ret = await_ifla_operstate(&nlmsg, interface, IF_OPER_UP, true); 1160 if (ret < 0) 1161 failmsg("initialize_wifi_devices: get_ifla_operstate failed", 1162 "device=%d, ret=%d", device_id, ret); 1163 } 1164 1165 close(sock); 1166 } 1167 #endif 1168 1169 #if SYZ_EXECUTOR || (SYZ_NET_DEVICES && SYZ_NIC_VF) || SYZ_SWAP 1170 static int runcmdline(char* cmdline) 1171 { 1172 debug("%s\n", cmdline); 1173 int ret = system(cmdline); 1174 if (ret) { 1175 debug("FAIL: %s\n", cmdline); 1176 } 1177 return ret; 1178 } 1179 #endif 1180 1181 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 1182 #include <arpa/inet.h> 1183 #include <errno.h> 1184 #include <fcntl.h> 1185 #include <net/if.h> 1186 #include <net/if_arp.h> 1187 #include <stdarg.h> 1188 #include <stdbool.h> 1189 #include <sys/ioctl.h> 1190 #include <sys/stat.h> 1191 #include <sys/uio.h> 
1192 1193 #include <linux/if_ether.h> 1194 #include <linux/if_tun.h> 1195 #include <linux/ip.h> 1196 #include <linux/tcp.h> 1197 1198 // Addresses are chosen to be in the same subnet as tun addresses. 1199 #define DEV_IPV4 "172.20.20.%d" 1200 #define DEV_IPV6 "fe80::%02x" 1201 #define DEV_MAC 0x00aaaaaaaaaa 1202 1203 static void netdevsim_add(unsigned int addr, unsigned int port_count) 1204 { 1205 // These devices are sticky and are not deleted on net namespace destruction. 1206 // So try to delete the previous version of the device. 1207 write_file("/sys/bus/netdevsim/del_device", "%u", addr); 1208 if (write_file("/sys/bus/netdevsim/new_device", "%u %u", addr, port_count)) { 1209 char buf[32]; 1210 snprintf(buf, sizeof(buf), "netdevsim%d", addr); 1211 initialize_devlink_ports("netdevsim", buf, "netdevsim"); 1212 } 1213 } 1214 1215 #define WG_GENL_NAME "wireguard" 1216 enum wg_cmd { 1217 WG_CMD_GET_DEVICE, 1218 WG_CMD_SET_DEVICE, 1219 }; 1220 enum wgdevice_attribute { 1221 WGDEVICE_A_UNSPEC, 1222 WGDEVICE_A_IFINDEX, 1223 WGDEVICE_A_IFNAME, 1224 WGDEVICE_A_PRIVATE_KEY, 1225 WGDEVICE_A_PUBLIC_KEY, 1226 WGDEVICE_A_FLAGS, 1227 WGDEVICE_A_LISTEN_PORT, 1228 WGDEVICE_A_FWMARK, 1229 WGDEVICE_A_PEERS, 1230 }; 1231 enum wgpeer_attribute { 1232 WGPEER_A_UNSPEC, 1233 WGPEER_A_PUBLIC_KEY, 1234 WGPEER_A_PRESHARED_KEY, 1235 WGPEER_A_FLAGS, 1236 WGPEER_A_ENDPOINT, 1237 WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, 1238 WGPEER_A_LAST_HANDSHAKE_TIME, 1239 WGPEER_A_RX_BYTES, 1240 WGPEER_A_TX_BYTES, 1241 WGPEER_A_ALLOWEDIPS, 1242 WGPEER_A_PROTOCOL_VERSION, 1243 }; 1244 enum wgallowedip_attribute { 1245 WGALLOWEDIP_A_UNSPEC, 1246 WGALLOWEDIP_A_FAMILY, 1247 WGALLOWEDIP_A_IPADDR, 1248 WGALLOWEDIP_A_CIDR_MASK, 1249 }; 1250 1251 static void netlink_wireguard_setup(void) 1252 { 1253 const char ifname_a[] = "wg0"; 1254 const char ifname_b[] = "wg1"; 1255 const char ifname_c[] = "wg2"; 1256 const char private_a[] = 
"\xa0\x5c\xa8\x4f\x6c\x9c\x8e\x38\x53\xe2\xfd\x7a\x70\xae\x0f\xb2\x0f\xa1\x52\x60\x0c\xb0\x08\x45\x17\x4f\x08\x07\x6f\x8d\x78\x43"; 1257 const char private_b[] = "\xb0\x80\x73\xe8\xd4\x4e\x91\xe3\xda\x92\x2c\x22\x43\x82\x44\xbb\x88\x5c\x69\xe2\x69\xc8\xe9\xd8\x35\xb1\x14\x29\x3a\x4d\xdc\x6e"; 1258 const char private_c[] = "\xa0\xcb\x87\x9a\x47\xf5\xbc\x64\x4c\x0e\x69\x3f\xa6\xd0\x31\xc7\x4a\x15\x53\xb6\xe9\x01\xb9\xff\x2f\x51\x8c\x78\x04\x2f\xb5\x42"; 1259 const char public_a[] = "\x97\x5c\x9d\x81\xc9\x83\xc8\x20\x9e\xe7\x81\x25\x4b\x89\x9f\x8e\xd9\x25\xae\x9f\x09\x23\xc2\x3c\x62\xf5\x3c\x57\xcd\xbf\x69\x1c"; 1260 const char public_b[] = "\xd1\x73\x28\x99\xf6\x11\xcd\x89\x94\x03\x4d\x7f\x41\x3d\xc9\x57\x63\x0e\x54\x93\xc2\x85\xac\xa4\x00\x65\xcb\x63\x11\xbe\x69\x6b"; 1261 const char public_c[] = "\xf4\x4d\xa3\x67\xa8\x8e\xe6\x56\x4f\x02\x02\x11\x45\x67\x27\x08\x2f\x5c\xeb\xee\x8b\x1b\xf5\xeb\x73\x37\x34\x1b\x45\x9b\x39\x22"; 1262 const uint16 listen_a = 20001; 1263 const uint16 listen_b = 20002; 1264 const uint16 listen_c = 20003; 1265 const uint16 af_inet = AF_INET; 1266 const uint16 af_inet6 = AF_INET6; 1267 // Unused, but useful in case we change this: 1268 // const struct sockaddr_in endpoint_a_v4 = { 1269 // .sin_family = AF_INET, 1270 // .sin_port = htons(listen_a), 1271 // .sin_addr = {htonl(INADDR_LOOPBACK)}}; 1272 const struct sockaddr_in endpoint_b_v4 = { 1273 .sin_family = AF_INET, 1274 .sin_port = htons(listen_b), 1275 .sin_addr = {htonl(INADDR_LOOPBACK)}}; 1276 const struct sockaddr_in endpoint_c_v4 = { 1277 .sin_family = AF_INET, 1278 .sin_port = htons(listen_c), 1279 .sin_addr = {htonl(INADDR_LOOPBACK)}}; 1280 struct sockaddr_in6 endpoint_a_v6 = { 1281 .sin6_family = AF_INET6, 1282 .sin6_port = htons(listen_a)}; 1283 endpoint_a_v6.sin6_addr = in6addr_loopback; 1284 // Unused, but useful in case we change this: 1285 // const struct sockaddr_in6 endpoint_b_v6 = { 1286 // .sin6_family = AF_INET6, 1287 // .sin6_port = htons(listen_b)}; 1288 // 
endpoint_b_v6.sin6_addr = in6addr_loopback; 1289 struct sockaddr_in6 endpoint_c_v6 = { 1290 .sin6_family = AF_INET6, 1291 .sin6_port = htons(listen_c)}; 1292 endpoint_c_v6.sin6_addr = in6addr_loopback; 1293 const struct in_addr first_half_v4 = {0}; 1294 const struct in_addr second_half_v4 = {(uint32)htonl(128 << 24)}; 1295 const struct in6_addr first_half_v6 = {{{0}}}; 1296 const struct in6_addr second_half_v6 = {{{0x80}}}; 1297 const uint8 half_cidr = 1; 1298 const uint16 persistent_keepalives[] = {1, 3, 7, 9, 14, 19}; 1299 1300 struct genlmsghdr genlhdr = { 1301 .cmd = WG_CMD_SET_DEVICE, 1302 .version = 1}; 1303 int sock; 1304 int id, err; 1305 1306 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 1307 if (sock == -1) { 1308 debug("socket(AF_NETLINK) failed: %s\n", strerror(errno)); 1309 return; 1310 } 1311 1312 id = netlink_query_family_id(&nlmsg, sock, WG_GENL_NAME, true); 1313 if (id == -1) 1314 goto error; 1315 1316 netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr)); 1317 netlink_attr(&nlmsg, WGDEVICE_A_IFNAME, ifname_a, strlen(ifname_a) + 1); 1318 netlink_attr(&nlmsg, WGDEVICE_A_PRIVATE_KEY, private_a, 32); 1319 netlink_attr(&nlmsg, WGDEVICE_A_LISTEN_PORT, &listen_a, 2); 1320 netlink_nest(&nlmsg, NLA_F_NESTED | WGDEVICE_A_PEERS); 1321 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1322 netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_b, 32); 1323 netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_b_v4, sizeof(endpoint_b_v4)); 1324 netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[0], 2); 1325 netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); 1326 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1327 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); 1328 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v4, sizeof(first_half_v4)); 1329 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1330 netlink_done(&nlmsg); 1331 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1332 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, 
&af_inet6, 2); 1333 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v6, sizeof(first_half_v6)); 1334 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1335 netlink_done(&nlmsg); 1336 netlink_done(&nlmsg); 1337 netlink_done(&nlmsg); 1338 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1339 netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_c, 32); 1340 netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_c_v6, sizeof(endpoint_c_v6)); 1341 netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[1], 2); 1342 netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); 1343 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1344 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); 1345 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v4, sizeof(second_half_v4)); 1346 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1347 netlink_done(&nlmsg); 1348 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1349 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); 1350 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v6, sizeof(second_half_v6)); 1351 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1352 netlink_done(&nlmsg); 1353 netlink_done(&nlmsg); 1354 netlink_done(&nlmsg); 1355 netlink_done(&nlmsg); 1356 err = netlink_send(&nlmsg, sock); 1357 if (err < 0) { 1358 debug("netlink: failed to setup wireguard instance: %s\n", strerror(errno)); 1359 } 1360 1361 netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr)); 1362 netlink_attr(&nlmsg, WGDEVICE_A_IFNAME, ifname_b, strlen(ifname_b) + 1); 1363 netlink_attr(&nlmsg, WGDEVICE_A_PRIVATE_KEY, private_b, 32); 1364 netlink_attr(&nlmsg, WGDEVICE_A_LISTEN_PORT, &listen_b, 2); 1365 netlink_nest(&nlmsg, NLA_F_NESTED | WGDEVICE_A_PEERS); 1366 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1367 netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_a, 32); 1368 netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_a_v6, sizeof(endpoint_a_v6)); 1369 netlink_attr(&nlmsg, 
WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[2], 2); 1370 netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); 1371 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1372 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); 1373 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v4, sizeof(first_half_v4)); 1374 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1375 netlink_done(&nlmsg); 1376 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1377 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); 1378 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v6, sizeof(first_half_v6)); 1379 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1380 netlink_done(&nlmsg); 1381 netlink_done(&nlmsg); 1382 netlink_done(&nlmsg); 1383 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1384 netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_c, 32); 1385 netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_c_v4, sizeof(endpoint_c_v4)); 1386 netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[3], 2); 1387 netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); 1388 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1389 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); 1390 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v4, sizeof(second_half_v4)); 1391 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1392 netlink_done(&nlmsg); 1393 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1394 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); 1395 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v6, sizeof(second_half_v6)); 1396 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1397 netlink_done(&nlmsg); 1398 netlink_done(&nlmsg); 1399 netlink_done(&nlmsg); 1400 netlink_done(&nlmsg); 1401 err = netlink_send(&nlmsg, sock); 1402 if (err < 0) { 1403 debug("netlink: failed to setup wireguard instance: %s\n", strerror(errno)); 1404 } 1405 1406 netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr)); 
1407 netlink_attr(&nlmsg, WGDEVICE_A_IFNAME, ifname_c, strlen(ifname_c) + 1); 1408 netlink_attr(&nlmsg, WGDEVICE_A_PRIVATE_KEY, private_c, 32); 1409 netlink_attr(&nlmsg, WGDEVICE_A_LISTEN_PORT, &listen_c, 2); 1410 netlink_nest(&nlmsg, NLA_F_NESTED | WGDEVICE_A_PEERS); 1411 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1412 netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_a, 32); 1413 netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_a_v6, sizeof(endpoint_a_v6)); 1414 netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[4], 2); 1415 netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); 1416 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1417 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); 1418 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v4, sizeof(first_half_v4)); 1419 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1420 netlink_done(&nlmsg); 1421 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1422 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); 1423 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v6, sizeof(first_half_v6)); 1424 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1425 netlink_done(&nlmsg); 1426 netlink_done(&nlmsg); 1427 netlink_done(&nlmsg); 1428 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1429 netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_b, 32); 1430 netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_b_v4, sizeof(endpoint_b_v4)); 1431 netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[5], 2); 1432 netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); 1433 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1434 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); 1435 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v4, sizeof(second_half_v4)); 1436 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1437 netlink_done(&nlmsg); 1438 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1439 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, 
&af_inet6, 2); 1440 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v6, sizeof(second_half_v6)); 1441 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1442 netlink_done(&nlmsg); 1443 netlink_done(&nlmsg); 1444 netlink_done(&nlmsg); 1445 netlink_done(&nlmsg); 1446 err = netlink_send(&nlmsg, sock); 1447 if (err < 0) { 1448 debug("netlink: failed to setup wireguard instance: %s\n", strerror(errno)); 1449 } 1450 1451 error: 1452 close(sock); 1453 } 1454 1455 #if SYZ_EXECUTOR || SYZ_NIC_VF 1456 1457 static void netlink_nicvf_setup(void) 1458 { 1459 char cmdline[256]; 1460 1461 #if SYZ_EXECUTOR 1462 if (!flag_nic_vf) 1463 return; 1464 #endif 1465 if (!vf_intf.ppid) 1466 return; 1467 1468 debug("ppid = %d, vf_intf.pass_thru_intf: %s\n", 1469 vf_intf.ppid, vf_intf.pass_thru_intf); 1470 1471 sprintf(cmdline, "nsenter -t 1 -n ip link set %s netns %d", 1472 vf_intf.pass_thru_intf, getpid()); 1473 if (runcmdline(cmdline)) 1474 failmsg("failed to run command", "%s", cmdline); 1475 sprintf(cmdline, "ip a s %s", vf_intf.pass_thru_intf); 1476 if (runcmdline(cmdline)) 1477 failmsg("failed to run command", "%s", cmdline); 1478 sprintf(cmdline, "ip link set %s down", vf_intf.pass_thru_intf); 1479 if (runcmdline(cmdline)) 1480 failmsg("failed to run command", "%s", cmdline); 1481 sprintf(cmdline, "ip link set %s name nicvf0", vf_intf.pass_thru_intf); 1482 if (runcmdline(cmdline)) 1483 failmsg("failed to run command", "%s", cmdline); 1484 debug("nicvf0 VF pass-through setup complete.\n"); 1485 } 1486 #endif // SYZ_NIC_VF 1487 1488 // We test in a separate namespace, which does not have any network devices initially (even lo). 1489 // Create/up as many as we can. 1490 static void initialize_netdevices(void) 1491 { 1492 #if SYZ_EXECUTOR 1493 if (!flag_net_devices) 1494 return; 1495 #endif 1496 // TODO: add the following devices: 1497 // - vxlan 1498 // - ipip 1499 // - lowpan (requires link to device of type IEEE802154, e.g. 
wpan0) 1500 // - ipoib (requires link to device of type ARPHRD_INFINIBAND) 1501 // - vrf 1502 // - rmnet 1503 // - openvswitch 1504 // Naive attempts to add devices of these types fail with various errors. 1505 // Also init namespace contains the following devices (which presumably can't be 1506 // created in non-init namespace), can we use them somehow? 1507 // - ifb0/1 1508 // - teql0 1509 // - eql 1510 // Note: netdevsim devices can't have the same name even in different namespaces. 1511 char netdevsim[16]; 1512 sprintf(netdevsim, "netdevsim%d", (int)procid); 1513 struct { 1514 const char* type; 1515 const char* dev; 1516 } devtypes[] = { 1517 // Note: ip6erspan device can't be added if ip6gretap exists in the same namespace. 1518 {"ip6gretap", "ip6gretap0"}, 1519 {"bridge", "bridge0"}, 1520 {"vcan", "vcan0"}, 1521 {"bond", "bond0"}, 1522 {"team", "team0"}, 1523 {"dummy", "dummy0"}, 1524 #if SYZ_EXECUTOR || SYZ_NIC_VF 1525 {"nicvf", "nicvf0"}, 1526 #endif 1527 {"nlmon", "nlmon0"}, 1528 {"caif", "caif0"}, 1529 {"batadv", "batadv0"}, 1530 // Note: this adds vxcan0/vxcan1 pair, similar to veth (creating vxcan0 would fail). 1531 {"vxcan", "vxcan1"}, 1532 // This adds connected veth0 and veth1 devices. 1533 {"veth", 0}, 1534 {"wireguard", "wg0"}, 1535 {"wireguard", "wg1"}, 1536 {"wireguard", "wg2"}, 1537 }; 1538 const char* devmasters[] = {"bridge", "bond", "team", "batadv"}; 1539 // If you extend this array, also update netdev_addr_id in vnet.txt 1540 // and devnames in socket.txt. 
1541 struct { 1542 const char* name; 1543 int macsize; 1544 bool noipv6; 1545 } devices[] = { 1546 {"lo", ETH_ALEN}, 1547 {"sit0", 0}, 1548 {"bridge0", ETH_ALEN}, 1549 {"vcan0", 0, true}, 1550 {"tunl0", 0}, 1551 {"gre0", 0}, 1552 {"gretap0", ETH_ALEN}, 1553 {"ip_vti0", 0}, 1554 {"ip6_vti0", 0}, 1555 {"ip6tnl0", 0}, 1556 {"ip6gre0", 0}, 1557 {"ip6gretap0", ETH_ALEN}, 1558 {"erspan0", ETH_ALEN}, 1559 {"bond0", ETH_ALEN}, 1560 {"veth0", ETH_ALEN}, 1561 {"veth1", ETH_ALEN}, 1562 {"team0", ETH_ALEN}, 1563 {"veth0_to_bridge", ETH_ALEN}, 1564 {"veth1_to_bridge", ETH_ALEN}, 1565 {"veth0_to_bond", ETH_ALEN}, 1566 {"veth1_to_bond", ETH_ALEN}, 1567 {"veth0_to_team", ETH_ALEN}, 1568 {"veth1_to_team", ETH_ALEN}, 1569 {"veth0_to_hsr", ETH_ALEN}, 1570 {"veth1_to_hsr", ETH_ALEN}, 1571 {"hsr0", 0}, 1572 {"dummy0", ETH_ALEN}, 1573 #if SYZ_EXECUTOR || SYZ_NIC_VF 1574 {"nicvf0", 0, true}, 1575 #endif 1576 {"nlmon0", 0}, 1577 {"vxcan0", 0, true}, 1578 {"vxcan1", 0, true}, 1579 {"caif0", ETH_ALEN}, // TODO: up'ing caif fails with ENODEV 1580 {"batadv0", ETH_ALEN}, 1581 {netdevsim, ETH_ALEN}, 1582 {"xfrm0", ETH_ALEN}, 1583 {"veth0_virt_wifi", ETH_ALEN}, 1584 {"veth1_virt_wifi", ETH_ALEN}, 1585 {"virt_wifi0", ETH_ALEN}, 1586 {"veth0_vlan", ETH_ALEN}, 1587 {"veth1_vlan", ETH_ALEN}, 1588 {"vlan0", ETH_ALEN}, 1589 {"vlan1", ETH_ALEN}, 1590 {"macvlan0", ETH_ALEN}, 1591 {"macvlan1", ETH_ALEN}, 1592 {"ipvlan0", ETH_ALEN}, 1593 {"ipvlan1", ETH_ALEN}, 1594 {"veth0_macvtap", ETH_ALEN}, 1595 {"veth1_macvtap", ETH_ALEN}, 1596 {"macvtap0", ETH_ALEN}, 1597 {"macsec0", ETH_ALEN}, 1598 {"veth0_to_batadv", ETH_ALEN}, 1599 {"veth1_to_batadv", ETH_ALEN}, 1600 {"batadv_slave_0", ETH_ALEN}, 1601 {"batadv_slave_1", ETH_ALEN}, 1602 {"geneve0", ETH_ALEN}, 1603 {"geneve1", ETH_ALEN}, 1604 {"wg0", 0}, 1605 {"wg1", 0}, 1606 {"wg2", 0}, 1607 }; 1608 int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 1609 if (sock == -1) 1610 fail("socket(AF_NETLINK) failed"); 1611 unsigned i; 1612 for (i = 0; i < 
sizeof(devtypes) / sizeof(devtypes[0]); i++) 1613 netlink_add_device(&nlmsg, sock, devtypes[i].type, devtypes[i].dev); 1614 // This creates connected bridge/bond/team_slave devices of type veth, 1615 // and makes them slaves of bridge/bond/team devices, respectively. 1616 // Note: slave devices don't need MAC/IP addresses, only master devices. 1617 // veth0_to_* is not slave devices, which still need ip addresses. 1618 for (i = 0; i < sizeof(devmasters) / (sizeof(devmasters[0])); i++) { 1619 char master[32], slave0[32], veth0[32], slave1[32], veth1[32]; 1620 sprintf(slave0, "%s_slave_0", devmasters[i]); 1621 sprintf(veth0, "veth0_to_%s", devmasters[i]); 1622 netlink_add_veth(&nlmsg, sock, slave0, veth0); 1623 sprintf(slave1, "%s_slave_1", devmasters[i]); 1624 sprintf(veth1, "veth1_to_%s", devmasters[i]); 1625 netlink_add_veth(&nlmsg, sock, slave1, veth1); 1626 sprintf(master, "%s0", devmasters[i]); 1627 netlink_device_change(&nlmsg, sock, slave0, false, master, 0, 0, NULL); 1628 netlink_device_change(&nlmsg, sock, slave1, false, master, 0, 0, NULL); 1629 } 1630 netlink_add_xfrm(&nlmsg, sock, "xfrm0"); 1631 1632 // bond/team_slave_* will set up automatically when set their master. 1633 // But bridge_slave_* need to set up manually. 1634 netlink_device_change(&nlmsg, sock, "bridge_slave_0", true, 0, 0, 0, NULL); 1635 netlink_device_change(&nlmsg, sock, "bridge_slave_1", true, 0, 0, 0, NULL); 1636 1637 // Setup hsr device (slightly different from what we do for devmasters). 
1638 netlink_add_veth(&nlmsg, sock, "hsr_slave_0", "veth0_to_hsr"); 1639 netlink_add_veth(&nlmsg, sock, "hsr_slave_1", "veth1_to_hsr"); 1640 netlink_add_hsr(&nlmsg, sock, "hsr0", "hsr_slave_0", "hsr_slave_1"); 1641 netlink_device_change(&nlmsg, sock, "hsr_slave_0", true, 0, 0, 0, NULL); 1642 netlink_device_change(&nlmsg, sock, "hsr_slave_1", true, 0, 0, 0, NULL); 1643 1644 netlink_add_veth(&nlmsg, sock, "veth0_virt_wifi", "veth1_virt_wifi"); 1645 netlink_add_linked(&nlmsg, sock, "virt_wifi", "virt_wifi0", "veth1_virt_wifi"); 1646 1647 netlink_add_veth(&nlmsg, sock, "veth0_vlan", "veth1_vlan"); 1648 netlink_add_vlan(&nlmsg, sock, "vlan0", "veth0_vlan", 0, htons(ETH_P_8021Q)); 1649 netlink_add_vlan(&nlmsg, sock, "vlan1", "veth0_vlan", 1, htons(ETH_P_8021AD)); 1650 netlink_add_macvlan(&nlmsg, sock, "macvlan0", "veth1_vlan"); 1651 netlink_add_macvlan(&nlmsg, sock, "macvlan1", "veth1_vlan"); 1652 netlink_add_ipvlan(&nlmsg, sock, "ipvlan0", "veth0_vlan", IPVLAN_MODE_L2, 0); 1653 netlink_add_ipvlan(&nlmsg, sock, "ipvlan1", "veth0_vlan", IPVLAN_MODE_L3S, IPVLAN_F_VEPA); 1654 1655 netlink_add_veth(&nlmsg, sock, "veth0_macvtap", "veth1_macvtap"); 1656 netlink_add_linked(&nlmsg, sock, "macvtap", "macvtap0", "veth0_macvtap"); 1657 netlink_add_linked(&nlmsg, sock, "macsec", "macsec0", "veth1_macvtap"); 1658 1659 char addr[32]; 1660 sprintf(addr, DEV_IPV4, 14 + 10); // should point to veth0 1661 struct in_addr geneve_addr4; 1662 if (inet_pton(AF_INET, addr, &geneve_addr4) <= 0) 1663 fail("geneve0 inet_pton failed"); 1664 struct in6_addr geneve_addr6; 1665 // Must not be link local (our device addresses are link local). 
1666 if (inet_pton(AF_INET6, "fc00::01", &geneve_addr6) <= 0) 1667 fail("geneve1 inet_pton failed"); 1668 netlink_add_geneve(&nlmsg, sock, "geneve0", 0, &geneve_addr4, 0); 1669 netlink_add_geneve(&nlmsg, sock, "geneve1", 1, 0, &geneve_addr6); 1670 1671 netdevsim_add((int)procid, 4); // Number of port is in sync with value in sys/linux/socket_netlink_generic_devlink.txt 1672 1673 netlink_wireguard_setup(); 1674 1675 #if SYZ_EXECUTOR || SYZ_NIC_VF 1676 netlink_nicvf_setup(); 1677 #endif 1678 1679 for (i = 0; i < sizeof(devices) / (sizeof(devices[0])); i++) { 1680 // Assign some unique address to devices. Some devices won't up without this. 1681 // Shift addresses by 10 because 0 subnet address can mean special things. 1682 char addr[32]; 1683 sprintf(addr, DEV_IPV4, i + 10); 1684 netlink_add_addr4(&nlmsg, sock, devices[i].name, addr); 1685 if (!devices[i].noipv6) { 1686 sprintf(addr, DEV_IPV6, i + 10); 1687 netlink_add_addr6(&nlmsg, sock, devices[i].name, addr); 1688 } 1689 uint64 macaddr = DEV_MAC + ((i + 10ull) << 40); 1690 netlink_device_change(&nlmsg, sock, devices[i].name, true, 0, &macaddr, devices[i].macsize, NULL); 1691 } 1692 close(sock); 1693 } 1694 1695 // Same as initialize_netdevices, but called in init net namespace. 
1696 static void initialize_netdevices_init(void) 1697 { 1698 #if SYZ_EXECUTOR 1699 if (!flag_net_devices) 1700 return; 1701 #endif 1702 int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 1703 if (sock == -1) 1704 fail("socket(AF_NETLINK) failed"); 1705 struct { 1706 const char* type; 1707 int macsize; 1708 bool noipv6; 1709 bool noup; 1710 } devtypes[] = { 1711 // NETROM device, see net/netrom/{af_netrom,nr_dev}.c 1712 {"nr", 7, true}, 1713 // ROSE device, see net/rose/{af_rose,rose_dev}.c 1714 // We don't up it yet because it crashes kernel right away: 1715 // https://groups.google.com/d/msg/syzkaller/v-4B3zoBC-4/02SCKEzJBwAJ 1716 {"rose", 5, true, true}, 1717 }; 1718 unsigned i; 1719 for (i = 0; i < sizeof(devtypes) / sizeof(devtypes[0]); i++) { 1720 char dev[32], addr[32]; 1721 sprintf(dev, "%s%d", devtypes[i].type, (int)procid); 1722 // Note: syscall descriptions know these addresses. 1723 sprintf(addr, "172.30.%d.%d", i, (int)procid + 1); 1724 netlink_add_addr4(&nlmsg, sock, dev, addr); 1725 if (!devtypes[i].noipv6) { 1726 sprintf(addr, "fe88::%02x:%02x", i, (int)procid + 1); 1727 netlink_add_addr6(&nlmsg, sock, dev, addr); 1728 } 1729 int macsize = devtypes[i].macsize; 1730 uint64 macaddr = 0xbbbbbb + ((unsigned long long)i << (8 * (macsize - 2))) + 1731 (procid << (8 * (macsize - 1))); 1732 netlink_device_change(&nlmsg, sock, dev, !devtypes[i].noup, 0, &macaddr, macsize, NULL); 1733 } 1734 close(sock); 1735 1736 #if SYZ_EXECUTOR || SYZ_NIC_VF 1737 find_vf_interface(); 1738 #endif 1739 } 1740 #endif 1741 1742 #if SYZ_EXECUTOR || SYZ_NET_INJECTION && (__NR_syz_extract_tcp_res || SYZ_REPEAT) 1743 #include <errno.h> 1744 1745 static int read_tun(char* data, int size) 1746 { 1747 if (tunfd < 0) 1748 return -1; 1749 1750 int rv = read(tunfd, data, size); 1751 if (rv < 0) { 1752 // EBADF can be returned if the test closes tunfd with close_range syscall. 1753 // Tun sometimes returns EBADFD, unclear if it's a kernel bug or not. 
1754 if (errno == EAGAIN || errno == EBADF || errno == EBADFD) 1755 return -1; 1756 fail("tun read failed"); 1757 } 1758 return rv; 1759 } 1760 #endif 1761 1762 #if SYZ_EXECUTOR || __NR_syz_emit_ethernet && SYZ_NET_INJECTION 1763 #include <stdbool.h> 1764 #include <sys/uio.h> 1765 1766 #if ENABLE_NAPI_FRAGS 1767 #define MAX_FRAGS 4 1768 struct vnet_fragmentation { 1769 uint32 full; 1770 uint32 count; 1771 uint32 frags[MAX_FRAGS]; 1772 }; 1773 #endif 1774 1775 static long syz_emit_ethernet(volatile long a0, volatile long a1, volatile long a2) 1776 { 1777 // syz_emit_ethernet(len len[packet], packet ptr[in, eth_packet], frags ptr[in, vnet_fragmentation, opt]) 1778 // vnet_fragmentation { 1779 // full int32[0:1] 1780 // count int32[1:4] 1781 // frags array[int32[0:4096], 4] 1782 // } 1783 if (tunfd < 0) 1784 return (uintptr_t)-1; 1785 1786 uint32 length = a0; 1787 char* data = (char*)a1; 1788 debug_dump_data(data, length); 1789 1790 #if ENABLE_NAPI_FRAGS 1791 struct vnet_fragmentation* frags = (struct vnet_fragmentation*)a2; 1792 struct iovec vecs[MAX_FRAGS + 1]; 1793 uint32 nfrags = 0; 1794 if (!tun_frags_enabled || frags == NULL) { 1795 vecs[nfrags].iov_base = data; 1796 vecs[nfrags].iov_len = length; 1797 nfrags++; 1798 } else { 1799 bool full = frags->full; 1800 uint32 count = frags->count; 1801 if (count > MAX_FRAGS) 1802 count = MAX_FRAGS; 1803 uint32 i; 1804 for (i = 0; i < count && length != 0; i++) { 1805 uint32 size = frags->frags[i]; 1806 if (size > length) 1807 size = length; 1808 vecs[nfrags].iov_base = data; 1809 vecs[nfrags].iov_len = size; 1810 nfrags++; 1811 data += size; 1812 length -= size; 1813 } 1814 if (length != 0 && (full || nfrags == 0)) { 1815 vecs[nfrags].iov_base = data; 1816 vecs[nfrags].iov_len = length; 1817 nfrags++; 1818 } 1819 } 1820 return writev(tunfd, vecs, nfrags); 1821 #else 1822 return write(tunfd, data, length); 1823 #endif 1824 } 1825 #endif 1826 1827 #if SYZ_EXECUTOR || __NR_syz_io_uring_submit || __NR_syz_io_uring_complete 
|| __NR_syz_io_uring_setup 1828 1829 #define SIZEOF_IO_URING_SQE 64 1830 #define SIZEOF_IO_URING_CQE 16 1831 1832 // Once a io_uring is set up by calling io_uring_setup, the offsets to the member fields 1833 // to be used on the mmap'ed area are set in structs io_sqring_offsets and io_cqring_offsets. 1834 // Except io_sqring_offsets.array, the offsets are static while all depend on how struct io_rings 1835 // is organized in code. The offsets can be marked as resources in syzkaller descriptions but 1836 // this makes it difficult to generate correct programs by the fuzzer. Thus, the offsets are 1837 // hard-coded here (and in the descriptions), and array offset is later computed once the number 1838 // of entries is available. Another way to obtain the offsets is to setup another io_uring here 1839 // and use what it returns. It is slower but might be more maintainable. 1840 #define SQ_HEAD_OFFSET 0 1841 #define SQ_TAIL_OFFSET 64 1842 #define SQ_RING_MASK_OFFSET 256 1843 #define SQ_RING_ENTRIES_OFFSET 264 1844 #define SQ_FLAGS_OFFSET 276 1845 #define SQ_DROPPED_OFFSET 272 1846 #define CQ_HEAD_OFFSET 128 1847 #define CQ_TAIL_OFFSET 192 1848 #define CQ_RING_MASK_OFFSET 260 1849 #define CQ_RING_ENTRIES_OFFSET 268 1850 #define CQ_RING_OVERFLOW_OFFSET 284 1851 #define CQ_FLAGS_OFFSET 280 1852 #define CQ_CQES_OFFSET 320 1853 1854 #if SYZ_EXECUTOR || __NR_syz_io_uring_complete 1855 1856 // From linux/io_uring.h 1857 struct io_uring_cqe { 1858 uint64 user_data; 1859 uint32 res; 1860 uint32 flags; 1861 }; 1862 1863 static long syz_io_uring_complete(volatile long a0) 1864 { 1865 // syzlang: syz_io_uring_complete(ring_ptr ring_ptr) 1866 // C: syz_io_uring_complete(char* ring_ptr) 1867 1868 // It is not checked if the ring is empty 1869 1870 // Cast to original 1871 char* ring_ptr = (char*)a0; 1872 1873 // Compute the head index and the next head value 1874 uint32 cq_ring_mask = *(uint32*)(ring_ptr + CQ_RING_MASK_OFFSET); 1875 uint32* cq_head_ptr = (uint32*)(ring_ptr + 
CQ_HEAD_OFFSET); 1876 uint32 cq_head = *cq_head_ptr & cq_ring_mask; 1877 uint32 cq_head_next = *cq_head_ptr + 1; 1878 1879 // Compute the ptr to the src cq entry on the ring 1880 char* cqe_src = ring_ptr + CQ_CQES_OFFSET + cq_head * SIZEOF_IO_URING_CQE; 1881 1882 // Get the cq entry from the ring 1883 struct io_uring_cqe cqe; 1884 memcpy(&cqe, cqe_src, sizeof(cqe)); 1885 1886 // Advance the head. Head is a free-flowing integer and relies on natural wrapping. 1887 // Ensure that the kernel will never see a head update without the preceeding CQE 1888 // stores being done. 1889 __atomic_store_n(cq_head_ptr, cq_head_next, __ATOMIC_RELEASE); 1890 1891 // In the descriptions (sys/linux/io_uring.txt), openat and openat2 are passed 1892 // with a unique range of sqe.user_data (0x12345 and 0x23456) to identify the operations 1893 // which produces an fd instance. Check cqe.user_data, which should be the same 1894 // as sqe.user_data for that operation. If it falls in that unique range, return 1895 // cqe.res as fd. Otherwise, just return an invalid fd. 1896 return (cqe.user_data == 0x12345 || cqe.user_data == 0x23456) ? 
(long)cqe.res : (long)-1; 1897 } 1898 1899 #endif 1900 1901 #if SYZ_EXECUTOR || __NR_syz_io_uring_setup 1902 1903 struct io_sqring_offsets { 1904 uint32 head; 1905 uint32 tail; 1906 uint32 ring_mask; 1907 uint32 ring_entries; 1908 uint32 flags; 1909 uint32 dropped; 1910 uint32 array; 1911 uint32 resv1; 1912 uint64 resv2; 1913 }; 1914 1915 struct io_cqring_offsets { 1916 uint32 head; 1917 uint32 tail; 1918 uint32 ring_mask; 1919 uint32 ring_entries; 1920 uint32 overflow; 1921 uint32 cqes; 1922 uint64 resv[2]; 1923 }; 1924 1925 struct io_uring_params { 1926 uint32 sq_entries; 1927 uint32 cq_entries; 1928 uint32 flags; 1929 uint32 sq_thread_cpu; 1930 uint32 sq_thread_idle; 1931 uint32 features; 1932 uint32 resv[4]; 1933 struct io_sqring_offsets sq_off; 1934 struct io_cqring_offsets cq_off; 1935 }; 1936 1937 #define IORING_OFF_SQ_RING 0 1938 #define IORING_OFF_SQES 0x10000000ULL 1939 #define IORING_SETUP_SQE128 (1U << 10) 1940 #define IORING_SETUP_CQE32 (1U << 11) 1941 1942 #include <sys/mman.h> 1943 #include <unistd.h> 1944 1945 // Wrapper for io_uring_setup and the subsequent mmap calls that map the ring and the sqes 1946 static long syz_io_uring_setup(volatile long a0, volatile long a1, volatile long a2, volatile long a3) 1947 { 1948 // syzlang: syz_io_uring_setup(entries int32[1:IORING_MAX_ENTRIES], params ptr[inout, io_uring_params], ring_ptr ptr[out, ring_ptr], sqes_ptr ptr[out, sqes_ptr]) fd_io_uring 1949 // C: syz_io_uring_setup(uint32 entries, struct io_uring_params* params, void** ring_ptr_out, void** sqes_ptr_out) // returns uint32 fd_io_uring 1950 1951 // Cast to original 1952 uint32 entries = (uint32)a0; 1953 struct io_uring_params* setup_params = (struct io_uring_params*)a1; 1954 void** ring_ptr_out = (void**)a2; 1955 void** sqes_ptr_out = (void**)a3; 1956 // Temporarily disable IORING_SETUP_CQE32 and IORING_SETUP_SQE128 that may change SIZEOF_IO_URING_CQE and SIZEOF_IO_URING_SQE. 1957 // Tracking bug: https://github.com/google/syzkaller/issues/4531. 
1958 setup_params->flags &= ~(IORING_SETUP_CQE32 | IORING_SETUP_SQE128); 1959 uint32 fd_io_uring = syscall(__NR_io_uring_setup, entries, setup_params); 1960 1961 // Compute the ring sizes 1962 uint32 sq_ring_sz = setup_params->sq_off.array + setup_params->sq_entries * sizeof(uint32); 1963 uint32 cq_ring_sz = setup_params->cq_off.cqes + setup_params->cq_entries * SIZEOF_IO_URING_CQE; 1964 1965 // Asssumed IORING_FEAT_SINGLE_MMAP, which is always the case with the current implementation 1966 // The implication is that the sq_ring_ptr and the cq_ring_ptr are the same but the 1967 // difference is in the offsets to access the fields of these rings. 1968 uint32 ring_sz = sq_ring_sz > cq_ring_sz ? sq_ring_sz : cq_ring_sz; 1969 *ring_ptr_out = mmap(0, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd_io_uring, IORING_OFF_SQ_RING); 1970 1971 uint32 sqes_sz = setup_params->sq_entries * SIZEOF_IO_URING_SQE; 1972 *sqes_ptr_out = mmap(0, sqes_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd_io_uring, IORING_OFF_SQES); 1973 1974 uint32* array = (uint32*)((uintptr_t)*ring_ptr_out + setup_params->sq_off.array); 1975 for (uint32 index = 0; index < entries; index++) 1976 array[index] = index; 1977 1978 return fd_io_uring; 1979 } 1980 1981 #endif 1982 1983 #if SYZ_EXECUTOR || __NR_syz_io_uring_submit 1984 1985 static long syz_io_uring_submit(volatile long a0, volatile long a1, volatile long a2) 1986 { 1987 // syzlang: syz_io_uring_submit(ring_ptr ring_ptr, sqes_ptr sqes_ptr, sqe ptr[in, io_uring_sqe]) 1988 // C: syz_io_uring_submit(char* ring_ptr, io_uring_sqe* sqes_ptr, io_uring_sqe* sqe) 1989 1990 // It is not checked if the ring is full 1991 1992 // Cast to original 1993 char* ring_ptr = (char*)a0; // This will be exposed to offsets in bytes 1994 char* sqes_ptr = (char*)a1; 1995 1996 char* sqe = (char*)a2; 1997 1998 uint32 sq_ring_mask = *(uint32*)(ring_ptr + SQ_RING_MASK_OFFSET); 1999 uint32* sq_tail_ptr = (uint32*)(ring_ptr + SQ_TAIL_OFFSET); 2000 uint32 
sq_tail = *sq_tail_ptr & sq_ring_mask; 2001 2002 // Get the ptr to the destination for the sqe 2003 char* sqe_dest = sqes_ptr + sq_tail * SIZEOF_IO_URING_SQE; 2004 2005 // Write the sqe entry to its destination in sqes 2006 memcpy(sqe_dest, sqe, SIZEOF_IO_URING_SQE); 2007 2008 // Write the index to the sqe array 2009 uint32 sq_tail_next = *sq_tail_ptr + 1; 2010 2011 // Advance the tail. Tail is a free-flowing integer and relies on natural wrapping. 2012 // Ensure that the kernel will never see a tail update without the preceeding SQE 2013 // stores being done. 2014 __atomic_store_n(sq_tail_ptr, sq_tail_next, __ATOMIC_RELEASE); 2015 2016 // Now the application is free to call io_uring_enter() to submit the sqe 2017 return 0; 2018 } 2019 2020 #endif 2021 2022 #endif 2023 2024 #if SYZ_EXECUTOR || __NR_syz_usbip_server_init 2025 2026 #include <errno.h> 2027 #include <fcntl.h> 2028 #include <linux/usb/ch9.h> 2029 #include <stdbool.h> 2030 #include <stdio.h> 2031 #include <stdlib.h> 2032 #include <string.h> 2033 #include <sys/socket.h> 2034 #include <unistd.h> 2035 2036 // This should be coherent with CONFIG_USBIP_VHCI_HC_PORTS. 2037 #define VHCI_HC_PORTS 8 2038 #define VHCI_PORTS (VHCI_HC_PORTS * 2) 2039 2040 static long syz_usbip_server_init(volatile long a0) 2041 { 2042 // port_alloc[0] corresponds to ports which can be used by usb2 and 2043 // port_alloc[1] corresponds to ports which can be used by usb3. 2044 static int port_alloc[2]; 2045 2046 int speed = (int)a0; 2047 bool usb3 = (speed == USB_SPEED_SUPER); 2048 2049 int socket_pair[2]; 2050 if (socketpair(AF_UNIX, SOCK_STREAM, 0, socket_pair)) { 2051 // This can happen if the test calls prlimit(RLIMIT_AS). 
2052 debug("syz_usbip_server_init: socketpair failed (%d)\n", errno); 2053 return -1; 2054 } 2055 2056 int client_fd = socket_pair[0]; 2057 int server_fd = socket_pair[1]; 2058 2059 int available_port_num = __atomic_fetch_add(&port_alloc[usb3], 1, __ATOMIC_RELAXED); 2060 if (available_port_num > VHCI_HC_PORTS) { 2061 debug("syz_usbip_server_init : no more available port for : %d\n", available_port_num); 2062 return -1; 2063 } 2064 2065 // Each port number corresponds to a particular vhci_hcd (USB/IP Virtual Host Controller) and it is used by either 2066 // an usb2 device or usb3 device. There are 16 ports available in each vhci_hcd. 2067 // (VHCI_PORTS = 16 in our case.) When they are occupied, the following vhci_hcd's ports are used. 2068 // First 16 ports correspond to vhci_hcd0, next 16 ports correspond to 2069 // vhci_hcd1 etc. In a vhci_hcd, first 8 ports are used by usb2 devices and last 8 are used by usb3 devices. 2070 int port_num = procid * VHCI_PORTS + usb3 * VHCI_HC_PORTS + available_port_num; 2071 2072 // Under normal USB/IP usage, devid represents the device ID on the server. 2073 // When fuzzing with syzkaller we don't have an actual server or an actual device, so use 0 for devid. 2074 char buffer[100]; 2075 sprintf(buffer, "%d %d %s %d", port_num, client_fd, "0", speed); 2076 2077 write_file("/sys/devices/platform/vhci_hcd.0/attach", buffer); 2078 return server_fd; 2079 } 2080 2081 #endif 2082 2083 #if SYZ_EXECUTOR || __NR_syz_btf_id_by_name 2084 2085 #include <errno.h> 2086 #include <fcntl.h> 2087 #include <stdbool.h> 2088 #include <stddef.h> 2089 #include <stdio.h> 2090 #include <stdlib.h> 2091 #include <string.h> 2092 #include <sys/stat.h> 2093 #include <unistd.h> 2094 2095 // Some items in linux/btf.h are relatively new, so we copy them here for 2096 // backward compatibility. 
2097 #define BTF_MAGIC 0xeB9F 2098 2099 struct btf_header { 2100 __u16 magic; 2101 __u8 version; 2102 __u8 flags; 2103 __u32 hdr_len; 2104 __u32 type_off; 2105 __u32 type_len; 2106 __u32 str_off; 2107 __u32 str_len; 2108 }; 2109 2110 #define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f) 2111 #define BTF_INFO_VLEN(info) ((info) & 0xffff) 2112 2113 #define BTF_KIND_INT 1 2114 #define BTF_KIND_ARRAY 3 2115 #define BTF_KIND_STRUCT 4 2116 #define BTF_KIND_UNION 5 2117 #define BTF_KIND_ENUM 6 2118 #define BTF_KIND_FUNC_PROTO 13 2119 #define BTF_KIND_VAR 14 2120 #define BTF_KIND_DATASEC 15 2121 2122 struct btf_type { 2123 __u32 name_off; 2124 __u32 info; 2125 union { 2126 __u32 size; 2127 __u32 type; 2128 }; 2129 }; 2130 2131 struct btf_enum { 2132 __u32 name_off; 2133 __s32 val; 2134 }; 2135 2136 struct btf_array { 2137 __u32 type; 2138 __u32 index_type; 2139 __u32 nelems; 2140 }; 2141 2142 struct btf_member { 2143 __u32 name_off; 2144 __u32 type; 2145 __u32 offset; 2146 }; 2147 2148 struct btf_param { 2149 __u32 name_off; 2150 __u32 type; 2151 }; 2152 2153 struct btf_var { 2154 __u32 linkage; 2155 }; 2156 2157 struct btf_var_secinfo { 2158 __u32 type; 2159 __u32 offset; 2160 __u32 size; 2161 }; 2162 2163 // Set the limit on the maximum size of btf/vmlinux to be 10 MiB. 2164 #define VMLINUX_MAX_SUPPORT_SIZE (10 * 1024 * 1024) 2165 2166 // Read out all the content of /sys/kernel/btf/vmlinux to the fixed address 2167 // buffer and return it. Return NULL if failed. 2168 static char* read_btf_vmlinux() 2169 { 2170 static bool is_read = false; 2171 static char buf[VMLINUX_MAX_SUPPORT_SIZE]; 2172 2173 // There could be a race condition here, but it should not be harmful. 
2174 if (is_read) 2175 return buf; 2176 2177 int fd = open("/sys/kernel/btf/vmlinux", O_RDONLY); 2178 if (fd < 0) 2179 return NULL; 2180 2181 unsigned long bytes_read = 0; 2182 for (;;) { 2183 ssize_t ret = read(fd, buf + bytes_read, 2184 VMLINUX_MAX_SUPPORT_SIZE - bytes_read); 2185 2186 if (ret < 0 || bytes_read + ret == VMLINUX_MAX_SUPPORT_SIZE) 2187 return NULL; 2188 2189 if (ret == 0) 2190 break; 2191 2192 bytes_read += ret; 2193 } 2194 2195 is_read = true; 2196 return buf; 2197 } 2198 2199 // Given a pointer to a C-string as the only argument a0, return the 2200 // corresponding btf ID for this name. Return -1 if there is an error when 2201 // opening the vmlinux file or the name is not found in vmlinux. 2202 static long syz_btf_id_by_name(volatile long a0) 2203 { 2204 // syzlang: syz_btf_id_by_name(name ptr[in, string]) btf_id 2205 // C: syz_btf_id_by_name(char* name) 2206 char* target = (char*)a0; 2207 2208 char* vmlinux = read_btf_vmlinux(); 2209 if (vmlinux == NULL) 2210 return -1; 2211 2212 struct btf_header* btf_header = (struct btf_header*)vmlinux; 2213 if (btf_header->magic != BTF_MAGIC) 2214 return -1; 2215 // These offsets are bytes relative to the end of the header. 2216 char* btf_type_sec = vmlinux + btf_header->hdr_len + btf_header->type_off; 2217 char* btf_str_sec = vmlinux + btf_header->hdr_len + btf_header->str_off; 2218 // Scan through the btf type section, and find a type description that 2219 // matches the provided name. 2220 unsigned int bytes_parsed = 0; 2221 // BTF index starts at 1. 2222 long idx = 1; 2223 while (bytes_parsed < btf_header->type_len) { 2224 struct btf_type* btf_type = (struct btf_type*)(btf_type_sec + bytes_parsed); 2225 uint32 kind = BTF_INFO_KIND(btf_type->info); 2226 uint32 vlen = BTF_INFO_VLEN(btf_type->info); 2227 char* name = btf_str_sec + btf_type->name_off; 2228 2229 if (strcmp(name, target) == 0) 2230 return idx; 2231 2232 // From /include/uapi/linux/btf.h, some kinds of types are 2233 // followed by extra data. 
2234 size_t skip; 2235 switch (kind) { 2236 case BTF_KIND_INT: 2237 skip = sizeof(uint32); 2238 break; 2239 case BTF_KIND_ENUM: 2240 skip = sizeof(struct btf_enum) * vlen; 2241 break; 2242 case BTF_KIND_ARRAY: 2243 skip = sizeof(struct btf_array); 2244 break; 2245 case BTF_KIND_STRUCT: 2246 case BTF_KIND_UNION: 2247 skip = sizeof(struct btf_member) * vlen; 2248 break; 2249 case BTF_KIND_FUNC_PROTO: 2250 skip = sizeof(struct btf_param) * vlen; 2251 break; 2252 case BTF_KIND_VAR: 2253 skip = sizeof(struct btf_var); 2254 break; 2255 case BTF_KIND_DATASEC: 2256 skip = sizeof(struct btf_var_secinfo) * vlen; 2257 break; 2258 default: 2259 skip = 0; 2260 } 2261 2262 bytes_parsed += sizeof(struct btf_type) + skip; 2263 idx++; 2264 } 2265 2266 return -1; 2267 } 2268 2269 #endif // SYZ_EXECUTOR || __NR_syz_btf_id_by_name 2270 2271 // Same as memcpy except that it accepts offset to dest and src. 2272 #if SYZ_EXECUTOR || __NR_syz_memcpy_off 2273 static long syz_memcpy_off(volatile long a0, volatile long a1, volatile long a2, volatile long a3, volatile long a4) 2274 { 2275 // C: syz_memcpy_off(void* dest, uint32 dest_off, void* src, uint32 src_off, size_t n) 2276 2277 // Cast to original 2278 char* dest = (char*)a0; 2279 uint32 dest_off = (uint32)a1; 2280 char* src = (char*)a2; 2281 uint32 src_off = (uint32)a3; 2282 size_t n = (size_t)a4; 2283 2284 return (long)memcpy(dest + dest_off, src + src_off, n); 2285 } 2286 #endif 2287 2288 #if SYZ_EXECUTOR || __NR_syz_create_resource 2289 // syz_create_resource(val intptr) intptr 2290 // Variants of this pseudo-syscall are used to create resources from arbitrary values. 2291 // For example: 2292 // syz_create_resource$foo(x int32) resource_foo 2293 // allows the fuzzer to use the same random int32 value in multiple syscalls, 2294 // and should increase probability of generation of syscalls related to foo. 
2295 static long syz_create_resource(volatile long val) 2296 { 2297 return val; 2298 } 2299 #endif 2300 2301 #if (SYZ_EXECUTOR || SYZ_REPEAT && SYZ_NET_INJECTION) && SYZ_EXECUTOR_USES_FORK_SERVER 2302 static void flush_tun() 2303 { 2304 #if SYZ_EXECUTOR 2305 if (!flag_net_injection) 2306 return; 2307 #endif 2308 char data[1000]; 2309 while (read_tun(&data[0], sizeof(data)) != -1) { 2310 } 2311 } 2312 #endif 2313 2314 #if SYZ_EXECUTOR || __NR_syz_extract_tcp_res && SYZ_NET_INJECTION 2315 #ifndef __ANDROID__ 2316 // Can't include <linux/ipv6.h>, since it causes 2317 // conflicts due to some structs redefinition. 2318 struct ipv6hdr { 2319 __u8 priority : 4, 2320 version : 4; 2321 __u8 flow_lbl[3]; 2322 2323 __be16 payload_len; 2324 __u8 nexthdr; 2325 __u8 hop_limit; 2326 2327 struct in6_addr saddr; 2328 struct in6_addr daddr; 2329 }; 2330 #endif 2331 2332 struct tcp_resources { 2333 uint32 seq; 2334 uint32 ack; 2335 }; 2336 2337 static long syz_extract_tcp_res(volatile long a0, volatile long a1, volatile long a2) 2338 { 2339 // syz_extract_tcp_res(res ptr[out, tcp_resources], seq_inc int32, ack_inc int32) 2340 2341 if (tunfd < 0) 2342 return (uintptr_t)-1; 2343 2344 // We just need this to be large enough to hold headers that we parse (ethernet/ip/tcp). 2345 // Rest of the packet (if any) will be silently truncated which is fine. 
2346 char data[1000]; 2347 int rv = read_tun(&data[0], sizeof(data)); 2348 if (rv == -1) 2349 return (uintptr_t)-1; 2350 size_t length = rv; 2351 debug_dump_data(data, length); 2352 2353 if (length < sizeof(struct ethhdr)) 2354 return (uintptr_t)-1; 2355 struct ethhdr* ethhdr = (struct ethhdr*)&data[0]; 2356 2357 struct tcphdr* tcphdr = 0; 2358 if (ethhdr->h_proto == htons(ETH_P_IP)) { 2359 if (length < sizeof(struct ethhdr) + sizeof(struct iphdr)) 2360 return (uintptr_t)-1; 2361 struct iphdr* iphdr = (struct iphdr*)&data[sizeof(struct ethhdr)]; 2362 if (iphdr->protocol != IPPROTO_TCP) 2363 return (uintptr_t)-1; 2364 if (length < sizeof(struct ethhdr) + iphdr->ihl * 4 + sizeof(struct tcphdr)) 2365 return (uintptr_t)-1; 2366 tcphdr = (struct tcphdr*)&data[sizeof(struct ethhdr) + iphdr->ihl * 4]; 2367 } else { 2368 if (length < sizeof(struct ethhdr) + sizeof(struct ipv6hdr)) 2369 return (uintptr_t)-1; 2370 struct ipv6hdr* ipv6hdr = (struct ipv6hdr*)&data[sizeof(struct ethhdr)]; 2371 // TODO: parse and skip extension headers. 
2372 if (ipv6hdr->nexthdr != IPPROTO_TCP) 2373 return (uintptr_t)-1; 2374 if (length < sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) 2375 return (uintptr_t)-1; 2376 tcphdr = (struct tcphdr*)&data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr)]; 2377 } 2378 2379 struct tcp_resources* res = (struct tcp_resources*)a0; 2380 res->seq = htonl((ntohl(tcphdr->seq) + (uint32)a1)); 2381 res->ack = htonl((ntohl(tcphdr->ack_seq) + (uint32)a2)); 2382 2383 debug("extracted seq: %08x\n", res->seq); 2384 debug("extracted ack: %08x\n", res->ack); 2385 2386 return 0; 2387 } 2388 #endif 2389 2390 #if SYZ_EXECUTOR || SYZ_CLOSE_FDS || __NR_syz_usb_connect || __NR_syz_usb_connect_ath9k 2391 #define MAX_FDS 30 2392 #endif 2393 2394 #if SYZ_EXECUTOR || __NR_syz_usb_connect || __NR_syz_usb_connect_ath9k || \ 2395 __NR_syz_usb_ep_write || __NR_syz_usb_ep_read || __NR_syz_usb_control_io || \ 2396 __NR_syz_usb_disconnect 2397 #include <errno.h> 2398 #include <fcntl.h> 2399 #include <linux/usb/ch9.h> 2400 #include <stdarg.h> 2401 #include <stdbool.h> 2402 #include <stddef.h> 2403 #include <stdio.h> 2404 #include <sys/mount.h> 2405 #include <sys/stat.h> 2406 #include <sys/types.h> 2407 2408 #include "common_usb_linux.h" 2409 #endif 2410 2411 #if SYZ_EXECUTOR || __NR_syz_open_dev 2412 #include <fcntl.h> 2413 #include <string.h> 2414 #include <sys/stat.h> 2415 #include <sys/types.h> 2416 2417 static long syz_open_dev(volatile long a0, volatile long a1, volatile long a2) 2418 { 2419 if (a0 == 0xc || a0 == 0xb) { 2420 // syz_open_dev$char(dev const[0xc], major intptr, minor intptr) fd 2421 // syz_open_dev$block(dev const[0xb], major intptr, minor intptr) fd 2422 char buf[128]; 2423 sprintf(buf, "/dev/%s/%d:%d", a0 == 0xc ? 
"char" : "block", (uint8)a1, (uint8)a2); 2424 return open(buf, O_RDWR, 0); 2425 } else { 2426 // syz_open_dev(dev ptr[in, string["/dev/foo#"]], id intptr, flags flags[open_flags]) fd 2427 unsigned long nb = a1; 2428 char buf[1024]; 2429 char* hash; 2430 strncpy(buf, (char*)a0, sizeof(buf) - 1); 2431 buf[sizeof(buf) - 1] = 0; 2432 while ((hash = strchr(buf, '#'))) { 2433 *hash = '0' + (char)(nb % 10); // 10 devices should be enough for everyone. 2434 nb /= 10; 2435 } 2436 return open(buf, a2 & ~O_CREAT, 0); 2437 } 2438 } 2439 #endif 2440 2441 #if SYZ_EXECUTOR || __NR_syz_open_procfs 2442 #include <fcntl.h> 2443 #include <string.h> 2444 #include <sys/stat.h> 2445 #include <sys/types.h> 2446 2447 static long syz_open_procfs(volatile long a0, volatile long a1) 2448 { 2449 // syz_open_procfs(pid pid, file ptr[in, string[procfs_file]]) fd 2450 2451 char buf[128]; 2452 memset(buf, 0, sizeof(buf)); 2453 if (a0 == 0) { 2454 snprintf(buf, sizeof(buf), "/proc/self/%s", (char*)a1); 2455 } else if (a0 == -1) { 2456 snprintf(buf, sizeof(buf), "/proc/thread-self/%s", (char*)a1); 2457 } else { 2458 snprintf(buf, sizeof(buf), "/proc/self/task/%d/%s", (int)a0, (char*)a1); 2459 } 2460 int fd = open(buf, O_RDWR); 2461 if (fd == -1) 2462 fd = open(buf, O_RDONLY); 2463 return fd; 2464 } 2465 #endif 2466 2467 #if SYZ_EXECUTOR || __NR_syz_open_pts 2468 #include <fcntl.h> 2469 #include <sys/ioctl.h> 2470 #include <sys/stat.h> 2471 #include <sys/types.h> 2472 2473 static long syz_open_pts(volatile long a0, volatile long a1) 2474 { 2475 // syz_openpts(fd fd[tty], flags flags[open_flags]) fd[tty] 2476 int ptyno = 0; 2477 if (ioctl(a0, TIOCGPTN, &ptyno)) 2478 return -1; 2479 char buf[128]; 2480 sprintf(buf, "/dev/pts/%d", ptyno); 2481 return open(buf, a1, 0); 2482 } 2483 #endif 2484 2485 #if SYZ_EXECUTOR || __NR_syz_init_net_socket 2486 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID 2487 #include <fcntl.h> 2488 #include <sched.h> 2489 
#include <sys/stat.h> 2490 #include <sys/types.h> 2491 #include <unistd.h> 2492 2493 // syz_init_net_socket opens a socket in init net namespace. 2494 // Used for families that can only be created in init net namespace. 2495 static long syz_init_net_socket(volatile long domain, volatile long type, volatile long proto) 2496 { 2497 int netns = open("/proc/self/ns/net", O_RDONLY); 2498 if (netns == -1) 2499 return netns; 2500 if (setns(kInitNetNsFd, 0)) 2501 return -1; 2502 int sock = syscall(__NR_socket, domain, type, proto); 2503 int err = errno; 2504 if (setns(netns, 0)) { 2505 // The operation may fail if the fd is closed by 2506 // a syscall from another thread. 2507 exitf("setns(netns) failed"); 2508 } 2509 close(netns); 2510 errno = err; 2511 return sock; 2512 } 2513 #else 2514 static long syz_init_net_socket(volatile long domain, volatile long type, volatile long proto) 2515 { 2516 return syscall(__NR_socket, domain, type, proto); 2517 } 2518 #endif 2519 #endif 2520 2521 #if SYZ_EXECUTOR || __NR_syz_socket_connect_nvme_tcp 2522 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE 2523 #include <arpa/inet.h> 2524 #include <fcntl.h> 2525 #include <netinet/in.h> 2526 #include <sched.h> 2527 #include <sys/socket.h> 2528 #include <sys/stat.h> 2529 #include <sys/types.h> 2530 #include <unistd.h> 2531 2532 static long syz_socket_connect_nvme_tcp() 2533 { 2534 struct sockaddr_in nvme_local_address; 2535 int netns = open("/proc/self/ns/net", O_RDONLY); 2536 if (netns == -1) 2537 return netns; 2538 if (setns(kInitNetNsFd, 0)) 2539 return -1; 2540 int sock = syscall(__NR_socket, AF_INET, SOCK_STREAM, 0x0); 2541 int err = errno; 2542 if (setns(netns, 0)) { 2543 // The operation may fail if the fd is closed by 2544 // a syscall from another thread. 
2545 exitf("setns(netns) failed"); 2546 } 2547 close(netns); 2548 errno = err; 2549 // We only connect to an NVMe-oF/TCP server on 127.0.0.1:4420 2550 nvme_local_address.sin_family = AF_INET; 2551 nvme_local_address.sin_port = htobe16(4420); 2552 nvme_local_address.sin_addr.s_addr = htobe32(0x7f000001); 2553 err = syscall(__NR_connect, sock, &nvme_local_address, sizeof(nvme_local_address)); 2554 if (err != 0) { 2555 close(sock); 2556 return -1; 2557 } 2558 return sock; 2559 } 2560 #else 2561 static long syz_socket_connect_nvme_tcp() 2562 { 2563 return syscall(__NR_socket, -1, 0, 0); 2564 } 2565 #endif 2566 #endif 2567 2568 #if SYZ_EXECUTOR || SYZ_VHCI_INJECTION 2569 #include <errno.h> 2570 #include <fcntl.h> 2571 #include <linux/rfkill.h> 2572 #include <pthread.h> 2573 #include <sys/epoll.h> 2574 #include <sys/ioctl.h> 2575 #include <sys/socket.h> 2576 #include <sys/uio.h> 2577 2578 #define BTPROTO_HCI 1 2579 #define ACL_LINK 1 2580 #define SCAN_PAGE 2 2581 2582 typedef struct { 2583 uint8 b[6]; 2584 } __attribute__((packed)) bdaddr_t; 2585 2586 #define HCI_COMMAND_PKT 1 2587 #define HCI_EVENT_PKT 4 2588 #define HCI_VENDOR_PKT 0xff 2589 2590 struct hci_command_hdr { 2591 uint16 opcode; 2592 uint8 plen; 2593 } __attribute__((packed)); 2594 2595 struct hci_event_hdr { 2596 uint8 evt; 2597 uint8 plen; 2598 } __attribute__((packed)); 2599 2600 #define HCI_EV_CONN_COMPLETE 0x03 2601 struct hci_ev_conn_complete { 2602 uint8 status; 2603 uint16 handle; 2604 bdaddr_t bdaddr; 2605 uint8 link_type; 2606 uint8 encr_mode; 2607 } __attribute__((packed)); 2608 2609 #define HCI_EV_CONN_REQUEST 0x04 2610 struct hci_ev_conn_request { 2611 bdaddr_t bdaddr; 2612 uint8 dev_class[3]; 2613 uint8 link_type; 2614 } __attribute__((packed)); 2615 2616 #define HCI_EV_REMOTE_FEATURES 0x0b 2617 struct hci_ev_remote_features { 2618 uint8 status; 2619 uint16 handle; 2620 uint8 features[8]; 2621 } __attribute__((packed)); 2622 2623 #define HCI_EV_CMD_COMPLETE 0x0e 2624 struct hci_ev_cmd_complete 
{ 2625 uint8 ncmd; 2626 uint16 opcode; 2627 } __attribute__((packed)); 2628 2629 #define HCI_OP_WRITE_SCAN_ENABLE 0x0c1a 2630 2631 #define HCI_OP_READ_BUFFER_SIZE 0x1005 2632 struct hci_rp_read_buffer_size { 2633 uint8 status; 2634 uint16 acl_mtu; 2635 uint8 sco_mtu; 2636 uint16 acl_max_pkt; 2637 uint16 sco_max_pkt; 2638 } __attribute__((packed)); 2639 2640 #define HCI_OP_READ_BD_ADDR 0x1009 2641 struct hci_rp_read_bd_addr { 2642 uint8 status; 2643 bdaddr_t bdaddr; 2644 } __attribute__((packed)); 2645 2646 #define HCI_EV_LE_META 0x3e 2647 struct hci_ev_le_meta { 2648 uint8 subevent; 2649 } __attribute__((packed)); 2650 2651 #define HCI_EV_LE_CONN_COMPLETE 0x01 2652 struct hci_ev_le_conn_complete { 2653 uint8 status; 2654 uint16 handle; 2655 uint8 role; 2656 uint8 bdaddr_type; 2657 bdaddr_t bdaddr; 2658 uint16 interval; 2659 uint16 latency; 2660 uint16 supervision_timeout; 2661 uint8 clk_accurancy; 2662 } __attribute__((packed)); 2663 2664 struct hci_dev_req { 2665 uint16 dev_id; 2666 uint32 dev_opt; 2667 }; 2668 2669 struct vhci_vendor_pkt_request { 2670 uint8 type; 2671 uint8 opcode; 2672 } __attribute__((packed)); 2673 2674 struct vhci_pkt { 2675 uint8 type; 2676 union { 2677 struct { 2678 uint8 opcode; 2679 uint16 id; 2680 } __attribute__((packed)) vendor_pkt; 2681 struct hci_command_hdr command_hdr; 2682 }; 2683 } __attribute__((packed)); 2684 2685 #define HCIDEVUP _IOW('H', 201, int) 2686 #define HCISETSCAN _IOW('H', 221, int) 2687 2688 static int vhci_fd = -1; 2689 2690 static void rfkill_unblock_all() 2691 { 2692 int fd = open("/dev/rfkill", O_WRONLY); 2693 if (fd < 0) 2694 fail("open /dev/rfkill failed"); 2695 struct rfkill_event event = {0}; 2696 event.idx = 0; 2697 event.type = RFKILL_TYPE_ALL; 2698 event.op = RFKILL_OP_CHANGE_ALL; 2699 event.soft = 0; 2700 event.hard = 0; 2701 if (write(fd, &event, sizeof(event)) < 0) 2702 fail("write rfkill event failed"); 2703 close(fd); 2704 } 2705 2706 static void hci_send_event_packet(int fd, uint8 evt, void* data, 
size_t data_len) 2707 { 2708 struct iovec iv[3]; 2709 2710 struct hci_event_hdr hdr; 2711 hdr.evt = evt; 2712 hdr.plen = data_len; 2713 2714 uint8 type = HCI_EVENT_PKT; 2715 2716 iv[0].iov_base = &type; 2717 iv[0].iov_len = sizeof(type); 2718 iv[1].iov_base = &hdr; 2719 iv[1].iov_len = sizeof(hdr); 2720 iv[2].iov_base = data; 2721 iv[2].iov_len = data_len; 2722 2723 if (writev(fd, iv, sizeof(iv) / sizeof(struct iovec)) < 0) 2724 fail("writev failed"); 2725 } 2726 2727 static void hci_send_event_cmd_complete(int fd, uint16 opcode, void* data, size_t data_len) 2728 { 2729 struct iovec iv[4]; 2730 2731 struct hci_event_hdr hdr; 2732 hdr.evt = HCI_EV_CMD_COMPLETE; 2733 hdr.plen = sizeof(struct hci_ev_cmd_complete) + data_len; 2734 2735 struct hci_ev_cmd_complete evt_hdr; 2736 evt_hdr.ncmd = 1; 2737 evt_hdr.opcode = opcode; 2738 2739 uint8 type = HCI_EVENT_PKT; 2740 2741 iv[0].iov_base = &type; 2742 iv[0].iov_len = sizeof(type); 2743 iv[1].iov_base = &hdr; 2744 iv[1].iov_len = sizeof(hdr); 2745 iv[2].iov_base = &evt_hdr; 2746 iv[2].iov_len = sizeof(evt_hdr); 2747 iv[3].iov_base = data; 2748 iv[3].iov_len = data_len; 2749 2750 if (writev(fd, iv, sizeof(iv) / sizeof(struct iovec)) < 0) 2751 fail("writev failed"); 2752 } 2753 2754 static bool process_command_pkt(int fd, char* buf, ssize_t buf_size) 2755 { 2756 struct hci_command_hdr* hdr = (struct hci_command_hdr*)buf; 2757 if (buf_size < (ssize_t)sizeof(struct hci_command_hdr) || 2758 hdr->plen != buf_size - sizeof(struct hci_command_hdr)) 2759 failmsg("process_command_pkt: invalid size", "suze=%zx", buf_size); 2760 2761 switch (hdr->opcode) { 2762 case HCI_OP_WRITE_SCAN_ENABLE: { 2763 uint8 status = 0; 2764 hci_send_event_cmd_complete(fd, hdr->opcode, &status, sizeof(status)); 2765 return true; 2766 } 2767 case HCI_OP_READ_BD_ADDR: { 2768 struct hci_rp_read_bd_addr rp = {0}; 2769 rp.status = 0; 2770 memset(&rp.bdaddr, 0xaa, 6); 2771 hci_send_event_cmd_complete(fd, hdr->opcode, &rp, sizeof(rp)); 2772 return false; 2773 } 
case HCI_OP_READ_BUFFER_SIZE: {
		struct hci_rp_read_buffer_size rp = {0};
		rp.status = 0;
		rp.acl_mtu = 1021;
		rp.sco_mtu = 96;
		rp.acl_max_pkt = 4;
		rp.sco_max_pkt = 6;
		hci_send_event_cmd_complete(fd, hdr->opcode, &rp, sizeof(rp));
		return false;
	}
	}

	char dummy[0xf9] = {0};
	hci_send_event_cmd_complete(fd, hdr->opcode, dummy, sizeof(dummy));
	return false;
}

// Thread body that services the virtual HCI controller.
// Reads packets from vhci_fd in a loop. Every packet whose first byte is
// HCI_COMMAND_PKT is handed (without that type byte) to process_command_pkt;
// the loop ends as soon as process_command_pkt returns true.
// A failed read is reported through fail(). The arg parameter is unused and
// the thread always returns NULL.
static void* event_thread(void* arg)
{
	while (1) {
		char buf[1024] = {0};
		ssize_t buf_size = read(vhci_fd, buf, sizeof(buf));
		if (buf_size < 0)
			fail("read failed");
		debug_dump_data(buf, buf_size);
		if (buf_size > 0 && buf[0] == HCI_COMMAND_PKT) {
			// Skip the one-byte packet type before processing the command.
			if (process_command_pkt(vhci_fd, buf + 1, buf_size - 1))
				break;
		}
	}
	return NULL;
}

// Matches hci_handles in sys/linux/dev_vhci.txt.
#define HCI_HANDLE_1 200
#define HCI_HANDLE_2 201

#define HCI_PRIMARY 0
#define HCI_OP_RESET 0x0c03

// Creates a virtual HCI device through /dev/vhci, brings it up and injects
// events that fake two connections: an ACL link (handle HCI_HANDLE_1) and a
// low-energy link (handle HCI_HANDLE_2).
// With SYZ_EXECUTOR the function does nothing unless flag_vhci_injection is set.
// On return vhci_fd refers to the vhci device at the fixed fd number kVhciFd.
// Every setup error is reported through fail().
static void initialize_vhci()
{
#if SYZ_EXECUTOR
	if (!flag_vhci_injection)
		return;
#endif

	int hci_sock = socket(AF_BLUETOOTH, SOCK_RAW, BTPROTO_HCI);
	if (hci_sock < 0)
		fail("socket(AF_BLUETOOTH, SOCK_RAW, BTPROTO_HCI) failed");

	vhci_fd = open("/dev/vhci", O_RDWR);
	if (vhci_fd == -1)
		fail("open /dev/vhci failed");

	// Remap vhci onto higher fd number to hide it from fuzzer and to keep
	// fd numbers stable regardless of whether vhci is opened or not (also see kMaxFd).
	const int kVhciFd = 202;
	if (dup2(vhci_fd, kVhciFd) < 0)
		fail("dup2(vhci_fd, kVhciFd) failed");
	close(vhci_fd);
	vhci_fd = kVhciFd;

	// Send a vendor packet request for an HCI_PRIMARY device. The vendor packet
	// read back below carries the id of the hci device.
	struct vhci_vendor_pkt_request vendor_pkt_req = {HCI_VENDOR_PKT, HCI_PRIMARY};
	if (write(vhci_fd, &vendor_pkt_req, sizeof(vendor_pkt_req)) != sizeof(vendor_pkt_req))
		fail("vendor_pkt_req write failed");

	struct vhci_pkt vhci_pkt;
	if (read(vhci_fd, &vhci_pkt, sizeof(vhci_pkt)) != sizeof(vhci_pkt))
		fail("vhci_pkt read failed");

	// The first packet may be an HCI_OP_RESET command instead of the vendor
	// packet: acknowledge it with a command-complete event and read again.
	if (vhci_pkt.type == HCI_COMMAND_PKT && vhci_pkt.command_hdr.opcode == HCI_OP_RESET) {
		char response[1] = {0};
		hci_send_event_cmd_complete(vhci_fd, HCI_OP_RESET, response, sizeof(response));

		if (read(vhci_fd, &vhci_pkt, sizeof(vhci_pkt)) != sizeof(vhci_pkt))
			fail("vhci_pkt read failed");
	}

	if (vhci_pkt.type != HCI_VENDOR_PKT)
		fail("wrong response packet");

	int dev_id = vhci_pkt.vendor_pkt.id;
	debug("hci dev id: %x\n", dev_id);

	// Start the thread that reads and processes command packets from vhci_fd
	// while the device is configured below; it is joined at the end.
	pthread_t th;
	if (pthread_create(&th, NULL, event_thread, NULL))
		fail("pthread_create failed");

	// Bring hci device up
	int ret = ioctl(hci_sock, HCIDEVUP, dev_id);
	if (ret) {
		// On ERFKILL unblock rfkill and retry once.
		if (errno == ERFKILL) {
			rfkill_unblock_all();
			ret = ioctl(hci_sock, HCIDEVUP, dev_id);
		}

		// EALREADY is tolerated; any other error is fatal.
		if (ret && errno != EALREADY)
			fail("ioctl(HCIDEVUP) failed");
	}

	// Activate page scanning mode which is required to fake a connection.
	struct hci_dev_req dr = {0};
	dr.dev_id = dev_id;
	dr.dev_opt = SCAN_PAGE;
	if (ioctl(hci_sock, HCISETSCAN, &dr))
		fail("ioctl(HCISETSCAN) failed");

	// Fake a connection with bd address 10:aa:aa:aa:aa:aa.
	// This is a fixed address used in sys/linux/socket_bluetooth.txt.
	struct hci_ev_conn_request request;
	memset(&request, 0, sizeof(request));
	memset(&request.bdaddr, 0xaa, 6);
	*(uint8*)&request.bdaddr.b[5] = 0x10;
	request.link_type = ACL_LINK;
	hci_send_event_packet(vhci_fd, HCI_EV_CONN_REQUEST, &request, sizeof(request));

	// Complete the connection for the same address on handle HCI_HANDLE_1.
	struct hci_ev_conn_complete complete;
	memset(&complete, 0, sizeof(complete));
	complete.status = 0;
	complete.handle = HCI_HANDLE_1;
	memset(&complete.bdaddr, 0xaa, 6);
	*(uint8*)&complete.bdaddr.b[5] = 0x10;
	complete.link_type = ACL_LINK;
	complete.encr_mode = 0;
	hci_send_event_packet(vhci_fd, HCI_EV_CONN_COMPLETE, &complete, sizeof(complete));

	// Report (all-zero) remote features for HCI_HANDLE_1.
	struct hci_ev_remote_features features;
	memset(&features, 0, sizeof(features));
	features.status = 0;
	features.handle = HCI_HANDLE_1;
	hci_send_event_packet(vhci_fd, HCI_EV_REMOTE_FEATURES, &features, sizeof(features));

	// Fake a low-energy connection with bd address 11:aa:aa:aa:aa:aa.
	// This is a fixed address used in sys/linux/socket_bluetooth.txt.
	// The LE meta header and the connection-complete payload are sent back to
	// back as one event.
	struct {
		struct hci_ev_le_meta le_meta;
		struct hci_ev_le_conn_complete le_conn;
	} le_conn;
	memset(&le_conn, 0, sizeof(le_conn));
	le_conn.le_meta.subevent = HCI_EV_LE_CONN_COMPLETE;
	memset(&le_conn.le_conn.bdaddr, 0xaa, 6);
	*(uint8*)&le_conn.le_conn.bdaddr.b[5] = 0x11;
	le_conn.le_conn.role = 1;
	le_conn.le_conn.handle = HCI_HANDLE_2;
	hci_send_event_packet(vhci_fd, HCI_EV_LE_META, &le_conn, sizeof(le_conn));

	// Wait until event_thread leaves its loop, then drop the control socket.
	pthread_join(th, NULL);
	close(hci_sock);
}
#endif

#if SYZ_EXECUTOR || __NR_syz_emit_vhci && SYZ_VHCI_INJECTION
// syz_emit_vhci pseudo-syscall: writes a1 bytes (a1 is truncated to uint32)
// from the buffer at address a0 to vhci_fd.
// Returns the result of write(), or -1 if vhci_fd is negative.
static long syz_emit_vhci(volatile long a0, volatile long a1)
{
	if (vhci_fd < 0)
		return (uintptr_t)-1;

	char* data = (char*)a0;
	uint32 length = a1;

	return write(vhci_fd, data, length);
}
#endif

#if SYZ_EXECUTOR || __NR_syz_genetlink_get_family_id
#include <errno.h>
#include <sys/socket.h>

// syz_genetlink_get_family_id pseudo-syscall: looks up the generic netlink
// family id for the family name string at address `name` using
// netlink_query_family_id.
// If sock_arg is negative, a temporary NETLINK_GENERIC socket is created for
// the query and closed afterwards; otherwise sock_arg is used as the socket
// and is left open.
// Returns the value from netlink_query_family_id, or -1 if the socket could
// not be created or the query returned a negative value.
static long syz_genetlink_get_family_id(volatile long name, volatile long sock_arg)
{
	debug("syz_genetlink_get_family_id(%s, %d)\n", (char*)name, (int)sock_arg);
	int fd = sock_arg;
	if (fd < 0) {
		fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
		if (fd == -1) {
			debug("syz_genetlink_get_family_id: socket failed: %d\n", errno);
			return -1;
		}
	}
	struct nlmsg nlmsg_tmp;
	int ret = netlink_query_family_id(&nlmsg_tmp, fd, (char*)name, false);
	// Only close the socket if it was created above.
	if ((int)sock_arg < 0)
		close(fd);
	if (ret < 0) {
		debug("syz_genetlink_get_family_id: netlink_query_family_id failed: %d\n", ret);
		return -1;
	}

	return ret;
}
#endif

#if SYZ_EXECUTOR || __NR_syz_mount_image || __NR_syz_read_part_table
#include "common_zlib.h"
#include <errno.h>
#include <fcntl.h>
#include <linux/loop.h>
#include <stdbool.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>

2977 // Setup the loop device needed for mounting a filesystem image. Takes care of 2978 // creating and initializing the underlying file backing the loop device and 2979 // returns the fds to the file and device. 2980 // Returns 0 on success, -1 otherwise. 2981 static int setup_loop_device(unsigned char* data, unsigned long size, const char* loopname, int* loopfd_p) 2982 { 2983 int err = 0, loopfd = -1; 2984 int memfd = syscall(__NR_memfd_create, "syzkaller", 0); 2985 if (memfd == -1) { 2986 err = errno; 2987 goto error; 2988 } 2989 if (puff_zlib_to_file(data, size, memfd)) { 2990 err = errno; 2991 debug("setup_loop_device: could not decompress data: %d\n", errno); 2992 goto error_close_memfd; 2993 } 2994 2995 loopfd = open(loopname, O_RDWR); 2996 if (loopfd == -1) { 2997 err = errno; 2998 debug("setup_loop_device: open failed: %d\n", errno); 2999 goto error_close_memfd; 3000 } 3001 if (ioctl(loopfd, LOOP_SET_FD, memfd)) { 3002 if (errno != EBUSY) { 3003 err = errno; 3004 goto error_close_loop; 3005 } 3006 ioctl(loopfd, LOOP_CLR_FD, 0); 3007 usleep(1000); 3008 if (ioctl(loopfd, LOOP_SET_FD, memfd)) { 3009 err = errno; 3010 goto error_close_loop; 3011 } 3012 } 3013 3014 close(memfd); 3015 *loopfd_p = loopfd; 3016 return 0; 3017 3018 error_close_loop: 3019 close(loopfd); 3020 error_close_memfd: 3021 close(memfd); 3022 error: 3023 errno = err; 3024 return -1; 3025 } 3026 3027 #if SYZ_EXECUTOR || __NR_syz_mount_image 3028 3029 static void reset_loop_device(const char* loopname) 3030 { 3031 int loopfd = open(loopname, O_RDWR); 3032 if (loopfd == -1) { 3033 debug("reset_loop_device: open failed: %d\n", errno); 3034 return; 3035 } 3036 if (ioctl(loopfd, LOOP_CLR_FD, 0)) { 3037 debug("reset_loop_device: LOOP_CLR_FD failed: %d\n", errno); 3038 } 3039 close(loopfd); 3040 } 3041 3042 #endif 3043 3044 #endif 3045 3046 #if SYZ_EXECUTOR || __NR_syz_read_part_table 3047 // syz_read_part_table(size len[img], img ptr[in, compressed_image]) 3048 static long 
syz_read_part_table(volatile unsigned long size, volatile long image) 3049 { 3050 unsigned char* data = (unsigned char*)image; 3051 int err = 0, res = -1, loopfd = -1; 3052 char loopname[64]; 3053 3054 snprintf(loopname, sizeof(loopname), "/dev/loop%llu", procid); 3055 if (setup_loop_device(data, size, loopname, &loopfd) == -1) 3056 return -1; 3057 3058 struct loop_info64 info; 3059 if (ioctl(loopfd, LOOP_GET_STATUS64, &info)) { 3060 err = errno; 3061 goto error_clear_loop; 3062 } 3063 #if SYZ_EXECUTOR 3064 cover_reset(0); 3065 #endif 3066 info.lo_flags |= LO_FLAGS_PARTSCAN; 3067 if (ioctl(loopfd, LOOP_SET_STATUS64, &info)) { 3068 err = errno; 3069 goto error_clear_loop; 3070 } 3071 res = 0; 3072 // If we managed to parse some partitions, symlink them into our work dir. 3073 for (unsigned long i = 1, j = 0; i < 8; i++) { 3074 snprintf(loopname, sizeof(loopname), "/dev/loop%llup%d", procid, (int)i); 3075 struct stat statbuf; 3076 if (stat(loopname, &statbuf) == 0) { 3077 char linkname[64]; 3078 snprintf(linkname, sizeof(linkname), "./file%d", (int)j++); 3079 if (symlink(loopname, linkname)) { 3080 debug("syz_read_part_table: symlink(%s, %s) failed: %d\n", loopname, linkname, errno); 3081 } 3082 } 3083 } 3084 error_clear_loop: 3085 if (res) 3086 ioctl(loopfd, LOOP_CLR_FD, 0); 3087 close(loopfd); 3088 errno = err; 3089 return res; 3090 } 3091 #endif 3092 3093 #if SYZ_EXECUTOR || __NR_syz_mount_image 3094 #include <stddef.h> 3095 #include <string.h> 3096 #include <sys/mount.h> 3097 3098 // syz_mount_image( 3099 // fs ptr[in, string[fs]], 3100 // dir ptr[in, filename], 3101 // flags flags[mount_flags], 3102 // opts ptr[in, fs_options], 3103 // chdir bool8, 3104 // size len[img], 3105 // img ptr[in, compressed_image] 3106 // ) fd_dir 3107 static long syz_mount_image( 3108 volatile long fsarg, 3109 volatile long dir, 3110 volatile long flags, 3111 volatile long optsarg, 3112 volatile long change_dir, 3113 volatile unsigned long size, 3114 volatile long image) 3115 { 3116 
unsigned char* data = (unsigned char*)image; 3117 int res = -1, err = 0, need_loop_device = !!size; 3118 char* mount_opts = (char*)optsarg; 3119 char* target = (char*)dir; 3120 char* fs = (char*)fsarg; 3121 char* source = NULL; 3122 char loopname[64]; 3123 3124 if (need_loop_device) { 3125 int loopfd; 3126 // Some filesystems (e.g. FUSE) do not need a backing device or 3127 // filesystem image. 3128 memset(loopname, 0, sizeof(loopname)); 3129 snprintf(loopname, sizeof(loopname), "/dev/loop%llu", procid); 3130 if (setup_loop_device(data, size, loopname, &loopfd) == -1) 3131 return -1; 3132 // If BLK_DEV_WRITE_MOUNTED is set, we won't be able to mount() 3133 // while holding the loop device fd. 3134 close(loopfd); 3135 source = loopname; 3136 } 3137 3138 mkdir(target, 0777); 3139 char opts[256]; 3140 memset(opts, 0, sizeof(opts)); 3141 // Leave some space for the additional options we append below. 3142 if (strlen(mount_opts) > (sizeof(opts) - 32)) { 3143 debug("ERROR: syz_mount_image parameter optsarg bigger than internal opts\n"); 3144 } 3145 strncpy(opts, mount_opts, sizeof(opts) - 32); 3146 if (strcmp(fs, "iso9660") == 0) { 3147 flags |= MS_RDONLY; 3148 } else if (strncmp(fs, "ext", 3) == 0) { 3149 // For ext2/3/4 we have to have errors=continue because the image 3150 // can contain errors=panic flag and can legally crash kernel. 3151 bool has_remount_ro = false; 3152 char* remount_ro_start = strstr(opts, "errors=remount-ro"); 3153 if (remount_ro_start != NULL) { 3154 // syzkaller can sometimes break the options format, so we have to make sure this option can really be parsed. 3155 char after = *(remount_ro_start + strlen("errors=remount-ro")); 3156 char before = remount_ro_start == opts ? 
'\0' : *(remount_ro_start - 1); 3157 has_remount_ro = ((before == '\0' || before == ',') && (after == '\0' || after == ',')); 3158 } 3159 if (strstr(opts, "errors=panic") || !has_remount_ro) 3160 strcat(opts, ",errors=continue"); 3161 } else if (strcmp(fs, "xfs") == 0) { 3162 // For xfs we need nouuid because xfs has a global uuids table 3163 // and if two parallel executors mounts fs with the same uuid, second mount fails. 3164 strcat(opts, ",nouuid"); 3165 } else if (strncmp(fs, "gfs2", 4) == 0 && (strstr(opts, "errors=panic") || strstr(opts, "debug"))) { 3166 // Otherwise ordinary withdrawals turn into kernel panics, see #6189. 3167 strcat(opts, ",errors=withdraw"); 3168 } 3169 debug("syz_mount_image: size=%llu loop='%s' dir='%s' fs='%s' flags=%llu opts='%s'\n", (uint64)size, loopname, target, fs, (uint64)flags, opts); 3170 #if SYZ_EXECUTOR 3171 cover_reset(0); 3172 #endif 3173 res = mount(source, target, fs, flags, opts); 3174 if (res == -1) { 3175 debug("syz_mount_image > mount error: %d\n", errno); 3176 err = errno; 3177 goto error_clear_loop; 3178 } 3179 res = open(target, O_RDONLY | O_DIRECTORY); 3180 if (res == -1) { 3181 debug("syz_mount_image > open error: %d\n", errno); 3182 err = errno; 3183 goto error_clear_loop; 3184 } 3185 if (change_dir) { 3186 res = chdir(target); 3187 if (res == -1) { 3188 debug("syz_mount_image > chdir error: %d\n", errno); 3189 err = errno; 3190 } 3191 } 3192 3193 error_clear_loop: 3194 if (need_loop_device) 3195 reset_loop_device(loopname); 3196 errno = err; 3197 return res; 3198 } 3199 #endif 3200 3201 #if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu || __NR_syz_kvm_vgic_v3_setup || __NR_syz_kvm_setup_syzos_vm || __NR_syz_kvm_add_vcpu || __NR_syz_kvm_assert_syzos_uexit || __NR_syz_kvm_assert_reg || __NR_syz_kvm_assert_syzos_kvm_exit 3202 // KVM is not yet supported on RISC-V 3203 #if !GOARCH_riscv64 && !GOARCH_arm 3204 #include <errno.h> 3205 #include <fcntl.h> 3206 #include <linux/kvm.h> 3207 #include <stdarg.h> 3208 #include 
<stddef.h> 3209 #include <sys/ioctl.h> 3210 #include <sys/stat.h> 3211 3212 #if GOARCH_amd64 3213 #include "common_kvm_amd64.h" 3214 #elif GOARCH_386 3215 #include "common_kvm_386.h" 3216 #elif GOARCH_arm64 3217 #include "common_kvm_arm64.h" 3218 #elif GOARCH_ppc64 || GOARCH_ppc64le 3219 #include "common_kvm_ppc64.h" 3220 #elif !GOARCH_arm && (SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu) 3221 static volatile long syz_kvm_setup_cpu(volatile long a0, volatile long a1, volatile long a2, volatile long a3, volatile long a4, volatile long a5, volatile long a6, volatile long a7) 3222 { 3223 return 0; 3224 } 3225 #endif 3226 #endif 3227 #endif 3228 3229 #if (SYZ_EXECUTOR || SYZ_NET_RESET) && SYZ_EXECUTOR_USES_FORK_SERVER 3230 #include <errno.h> 3231 #include <net/if.h> 3232 #include <netinet/in.h> 3233 #include <string.h> 3234 #include <sys/socket.h> 3235 3236 #include <linux/net.h> 3237 3238 // checkpoint/reset_net_namespace partially resets net namespace to initial state 3239 // after each test. Currently it resets only ipv4 netfilter state. 3240 // Ideally, we just create a new net namespace for each test, 3241 // however it's too slow (1-1.5 seconds per namespace, not parallelizable). 3242 3243 // Linux headers do not compile for C++, so we have to define the structs manualy. 
// Capacity, in bytes, of the entry buffers used for every checkpointed table.
#define XT_TABLE_SIZE 1536
// Upper bound on the number of table entries accepted at checkpoint time;
// also the size of the counters scratch buffers used when resetting.
#define XT_MAX_ENTRIES 10

struct xt_counters {
	uint64 pcnt, bcnt;
};

// Buffer for getsockopt(IPT_SO_GET_INFO).
struct ipt_getinfo {
	char name[32];
	unsigned int valid_hooks;
	unsigned int hook_entry[5];
	unsigned int underflow[5];
	unsigned int num_entries;
	unsigned int size;
};

// Buffer for getsockopt(IPT_SO_GET_ENTRIES); entrytable receives up to
// XT_TABLE_SIZE bytes of entries.
struct ipt_get_entries {
	char name[32];
	unsigned int size;
	uint64 entrytable[XT_TABLE_SIZE / sizeof(uint64)];
};

// Buffer for setsockopt(IPT_SO_SET_REPLACE).
struct ipt_replace {
	char name[32];
	unsigned int valid_hooks;
	unsigned int num_entries;
	unsigned int size;
	unsigned int hook_entry[5];
	unsigned int underflow[5];
	unsigned int num_counters;
	struct xt_counters* counters;
	uint64 entrytable[XT_TABLE_SIZE / sizeof(uint64)];
};

// Checkpoint of one iptables table: the info read at checkpoint time and a
// ready-to-use replace request that restores it.
struct ipt_table_desc {
	const char* name;
	struct ipt_getinfo info;
	struct ipt_replace replace;
};

static struct ipt_table_desc ipv4_tables[] = {
	{.name = "filter"},
	{.name = "nat"},
	{.name = "mangle"},
	{.name = "raw"},
	{.name = "security"},
};

static struct ipt_table_desc ipv6_tables[] = {
	{.name = "filter"},
	{.name = "nat"},
	{.name = "mangle"},
	{.name = "raw"},
	{.name = "security"},
};

// IPT_SO_SET_REPLACE is passed to setsockopt and IPT_SO_GET_INFO to getsockopt
// below; both are defined as IPT_BASE_CTL.
#define IPT_BASE_CTL 64
#define IPT_SO_SET_REPLACE (IPT_BASE_CTL)
#define IPT_SO_GET_INFO (IPT_BASE_CTL)
#define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1)

// arptables counterparts of the ipt_* structs above; they differ only in the
// number of hooks (3 instead of 5).
struct arpt_getinfo {
	char name[32];
	unsigned int valid_hooks;
	unsigned int hook_entry[3];
	unsigned int underflow[3];
	unsigned int num_entries;
	unsigned int size;
};

struct arpt_get_entries {
	char name[32];
	unsigned int size;
	uint64 entrytable[XT_TABLE_SIZE / sizeof(uint64)];
};

struct arpt_replace {
	char name[32];
	unsigned int valid_hooks;
	unsigned int num_entries;
	unsigned int size;
	unsigned int hook_entry[3];
	unsigned int underflow[3];
	unsigned int num_counters;
	struct xt_counters* counters;
	uint64 entrytable[XT_TABLE_SIZE / sizeof(uint64)];
};

struct arpt_table_desc {
	const char* name;
	struct arpt_getinfo info;
	struct arpt_replace replace;
};

static struct arpt_table_desc arpt_tables[] = {
	{.name = "filter"},
};

#define ARPT_BASE_CTL 96
#define ARPT_SO_SET_REPLACE (ARPT_BASE_CTL)
#define ARPT_SO_GET_INFO (ARPT_BASE_CTL)
#define ARPT_SO_GET_ENTRIES (ARPT_BASE_CTL + 1)

// Records the current state of the given iptables tables so that
// reset_iptables can restore it later.
// tables/num_tables: descriptors to fill in; family: socket family
// (AF_INET or AF_INET6 in the callers below); level: getsockopt level
// (SOL_IP or SOL_IPV6 in the callers below).
// For every table the GET_INFO result is stored in table->info and
// table->replace is populated with the hooks, entry count, size, hook/underflow
// offsets and the raw entries.
// Returns silently if the socket cannot be created with EAFNOSUPPORT,
// ENOPROTOOPT or ENOENT. Tables whose GET_INFO fails with EPERM, ENOENT or
// ENOPROTOOPT are skipped. Any other error, a table larger than XT_TABLE_SIZE
// or with more than XT_MAX_ENTRIES entries is reported through failmsg().
static void checkpoint_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level)
{
	int fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
	if (fd == -1) {
		switch (errno) {
		case EAFNOSUPPORT:
		case ENOPROTOOPT:
		// ENOENT can be returned if smack lsm is used. Smack tries to apply netlbl to created sockets,
		// but the fuzzer can manage to remove netlbl entry for SOCK_STREAM/IPPROTO_TCP using
		// NLBL_MGMT_C_REMOVE, which is unfortunately global (not part of net namespace). In this state
		// creation of such sockets will fail all the time in all processes (so in some sense the machine
		// is indeed broken), but ignoring the error is still probably the best option given we allow
		// the fuzzer to invoke NLBL_MGMT_C_REMOVE in the first place.
		case ENOENT:
			return;
		}
		failmsg("iptable checkpoint: socket(SOCK_STREAM, IPPROTO_TCP) failed", "family=%d", family);
	}
	for (int i = 0; i < num_tables; i++) {
		struct ipt_table_desc* table = &tables[i];
		strcpy(table->info.name, table->name);
		strcpy(table->replace.name, table->name);
		socklen_t optlen = sizeof(table->info);
		if (getsockopt(fd, level, IPT_SO_GET_INFO, &table->info, &optlen)) {
			switch (errno) {
			case EPERM:
			case ENOENT:
			case ENOPROTOOPT:
				continue;
			}
			failmsg("iptable checkpoint: getsockopt(IPT_SO_GET_INFO) failed",
				"table=%s, family=%d", table->name, family);
		}
		debug("iptable checkpoint %s/%d: checkpoint entries=%d hooks=%x size=%d\n",
		      table->name, family, table->info.num_entries,
		      table->info.valid_hooks, table->info.size);
		// The entries must fit into the fixed-size entrytable buffers.
		if (table->info.size > sizeof(table->replace.entrytable))
			failmsg("iptable checkpoint: table size is too large", "table=%s, family=%d, size=%u",
				table->name, family, table->info.size);
		if (table->info.num_entries > XT_MAX_ENTRIES)
			failmsg("iptable checkpoint: too many counters", "table=%s, family=%d, counters=%d",
				table->name, family, table->info.num_entries);
		struct ipt_get_entries entries;
		memset(&entries, 0, sizeof(entries));
		strcpy(entries.name, table->name);
		entries.size = table->info.size;
		// Header of the request plus exactly info.size bytes of entries.
		optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size;
		if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
			failmsg("iptable checkpoint: getsockopt(IPT_SO_GET_ENTRIES) failed",
				"table=%s, family=%d", table->name, family);
		table->replace.valid_hooks = table->info.valid_hooks;
		table->replace.num_entries = table->info.num_entries;
		table->replace.size = table->info.size;
		memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry));
		memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow));
		memcpy(table->replace.entrytable, entries.entrytable, table->info.size);
	}
	close(fd);
}

// Restores the given iptables tables to the state saved by checkpoint_iptables.
// Tables whose checkpointed info.valid_hooks is 0 are skipped.
// For each remaining table the current info is read; if it equals the
// checkpointed info, the current entries are read and compared as well. The
// table is replaced with the saved one (IPT_SO_SET_REPLACE) only if the info
// or the entries differ.
// Socket creation errors EAFNOSUPPORT, ENOPROTOOPT and ENOENT make the
// function return silently; all other errors are reported through failmsg().
static void reset_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level)
{
	int fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
	if (fd == -1) {
		switch (errno) {
		case EAFNOSUPPORT:
		case ENOPROTOOPT:
		case ENOENT:
			return;
		}
		failmsg("iptable: socket(SOCK_STREAM, IPPROTO_TCP) failed", "family=%d", family);
	}
	for (int i = 0; i < num_tables; i++) {
		struct ipt_table_desc* table = &tables[i];
		if (table->info.valid_hooks == 0)
			continue;
		struct ipt_getinfo info;
		memset(&info, 0, sizeof(info));
		strcpy(info.name, table->name);
		socklen_t optlen = sizeof(info);
		if (getsockopt(fd, level, IPT_SO_GET_INFO, &info, &optlen))
			failmsg("iptable: getsockopt(IPT_SO_GET_INFO) failed",
				"table=%s, family=%d", table->name, family);
		if (memcmp(&table->info, &info, sizeof(table->info)) == 0) {
			struct ipt_get_entries entries;
			memset(&entries, 0, sizeof(entries));
			strcpy(entries.name, table->name);
			entries.size = table->info.size;
			optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size;
			if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
				failmsg("iptable: getsockopt(IPT_SO_GET_ENTRIES) failed",
					"table=%s, family=%d", table->name, family);
			// Header and entries both unchanged: nothing to restore.
			if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0)
				continue;
		}
		debug("iptable %s/%d: resetting\n", table->name, family);
		// Scratch buffer passed via replace.counters; its contents are not read here.
		// NOTE(review): the array holds XT_MAX_ENTRIES elements while num_counters is
		// taken from the *current* table (info.num_entries), which is not checked
		// against XT_MAX_ENTRIES in this function — confirm this cannot overrun.
		struct xt_counters counters[XT_MAX_ENTRIES];
		table->replace.num_counters = info.num_entries;
		table->replace.counters = counters;
		optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size;
		if (setsockopt(fd, level, IPT_SO_SET_REPLACE, &table->replace, optlen))
			failmsg("iptable: setsockopt(IPT_SO_SET_REPLACE) failed",
				"table=%s, family=%d", table->name, family);
	}
	close(fd);
}

// Records the current state of every table in arpt_tables so that
// reset_arptables can restore it later. Follows the same steps and error
// handling as checkpoint_iptables, using an AF_INET socket, SOL_IP and the
// ARPT_SO_* options.
static void checkpoint_arptables(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (fd == -1) {
		switch (errno) {
		case EAFNOSUPPORT:
		case ENOPROTOOPT:
		case ENOENT:
			return;
		}
		fail("arptable checkpoint: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP) failed");
	}
	for (unsigned i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) {
		struct arpt_table_desc* table = &arpt_tables[i];
		strcpy(table->info.name, table->name);
		strcpy(table->replace.name, table->name);
		socklen_t optlen = sizeof(table->info);
		if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &table->info, &optlen)) {
			switch (errno) {
			case EPERM:
			case ENOENT:
			case ENOPROTOOPT:
				continue;
			}
			failmsg("arptable checkpoint: getsockopt(ARPT_SO_GET_INFO) failed", "table=%s", table->name);
		}
		debug("arptable checkpoint %s: entries=%d hooks=%x size=%d\n",
		      table->name, table->info.num_entries, table->info.valid_hooks, table->info.size);
		if (table->info.size > sizeof(table->replace.entrytable))
			failmsg("arptable checkpoint: table size is too large",
				"table=%s, size=%u", table->name, table->info.size);
		if (table->info.num_entries > XT_MAX_ENTRIES)
			failmsg("arptable checkpoint: too many counters",
				"table=%s, counters=%u", table->name, table->info.num_entries);
		struct arpt_get_entries entries;
		memset(&entries, 0, sizeof(entries));
		strcpy(entries.name, table->name);
		entries.size = table->info.size;
		// Header of the request plus exactly info.size bytes of entries.
		optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size;
		if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen))
			failmsg("arptable checkpoint: getsockopt(ARPT_SO_GET_ENTRIES) failed", "table=%s", table->name);
		table->replace.valid_hooks = table->info.valid_hooks;
		table->replace.num_entries = table->info.num_entries;
		table->replace.size = table->info.size;
		memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry));
		memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow));
		memcpy(table->replace.entrytable, entries.entrytable, table->info.size);
	}
	close(fd);
}

// Restores every checkpointed table in arpt_tables. Follows the same steps as
// reset_iptables: a table is replaced (ARPT_SO_SET_REPLACE) only if its current
// info or entries differ from the checkpoint.
static void reset_arptables()
{
	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (fd == -1) {
		switch (errno) {
		case EAFNOSUPPORT:
		case ENOPROTOOPT:
		case ENOENT:
			return;
		}
		fail("arptable: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
	}
	for (unsigned i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) {
		struct arpt_table_desc* table = &arpt_tables[i];
		// Table was not checkpointed.
		if (table->info.valid_hooks == 0)
			continue;
		struct arpt_getinfo info;
		memset(&info, 0, sizeof(info));
		strcpy(info.name, table->name);
		socklen_t optlen = sizeof(info);
		if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &info, &optlen))
			failmsg("arptable: getsockopt(ARPT_SO_GET_INFO) failed", "table=%s", table->name);
		if (memcmp(&table->info, &info, sizeof(table->info)) == 0) {
			struct arpt_get_entries entries;
			memset(&entries, 0, sizeof(entries));
			strcpy(entries.name, table->name);
			entries.size = table->info.size;
			optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size;
			if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen))
				failmsg("arptable: getsockopt(ARPT_SO_GET_ENTRIES) failed", "table=%s", table->name);
			if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0)
				continue;
			debug("arptable %s: data changed\n", table->name);
		} else {
			debug("arptable %s: header changed\n", table->name);
		}
		debug("arptable %s: resetting\n", table->name);
		// Scratch buffer passed via replace.counters; its contents are not read here.
		// NOTE(review): sized by XT_MAX_ENTRIES while num_counters comes from the
		// current table's info.num_entries, which is unchecked here — confirm.
		struct xt_counters counters[XT_MAX_ENTRIES];
		table->replace.num_counters = info.num_entries;
		table->replace.counters = counters;
		optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size;
		if (setsockopt(fd, SOL_IP, ARPT_SO_SET_REPLACE, &table->replace, optlen))
			failmsg("arptable: setsockopt(ARPT_SO_SET_REPLACE) failed",
				"table=%s", table->name);
	}
	close(fd);
}

// ebtables.h is broken too:
// ebtables.h: In function ‘ebt_entry_target* ebt_get_target(ebt_entry*)’:
// ebtables.h:197:19: error: invalid conversion from ‘void*’ to ‘ebt_entry_target*’

#define NF_BR_NUMHOOKS 6
#define EBT_TABLE_MAXNAMELEN 32
#define EBT_CHAIN_MAXNAMELEN 32
#define EBT_BASE_CTL 128
#define EBT_SO_SET_ENTRIES (EBT_BASE_CTL)
#define EBT_SO_GET_INFO (EBT_BASE_CTL)
#define EBT_SO_GET_ENTRIES (EBT_SO_GET_INFO + 1)
#define EBT_SO_GET_INIT_INFO (EBT_SO_GET_ENTRIES + 1)
#define EBT_SO_GET_INIT_ENTRIES (EBT_SO_GET_INIT_INFO + 1)

// Request/response buffer for all EBT_SO_* socket options used below.
// Unlike the ipt/arpt structs, the entries live in a separate buffer that is
// referenced through the entries pointer.
struct ebt_replace {
	char name[EBT_TABLE_MAXNAMELEN];
	unsigned int valid_hooks;
	unsigned int nentries;
	unsigned int entries_size;
	struct ebt_entries* hook_entry[NF_BR_NUMHOOKS];
	unsigned int num_counters;
	struct ebt_counter* counters;
	char* entries;
};

struct ebt_entries {
	unsigned int distinguisher;
	char name[EBT_CHAIN_MAXNAMELEN];
	unsigned int counter_offset;
	int policy;
	unsigned int nentries;
	char data[0] __attribute__((aligned(__alignof__(struct ebt_replace))));
};

// Checkpoint of one ebtables table: the replace header plus storage for up to
// XT_TABLE_SIZE bytes of entries.
struct ebt_table_desc {
	const char* name;
	struct ebt_replace replace;
	char entrytable[XT_TABLE_SIZE];
};

static struct ebt_table_desc ebt_tables[] = {
	{.name = "filter"},
	{.name = "nat"},
	{.name = "broute"},
};

// Fills ebt_tables with the table state obtained through
// EBT_SO_GET_INIT_INFO / EBT_SO_GET_INIT_ENTRIES (presumably the tables'
// initial contents — confirm against the kernel's ebtables documentation).
// Returns silently if the socket cannot be created with EAFNOSUPPORT,
// ENOPROTOOPT or ENOENT. Tables whose GET_INIT_INFO fails with EPERM, ENOENT
// or ENOPROTOOPT are skipped. Other errors, or a table larger than
// XT_TABLE_SIZE, are reported through fail()/failmsg().
static void checkpoint_ebtables(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (fd == -1) {
		switch (errno) {
		case EAFNOSUPPORT:
		case ENOPROTOOPT:
		case ENOENT:
			return;
		}
		fail("ebtable checkpoint: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
	}
	for (size_t i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) {
		struct ebt_table_desc* table = &ebt_tables[i];
		strcpy(table->replace.name, table->name);
		socklen_t optlen = sizeof(table->replace);
		if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_INFO, &table->replace, &optlen)) {
			switch (errno) {
			case EPERM:
			case ENOENT:
			case ENOPROTOOPT:
				continue;
			}
			failmsg("ebtable checkpoint: getsockopt(EBT_SO_GET_INIT_INFO) failed",
				"table=%s", table->name);
		}
		debug("ebtable checkpoint %s: entries=%d hooks=%x size=%d\n",
		      table->name, table->replace.nentries, table->replace.valid_hooks,
		      table->replace.entries_size);
		if (table->replace.entries_size > sizeof(table->entrytable))
			failmsg("ebtable checkpoint: table size is too large", "table=%s, size=%u",
				table->name, table->replace.entries_size);
		// No counters are requested; the entries go into the descriptor's own buffer.
		table->replace.num_counters = 0;
		table->replace.entries = table->entrytable;
		optlen = sizeof(table->replace) + table->replace.entries_size;
		if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_ENTRIES, &table->replace, &optlen))
			failmsg("ebtable checkpoint: getsockopt(EBT_SO_GET_INIT_ENTRIES) failed",
				"table=%s", table->name);
	}
	close(fd);
}

// Restores every checkpointed table in ebt_tables (those with a non-zero
// valid_hooks). The current header is read with EBT_SO_GET_INFO and, if it
// matches the checkpoint, the current entries are read and compared too; the
// table is rewritten with EBT_SO_SET_ENTRIES only when something differs.
// Socket creation errors EAFNOSUPPORT, ENOPROTOOPT and ENOENT make the
// function return silently; other errors are reported through fail()/failmsg().
static void reset_ebtables()
{
	int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (fd == -1) {
		switch (errno) {
		case EAFNOSUPPORT:
		case ENOPROTOOPT:
		case ENOENT:
			return;
		}
		fail("ebtable: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
	}
	for (unsigned i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) {
		struct ebt_table_desc* table = &ebt_tables[i];
		if (table->replace.valid_hooks == 0)
			continue;
		struct ebt_replace replace;
		memset(&replace, 0, sizeof(replace));
		strcpy(replace.name, table->name);
		socklen_t optlen = sizeof(replace);
		if (getsockopt(fd, SOL_IP, EBT_SO_GET_INFO, &replace, &optlen))
			failmsg("ebtable: getsockopt(EBT_SO_GET_INFO)", "table=%s", table->name);
		// Clear the counter count and the pointer-valued fields before the
		// header comparison (presumably so that they cannot cause a spurious
		// mismatch — confirm). The pointers are set again before the table
		// is written back.
		replace.num_counters = 0;
		table->replace.entries = 0;
		for (unsigned h = 0; h < NF_BR_NUMHOOKS; h++)
			table->replace.hook_entry[h] = 0;
		if (memcmp(&table->replace, &replace, sizeof(table->replace)) == 0) {
			char entrytable[XT_TABLE_SIZE];
			memset(&entrytable, 0, sizeof(entrytable));
			replace.entries = entrytable;
			optlen = sizeof(replace) + replace.entries_size;
			if (getsockopt(fd, SOL_IP, EBT_SO_GET_ENTRIES, &replace, &optlen))
				failmsg("ebtable: getsockopt(EBT_SO_GET_ENTRIES) failed", "table=%s", table->name);
			// Header and entries both unchanged: nothing to restore.
			if (memcmp(table->entrytable, entrytable, replace.entries_size) == 0)
				continue;
		}
		debug("ebtable %s: resetting\n", table->name);
		// Kernel does not seem to return actual entry points (wat?).
		// Rebuild them: the j-th valid hook points at the j-th struct ebt_entries
		// slot counted from the start of the saved entry table.
		for (unsigned j = 0, h = 0; h < NF_BR_NUMHOOKS; h++) {
			if (table->replace.valid_hooks & (1 << h)) {
				table->replace.hook_entry[h] = (struct ebt_entries*)table->entrytable + j;
				j++;
			}
		}
		table->replace.entries = table->entrytable;
		optlen = sizeof(table->replace) + table->replace.entries_size;
		if (setsockopt(fd, SOL_IP, EBT_SO_SET_ENTRIES, &table->replace, optlen))
			failmsg("ebtable: setsockopt(EBT_SO_SET_ENTRIES) failed", "table=%s", table->name);
	}
	close(fd);
}

// Saves the netfilter state (ebtables, arptables, ipv4 and ipv6 iptables) that
// reset_net_namespace restores later.
// With SYZ_EXECUTOR this is a no-op unless flag_net_reset is set and
// flag_sandbox_setuid is not.
static void checkpoint_net_namespace(void)
{
#if SYZ_EXECUTOR
	if (!flag_net_reset || flag_sandbox_setuid)
		return;
#endif
	checkpoint_ebtables();
	checkpoint_arptables();
	checkpoint_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP);
	checkpoint_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6);
}

// Restores the netfilter state saved by checkpoint_net_namespace.
// Guarded by the same flags as checkpoint_net_namespace.
static void reset_net_namespace(void)
{
#if SYZ_EXECUTOR
	if (!flag_net_reset || flag_sandbox_setuid)
		return;
#endif
	reset_ebtables();
	reset_arptables();
	reset_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP);
	reset_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6);
}
#endif

#if SYZ_EXECUTOR || (SYZ_CGROUPS && (SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID))
#include <fcntl.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>

static void mount_cgroups(const char* dir, const char** controllers, int count)
{
	if (mkdir(dir, 0777)) {
		debug("mkdir(%s) failed: %d\n", dir, errno);
		return;
	}
	// First, probe one-by-one to understand what controllers are present.
3732 char enabled[128] = {0}; 3733 int i = 0; 3734 for (; i < count; i++) { 3735 if (mount("none", dir, "cgroup", 0, controllers[i])) { 3736 debug("mount(%s, %s) failed: %d\n", dir, controllers[i], errno); 3737 continue; 3738 } 3739 umount(dir); 3740 strcat(enabled, ","); 3741 strcat(enabled, controllers[i]); 3742 } 3743 if (enabled[0] == 0) { 3744 if (rmdir(dir) && errno != EBUSY) 3745 failmsg("rmdir failed", "dir=%s", dir); 3746 return; 3747 } 3748 // Now mount all at once. 3749 if (mount("none", dir, "cgroup", 0, enabled + 1)) { 3750 // In systemd/stretch images this is failing with EBUSY 3751 // (systemd starts messing with these mounts?), 3752 // so we don't fail, but just log the error. 3753 debug("mount(%s, %s) failed: %d\n", dir, enabled + 1, errno); 3754 if (rmdir(dir) && errno != EBUSY) 3755 failmsg("rmdir failed", "dir=%s enabled=%s", dir, enabled); 3756 } 3757 if (chmod(dir, 0777)) { 3758 debug("chmod(%s) failed: %d\n", dir, errno); 3759 } 3760 } 3761 3762 static void mount_cgroups2(const char** controllers, int count) 3763 { 3764 if (mkdir("/syzcgroup/unified", 0777)) { 3765 debug("mkdir(/syzcgroup/unified) failed: %d\n", errno); 3766 return; 3767 } 3768 if (mount("none", "/syzcgroup/unified", "cgroup2", 0, NULL)) { 3769 debug("mount(cgroup2) failed: %d\n", errno); 3770 // For all cases when we don't end up mounting cgroup/cgroup2 3771 // in /syzcgroup/{unified,net,cpu}, we need to remove the dir. 3772 // Otherwise these will end up as normal dirs and the fuzzer may 3773 // create huge files there. These files won't be cleaned up 3774 // after tests and may easily consume all disk space. 3775 // EBUSY usually means that cgroup is already mounted there 3776 // by a previous run of e.g. syz-execprog. 
3777 if (rmdir("/syzcgroup/unified") && errno != EBUSY) 3778 fail("rmdir(/syzcgroup/unified) failed"); 3779 return; 3780 } 3781 if (chmod("/syzcgroup/unified", 0777)) { 3782 debug("chmod(/syzcgroup/unified) failed: %d\n", errno); 3783 } 3784 int control = open("/syzcgroup/unified/cgroup.subtree_control", O_WRONLY); 3785 if (control == -1) 3786 return; 3787 int i; 3788 for (i = 0; i < count; i++) 3789 if (write(control, controllers[i], strlen(controllers[i])) < 0) { 3790 debug("write(cgroup.subtree_control, %s) failed: %d\n", controllers[i], errno); 3791 } 3792 close(control); 3793 } 3794 3795 static void setup_cgroups() 3796 { 3797 // We want to cover both cgroup and cgroup2. 3798 // Each resource controller can be bound to only one of them, 3799 // so to cover both we divide all controllers into 3 arbitrary groups. 3800 // One group is then bound to cgroup2/unified, and 2 other groups 3801 // are bound to 2 cgroup hierarchies. 3802 // Note: we need to enable controllers one-by-one for both cgroup and cgroup2. 3803 // If we enable all at the same time and one of them fails (b/c of older kernel 3804 // or not enabled configs), then all will fail. 3805 const char* unified_controllers[] = {"+cpu", "+io", "+pids"}; 3806 const char* net_controllers[] = {"net", "net_prio", "devices", "blkio", "freezer"}; 3807 const char* cpu_controllers[] = {"cpuset", "cpuacct", "hugetlb", "rlimit", "memory"}; 3808 if (mkdir("/syzcgroup", 0777)) { 3809 // Can happen due to e.g. read-only file system (EROFS). 
3810 debug("mkdir(/syzcgroup) failed: %d\n", errno); 3811 return; 3812 } 3813 mount_cgroups2(unified_controllers, sizeof(unified_controllers) / sizeof(unified_controllers[0])); 3814 mount_cgroups("/syzcgroup/net", net_controllers, sizeof(net_controllers) / sizeof(net_controllers[0])); 3815 mount_cgroups("/syzcgroup/cpu", cpu_controllers, sizeof(cpu_controllers) / sizeof(cpu_controllers[0])); 3816 write_file("/syzcgroup/cpu/cgroup.clone_children", "1"); 3817 write_file("/syzcgroup/cpu/cpuset.memory_pressure_enabled", "1"); 3818 } 3819 3820 #if (SYZ_EXECUTOR || SYZ_REPEAT) && SYZ_EXECUTOR_USES_FORK_SERVER 3821 static void setup_cgroups_loop() 3822 { 3823 #if SYZ_EXECUTOR 3824 if (!flag_cgroups) 3825 return; 3826 #endif 3827 int pid = getpid(); 3828 char file[128]; 3829 char cgroupdir[64]; 3830 snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid); 3831 if (mkdir(cgroupdir, 0777)) { 3832 debug("mkdir(%s) failed: %d\n", cgroupdir, errno); 3833 } 3834 // Restrict number of pids per test process to prevent fork bombs. 3835 // We have up to 16 threads + main process + loop. 3836 // 32 pids should be enough for everyone. 3837 snprintf(file, sizeof(file), "%s/pids.max", cgroupdir); 3838 write_file(file, "32"); 3839 // Setup some v1 groups to make things more interesting. 3840 snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir); 3841 write_file(file, "%d", pid); 3842 snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid); 3843 if (mkdir(cgroupdir, 0777)) { 3844 debug("mkdir(%s) failed: %d\n", cgroupdir, errno); 3845 } 3846 snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir); 3847 write_file(file, "%d", pid); 3848 // Restrict memory consumption. 3849 // We have some syscalls that inherently consume lots of memory, 3850 // e.g. mounting some filesystem images requires at least 128MB 3851 // image in memory. We restrict RLIMIT_AS to 200MB. Here we gradually 3852 // increase memory limits to make things more interesting. 
3853 // Also this takes into account KASAN quarantine size. 3854 // If the limit is lower than KASAN quarantine size, then it can happen 3855 // so that we kill the process, but all of its memory is in quarantine 3856 // and is still accounted against memcg. As the result memcg won't 3857 // allow to allocate any memory in the parent and in the new test process. 3858 // The current limit of 300MB supports up to 9.6GB RAM (quarantine is 1/32). 3859 snprintf(file, sizeof(file), "%s/memory.soft_limit_in_bytes", cgroupdir); 3860 write_file(file, "%d", 299 << 20); 3861 snprintf(file, sizeof(file), "%s/memory.limit_in_bytes", cgroupdir); 3862 write_file(file, "%d", 300 << 20); 3863 snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid); 3864 if (mkdir(cgroupdir, 0777)) { 3865 debug("mkdir(%s) failed: %d\n", cgroupdir, errno); 3866 } 3867 snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir); 3868 write_file(file, "%d", pid); 3869 } 3870 3871 static void setup_cgroups_test() 3872 { 3873 #if SYZ_EXECUTOR 3874 if (!flag_cgroups) 3875 return; 3876 #endif 3877 char cgroupdir[64]; 3878 snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid); 3879 if (symlink(cgroupdir, "./cgroup")) { 3880 debug("symlink(%s, ./cgroup) failed: %d\n", cgroupdir, errno); 3881 } 3882 snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid); 3883 if (symlink(cgroupdir, "./cgroup.cpu")) { 3884 debug("symlink(%s, ./cgroup.cpu) failed: %d\n", cgroupdir, errno); 3885 } 3886 snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid); 3887 if (symlink(cgroupdir, "./cgroup.net")) { 3888 debug("symlink(%s, ./cgroup.net) failed: %d\n", cgroupdir, errno); 3889 } 3890 } 3891 #endif 3892 3893 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_NAMESPACE 3894 static void initialize_cgroups() 3895 { 3896 #if SYZ_EXECUTOR 3897 if (!flag_cgroups) 3898 return; 3899 #endif 3900 if (mkdir("./syz-tmp/newroot/syzcgroup", 0700)) 3901 fail("mkdir 
failed"); 3902 if (mkdir("./syz-tmp/newroot/syzcgroup/unified", 0700)) 3903 fail("mkdir failed"); 3904 if (mkdir("./syz-tmp/newroot/syzcgroup/cpu", 0700)) 3905 fail("mkdir failed"); 3906 if (mkdir("./syz-tmp/newroot/syzcgroup/net", 0700)) 3907 fail("mkdir failed"); 3908 unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE; 3909 if (mount("/syzcgroup/unified", "./syz-tmp/newroot/syzcgroup/unified", NULL, bind_mount_flags, NULL)) { 3910 debug("mount(cgroup2, MS_BIND) failed: %d\n", errno); 3911 } 3912 if (mount("/syzcgroup/cpu", "./syz-tmp/newroot/syzcgroup/cpu", NULL, bind_mount_flags, NULL)) { 3913 debug("mount(cgroup/cpu, MS_BIND) failed: %d\n", errno); 3914 } 3915 if (mount("/syzcgroup/net", "./syz-tmp/newroot/syzcgroup/net", NULL, bind_mount_flags, NULL)) { 3916 debug("mount(cgroup/net, MS_BIND) failed: %d\n", errno); 3917 } 3918 } 3919 #endif 3920 #endif 3921 3922 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_NAMESPACE 3923 static void setup_gadgetfs(); 3924 static void setup_binderfs(); 3925 static void setup_fusectl(); 3926 // Mount tmpfs and chroot into it in sandbox=none and sandbox=namespace. 3927 // This is to prevent persistent changes to the root file system (e.g. setting attributes) that may 3928 // hinder fuzzing. 3929 // See https://github.com/google/syzkaller/issues/4939 for more details. 3930 static void sandbox_common_mount_tmpfs(void) 3931 { 3932 // Android systems set fs.mount-max to a very low value, causing ENOSPC when doing the mounts below 3933 // (see https://github.com/google/syzkaller/issues/4972). 100K mounts should be enough for everyone. 
3934 write_file("/proc/sys/fs/mount-max", "100000"); 3935 if (mkdir("./syz-tmp", 0777)) 3936 fail("mkdir(syz-tmp) failed"); 3937 if (mount("", "./syz-tmp", "tmpfs", 0, NULL)) 3938 fail("mount(tmpfs) failed"); 3939 if (mkdir("./syz-tmp/newroot", 0777)) 3940 fail("mkdir failed"); 3941 if (mkdir("./syz-tmp/newroot/dev", 0700)) 3942 fail("mkdir failed"); 3943 unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE; 3944 if (mount("/dev", "./syz-tmp/newroot/dev", NULL, bind_mount_flags, NULL)) 3945 fail("mount(dev) failed"); 3946 if (mkdir("./syz-tmp/newroot/proc", 0700)) 3947 fail("mkdir failed"); 3948 if (mount("syz-proc", "./syz-tmp/newroot/proc", "proc", 0, NULL)) 3949 fail("mount(proc) failed"); 3950 if (mkdir("./syz-tmp/newroot/selinux", 0700)) 3951 fail("mkdir failed"); 3952 // selinux mount used to be at /selinux, but then moved to /sys/fs/selinux. 3953 const char* selinux_path = "./syz-tmp/newroot/selinux"; 3954 if (mount("/selinux", selinux_path, NULL, bind_mount_flags, NULL)) { 3955 if (errno != ENOENT) 3956 fail("mount(/selinux) failed"); 3957 if (mount("/sys/fs/selinux", selinux_path, NULL, bind_mount_flags, NULL) && errno != ENOENT) 3958 fail("mount(/sys/fs/selinux) failed"); 3959 } 3960 if (mkdir("./syz-tmp/newroot/sys", 0700)) 3961 fail("mkdir(/sys) failed"); 3962 if (mount("/sys", "./syz-tmp/newroot/sys", 0, bind_mount_flags, NULL)) 3963 fail("mount(sysfs) failed"); 3964 if (mount("/sys/kernel/debug", "./syz-tmp/newroot/sys/kernel/debug", NULL, bind_mount_flags, NULL) && errno != ENOENT) 3965 fail("mount(debug) failed"); 3966 if (mount("/sys/fs/smackfs", "./syz-tmp/newroot/sys/fs/smackfs", NULL, bind_mount_flags, NULL) && errno != ENOENT) 3967 fail("mount(smackfs) failed"); 3968 if (mount("/proc/sys/fs/binfmt_misc", "./syz-tmp/newroot/proc/sys/fs/binfmt_misc", NULL, bind_mount_flags, NULL) && errno != ENOENT) 3969 fail("mount(binfmt_misc) failed"); 3970 3971 // If user wants to supply custom inputs, those can be placed to /syz-inputs 3972 // That 
folder will be mounted to fuzzer sandbox 3973 // https://groups.google.com/g/syzkaller/c/U-DISFjKLzg 3974 if (mkdir("./syz-tmp/newroot/syz-inputs", 0700)) 3975 fail("mkdir(/syz-inputs) failed"); 3976 3977 if (mount("/syz-inputs", "./syz-tmp/newroot/syz-inputs", NULL, bind_mount_flags | MS_RDONLY, NULL) && errno != ENOENT) 3978 fail("mount(syz-inputs) failed"); 3979 3980 #if SYZ_EXECUTOR || SYZ_CGROUPS 3981 initialize_cgroups(); 3982 #endif 3983 if (mkdir("./syz-tmp/pivot", 0777)) 3984 fail("mkdir failed"); 3985 if (syscall(SYS_pivot_root, "./syz-tmp", "./syz-tmp/pivot")) { 3986 debug("pivot_root failed\n"); 3987 if (chdir("./syz-tmp")) 3988 fail("chdir failed"); 3989 } else { 3990 debug("pivot_root OK\n"); 3991 if (chdir("/")) 3992 fail("chdir failed"); 3993 if (umount2("./pivot", MNT_DETACH)) 3994 fail("umount failed"); 3995 } 3996 if (chroot("./newroot")) 3997 fail("chroot failed"); 3998 if (chdir("/")) 3999 fail("chdir failed"); 4000 setup_gadgetfs(); 4001 setup_binderfs(); 4002 setup_fusectl(); 4003 } 4004 #endif 4005 4006 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_NAMESPACE 4007 #include <sys/mount.h> 4008 #include <sys/stat.h> 4009 4010 static void setup_gadgetfs() 4011 { 4012 if (mkdir("/dev/gadgetfs", 0777)) { 4013 debug("mkdir(/dev/gadgetfs) failed: %d\n", errno); 4014 } 4015 if (mount("gadgetfs", "/dev/gadgetfs", "gadgetfs", 0, NULL)) { 4016 debug("mount of gadgetfs at /dev/gadgetfs failed: %d\n", errno); 4017 } 4018 } 4019 #endif 4020 4021 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID 4022 #include <errno.h> 4023 #include <sys/mount.h> 4024 #include <sys/stat.h> 4025 #include <unistd.h> 4026 4027 static void setup_fusectl() 4028 { 4029 if (mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0)) { 4030 debug("mount(fusectl) failed: %d\n", errno); 4031 } 4032 } 4033 4034 static void setup_binderfs() 4035 { 4036 // NOTE: this function must be called after chroot. 
4037 // Bind an instance of binderfs specific just to this executor - it will 4038 // only be visible in its mount namespace and will help isolate binder 4039 // devices during fuzzing. 4040 // These commands will just silently fail if binderfs is not supported. 4041 // Ideally it should have been added as a separate feature (with lots of 4042 // minor changes throughout the code base), but it seems to be an overkill 4043 // for just 2 simple lines of code. 4044 if (mkdir("/dev/binderfs", 0777)) { 4045 debug("mkdir(/dev/binderfs) failed: %d\n", errno); 4046 } 4047 4048 if (mount("binder", "/dev/binderfs", "binder", 0, NULL)) { 4049 debug("mount of binder at /dev/binderfs failed: %d\n", errno); 4050 } 4051 #if !SYZ_EXECUTOR && !SYZ_USE_TMP_DIR 4052 // Do a local symlink right away. 4053 if (symlink("/dev/binderfs", "./binderfs")) { 4054 debug("symlink(/dev/binderfs, ./binderfs) failed: %d\n", errno); 4055 } 4056 #endif 4057 } 4058 4059 #include <sched.h> 4060 #include <sys/prctl.h> 4061 #include <sys/resource.h> 4062 #include <sys/time.h> 4063 #include <sys/wait.h> 4064 4065 static void loop(); 4066 4067 static void sandbox_common() 4068 { 4069 prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); 4070 if (getppid() == 1) 4071 exitf("the sandbox parent process was killed"); 4072 4073 #if SYZ_EXECUTOR || __NR_syz_init_net_socket || SYZ_DEVLINK_PCI || __NR_syz_socket_connect_nvme_tcp 4074 int netns = open("/proc/self/ns/net", O_RDONLY); 4075 if (netns == -1) 4076 fail("open(/proc/self/ns/net) failed"); 4077 if (dup2(netns, kInitNetNsFd) < 0) 4078 fail("dup2(netns, kInitNetNsFd) failed"); 4079 close(netns); 4080 #endif 4081 4082 struct rlimit rlim; 4083 #if SYZ_EXECUTOR 4084 rlim.rlim_cur = rlim.rlim_max = (200 << 20) + 4085 (kMaxThreads * kCoverSize + kExtraCoverSize) * sizeof(void*); 4086 #else 4087 rlim.rlim_cur = rlim.rlim_max = (200 << 20); 4088 #endif 4089 setrlimit(RLIMIT_AS, &rlim); 4090 rlim.rlim_cur = rlim.rlim_max = 32 << 20; 4091 setrlimit(RLIMIT_MEMLOCK, &rlim); 4092 
rlim.rlim_cur = rlim.rlim_max = 136 << 20; 4093 setrlimit(RLIMIT_FSIZE, &rlim); 4094 rlim.rlim_cur = rlim.rlim_max = 1 << 20; 4095 setrlimit(RLIMIT_STACK, &rlim); 4096 // Note: core size is also restricted by RLIMIT_FSIZE. 4097 rlim.rlim_cur = rlim.rlim_max = 128 << 20; 4098 setrlimit(RLIMIT_CORE, &rlim); 4099 rlim.rlim_cur = rlim.rlim_max = 256; // see kMaxFd 4100 setrlimit(RLIMIT_NOFILE, &rlim); 4101 4102 // CLONE_NEWNS/NEWCGROUP cause EINVAL on some systems, 4103 // so we do them separately of clone in do_sandbox_namespace. 4104 if (unshare(CLONE_NEWNS)) { 4105 debug("unshare(CLONE_NEWNS): %d\n", errno); 4106 } 4107 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) { 4108 debug("mount(\"/\", MS_REC | MS_PRIVATE): %d\n", errno); 4109 } 4110 if (unshare(CLONE_NEWIPC)) { 4111 debug("unshare(CLONE_NEWIPC): %d\n", errno); 4112 } 4113 if (unshare(0x02000000)) { 4114 debug("unshare(CLONE_NEWCGROUP): %d\n", errno); 4115 } 4116 if (unshare(CLONE_NEWUTS)) { 4117 debug("unshare(CLONE_NEWUTS): %d\n", errno); 4118 } 4119 if (unshare(CLONE_SYSVSEM)) { 4120 debug("unshare(CLONE_SYSVSEM): %d\n", errno); 4121 } 4122 // These sysctl's restrict ipc resource usage (by default it's possible 4123 // to eat all system memory by creating e.g. lots of large sem sets). 4124 // These sysctl's are per-namespace, so we need to set them inside 4125 // of the test ipc namespace (after CLONE_NEWIPC). 
4126 typedef struct { 4127 const char* name; 4128 const char* value; 4129 } sysctl_t; 4130 static const sysctl_t sysctls[] = { 4131 {"/proc/sys/kernel/shmmax", "16777216"}, 4132 {"/proc/sys/kernel/shmall", "536870912"}, 4133 {"/proc/sys/kernel/shmmni", "1024"}, 4134 {"/proc/sys/kernel/msgmax", "8192"}, 4135 {"/proc/sys/kernel/msgmni", "1024"}, 4136 {"/proc/sys/kernel/msgmnb", "1024"}, 4137 {"/proc/sys/kernel/sem", "1024 1048576 500 1024"}, 4138 }; 4139 unsigned i; 4140 for (i = 0; i < sizeof(sysctls) / sizeof(sysctls[0]); i++) 4141 write_file(sysctls[i].name, sysctls[i].value); 4142 } 4143 #endif 4144 4145 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE 4146 static int wait_for_loop(int pid) 4147 { 4148 if (pid < 0) 4149 fail("sandbox fork failed"); 4150 debug("spawned loop pid %d\n", pid); 4151 int status = 0; 4152 while (waitpid(-1, &status, __WALL) != pid) { 4153 } 4154 return WEXITSTATUS(status); 4155 } 4156 #endif 4157 4158 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID 4159 #include <linux/capability.h> 4160 4161 static void drop_caps(void) 4162 { 4163 struct __user_cap_header_struct cap_hdr = {}; 4164 struct __user_cap_data_struct cap_data[2] = {}; 4165 cap_hdr.version = _LINUX_CAPABILITY_VERSION_3; 4166 cap_hdr.pid = getpid(); 4167 if (syscall(SYS_capget, &cap_hdr, &cap_data)) 4168 fail("capget failed"); 4169 // Drop CAP_SYS_PTRACE so that test processes can't attach to parent processes. 4170 // Previously it lead to hangs because the loop process stopped due to SIGSTOP. 4171 // Note that a process can always ptrace its direct children, which is enough for testing purposes. 4172 // 4173 // A process with CAP_SYS_NICE can bring kernel down by asking for too high SCHED_DEADLINE priority, 4174 // as the result rcu and other system services that use kernel threads will stop functioning. 
4175 // Some parameters for SCHED_DEADLINE should be OK, but we don't have means to enforce 4176 // values of indirect syscall arguments. Peter Zijlstra proposed sysctl_deadline_period_{min,max} 4177 // which could be used to enfore safe limits without droppping CAP_SYS_NICE, but we don't have it yet. 4178 // See the following bug for details: 4179 // https://groups.google.com/forum/#!topic/syzkaller-bugs/G6Wl_PKPIWI 4180 const int drop = (1 << CAP_SYS_PTRACE) | (1 << CAP_SYS_NICE); 4181 cap_data[0].effective &= ~drop; 4182 cap_data[0].permitted &= ~drop; 4183 cap_data[0].inheritable &= ~drop; 4184 if (syscall(SYS_capset, &cap_hdr, &cap_data)) 4185 fail("capset failed"); 4186 } 4187 #endif 4188 4189 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE 4190 #include <sched.h> 4191 #include <sys/types.h> 4192 4193 static int do_sandbox_none(void) 4194 { 4195 // CLONE_NEWPID takes effect for the first child of the current process, 4196 // so we do it before fork to make the loop "init" process of the namespace. 4197 // We ought to do fail here, but sandbox=none is used in pkg/ipc tests 4198 // and they are usually run under non-root. 4199 // Also since debug is stripped by pkg/csource, we need to do {} 4200 // even though we generally don't do {} around single statements. 4201 if (unshare(CLONE_NEWPID)) { 4202 debug("unshare(CLONE_NEWPID): %d\n", errno); 4203 } 4204 int pid = fork(); 4205 if (pid != 0) 4206 return wait_for_loop(pid); 4207 4208 #if SYZ_EXECUTOR || SYZ_VHCI_INJECTION 4209 initialize_vhci(); 4210 #endif 4211 sandbox_common(); 4212 drop_caps(); 4213 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 4214 initialize_netdevices_init(); 4215 #endif 4216 if (unshare(CLONE_NEWNET)) { 4217 debug("unshare(CLONE_NEWNET): %d\n", errno); 4218 } 4219 // Enable access to IPPROTO_ICMP sockets, must be done after CLONE_NEWNET. 
4220 write_file("/proc/sys/net/ipv4/ping_group_range", "0 65535"); 4221 #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI 4222 initialize_devlink_pci(); 4223 #endif 4224 #if SYZ_EXECUTOR || SYZ_NET_INJECTION 4225 initialize_tun(); 4226 #endif 4227 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 4228 initialize_netdevices(); 4229 #endif 4230 #if SYZ_EXECUTOR || SYZ_WIFI 4231 initialize_wifi_devices(); 4232 #endif 4233 sandbox_common_mount_tmpfs(); 4234 loop(); 4235 doexit(1); 4236 } 4237 #endif 4238 4239 #if SYZ_EXECUTOR || SYZ_SANDBOX_SETUID 4240 #include <grp.h> 4241 #include <sched.h> 4242 #include <sys/prctl.h> 4243 4244 #define SYZ_HAVE_SANDBOX_SETUID 1 4245 static int do_sandbox_setuid(void) 4246 { 4247 if (unshare(CLONE_NEWPID)) { 4248 debug("unshare(CLONE_NEWPID): %d\n", errno); 4249 } 4250 int pid = fork(); 4251 if (pid != 0) 4252 return wait_for_loop(pid); 4253 4254 #if SYZ_EXECUTOR || SYZ_VHCI_INJECTION 4255 initialize_vhci(); 4256 #endif 4257 sandbox_common(); 4258 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 4259 initialize_netdevices_init(); 4260 #endif 4261 if (unshare(CLONE_NEWNET)) { 4262 debug("unshare(CLONE_NEWNET): %d\n", errno); 4263 } 4264 #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI 4265 initialize_devlink_pci(); 4266 #endif 4267 #if SYZ_EXECUTOR || SYZ_NET_INJECTION 4268 initialize_tun(); 4269 #endif 4270 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 4271 initialize_netdevices(); 4272 #endif 4273 #if SYZ_EXECUTOR || SYZ_WIFI 4274 initialize_wifi_devices(); 4275 #endif 4276 setup_binderfs(); 4277 setup_fusectl(); 4278 4279 const int nobody = 65534; 4280 if (setgroups(0, NULL)) 4281 fail("failed to setgroups"); 4282 if (syscall(SYS_setresgid, nobody, nobody, nobody)) 4283 fail("failed to setresgid"); 4284 if (syscall(SYS_setresuid, nobody, nobody, nobody)) 4285 fail("failed to setresuid"); 4286 4287 // setresuid and setresgid clear the parent-death signal. 4288 prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); 4289 // This is required to open /proc/self/ files. 
4290 // Otherwise they are owned by root and we can't open them after setuid. 4291 // See task_dump_owner function in kernel. 4292 prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); 4293 4294 loop(); 4295 doexit(1); 4296 } 4297 #endif 4298 4299 #if SYZ_EXECUTOR || SYZ_SANDBOX_NAMESPACE 4300 #include <sched.h> 4301 #include <sys/mman.h> 4302 #include <sys/mount.h> 4303 4304 static int real_uid; 4305 static int real_gid; 4306 __attribute__((aligned(64 << 10))) static char sandbox_stack[1 << 20]; 4307 4308 static int namespace_sandbox_proc(void* arg) 4309 { 4310 sandbox_common(); 4311 4312 // /proc/self/setgroups is not present on some systems, ignore error. 4313 write_file("/proc/self/setgroups", "deny"); 4314 if (!write_file("/proc/self/uid_map", "0 %d 1\n", real_uid)) 4315 fail("write of /proc/self/uid_map failed"); 4316 if (!write_file("/proc/self/gid_map", "0 %d 1\n", real_gid)) 4317 fail("write of /proc/self/gid_map failed"); 4318 4319 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 4320 initialize_netdevices_init(); 4321 #endif 4322 // CLONE_NEWNET must always happen before tun setup, 4323 // because we want the tun device in the test namespace. 4324 if (unshare(CLONE_NEWNET)) 4325 fail("unshare(CLONE_NEWNET)"); 4326 // Enable access to IPPROTO_ICMP sockets, must be done after CLONE_NEWNET. 4327 write_file("/proc/sys/net/ipv4/ping_group_range", "0 65535"); 4328 #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI 4329 initialize_devlink_pci(); 4330 #endif 4331 #if SYZ_EXECUTOR || SYZ_NET_INJECTION 4332 // We setup tun here as it needs to be in the test net namespace, 4333 // which in turn needs to be in the test user namespace. 4334 // However, IFF_NAPI_FRAGS will fail as we are not root already. 4335 // TODO: we should create tun in the init net namespace and use setns 4336 // to move it to the target namespace. 
4337 initialize_tun(); 4338 #endif 4339 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 4340 initialize_netdevices(); 4341 #endif 4342 #if SYZ_EXECUTOR || SYZ_WIFI 4343 initialize_wifi_devices(); 4344 #endif 4345 4346 sandbox_common_mount_tmpfs(); 4347 drop_caps(); 4348 4349 loop(); 4350 doexit(1); 4351 } 4352 4353 #define SYZ_HAVE_SANDBOX_NAMESPACE 1 4354 static int do_sandbox_namespace(void) 4355 { 4356 #if SYZ_EXECUTOR || SYZ_VHCI_INJECTION 4357 // HCIDEVUP requires CAP_ADMIN, so this needs to happen early. 4358 initialize_vhci(); 4359 #endif 4360 real_uid = getuid(); 4361 real_gid = getgid(); 4362 mprotect(sandbox_stack, 4096, PROT_NONE); // to catch stack underflows 4363 int pid = clone(namespace_sandbox_proc, &sandbox_stack[sizeof(sandbox_stack) - 64], 4364 CLONE_NEWUSER | CLONE_NEWPID, 0); 4365 return wait_for_loop(pid); 4366 } 4367 #endif 4368 4369 #if SYZ_EXECUTOR || SYZ_SANDBOX_ANDROID 4370 // seccomp only supported for Arm, Arm64, X86, and X86_64 archs 4371 #if GOARCH_arm || GOARCH_arm64 || GOARCH_386 || GOARCH_amd64 4372 #include <assert.h> 4373 #include <errno.h> 4374 #include <linux/audit.h> 4375 #include <linux/filter.h> 4376 #include <linux/seccomp.h> 4377 #include <stddef.h> 4378 #include <stdlib.h> 4379 #include <sys/prctl.h> 4380 #include <sys/syscall.h> 4381 4382 #include "android/android_seccomp.h" 4383 4384 #if GOARCH_amd64 || GOARCH_386 4385 // Syz-executor is linked against glibc when fuzzing runs on Cuttlefish x86-x64. 4386 // However Android blocks calls into mkdir, rmdir, symlink which causes 4387 // syz-executor to crash. When fuzzing runs on Android device this issue 4388 // is not observed, because syz-executor is linked against Bionic. Under 4389 // the hood Bionic invokes mkdirat, inlinkat and symlinkat, which are 4390 // allowed by seccomp-bpf. 4391 // This issue may exist not only in Android, but also in Linux in general 4392 // where seccomp filtering is enforced. 
4393 // 4394 // This trick makes linker believe it matched the correct version of mkdir, 4395 // rmdir, symlink. So now behavior is the same across ARM and non-ARM builds. 4396 inline int mkdir(const char* path, mode_t mode) 4397 { 4398 return mkdirat(AT_FDCWD, path, mode); 4399 } 4400 4401 inline int rmdir(const char* path) 4402 { 4403 return unlinkat(AT_FDCWD, path, AT_REMOVEDIR); 4404 } 4405 4406 inline int symlink(const char* old_path, const char* new_path) 4407 { 4408 return symlinkat(old_path, AT_FDCWD, new_path); 4409 } 4410 #endif 4411 4412 #endif 4413 #include <fcntl.h> // open(2) 4414 #include <grp.h> // setgroups 4415 #include <sys/xattr.h> // setxattr, getxattr 4416 4417 #define AID_NET_BT_ADMIN 3001 4418 #define AID_NET_BT 3002 4419 #define AID_INET 3003 4420 #define AID_EVERYBODY 9997 4421 #define AID_APP 10000 4422 4423 #define UNTRUSTED_APP_UID (AID_APP + 999) 4424 #define UNTRUSTED_APP_GID (AID_APP + 999) 4425 4426 #define SYSTEM_UID 1000 4427 #define SYSTEM_GID 1000 4428 4429 const char* const SELINUX_CONTEXT_UNTRUSTED_APP = "u:r:untrusted_app:s0:c512,c768"; 4430 const char* const SELINUX_LABEL_APP_DATA_FILE = "u:object_r:app_data_file:s0:c512,c768"; 4431 const char* const SELINUX_CONTEXT_FILE = "/proc/thread-self/attr/current"; 4432 const char* const SELINUX_XATTR_NAME = "security.selinux"; 4433 4434 const gid_t UNTRUSTED_APP_GROUPS[] = {UNTRUSTED_APP_GID, AID_NET_BT_ADMIN, AID_NET_BT, AID_INET, AID_EVERYBODY}; 4435 const size_t UNTRUSTED_APP_NUM_GROUPS = sizeof(UNTRUSTED_APP_GROUPS) / sizeof(UNTRUSTED_APP_GROUPS[0]); 4436 4437 const gid_t SYSTEM_GROUPS[] = {SYSTEM_GID, AID_NET_BT_ADMIN, AID_NET_BT, AID_INET, AID_EVERYBODY}; 4438 const size_t SYSTEM_NUM_GROUPS = sizeof(SYSTEM_GROUPS) / sizeof(SYSTEM_GROUPS[0]); 4439 4440 // Similar to libselinux getcon(3), but: 4441 // - No library dependency 4442 // - No dynamic memory allocation 4443 // - Uses fail() instead of returning an error code 4444 static void getcon(char* context, size_t context_size) 
4445 { 4446 int fd = open(SELINUX_CONTEXT_FILE, O_RDONLY); 4447 if (fd < 0) 4448 fail("getcon: couldn't open context file"); 4449 4450 ssize_t nread = read(fd, context, context_size); 4451 4452 close(fd); 4453 4454 if (nread <= 0) 4455 fail("getcon: failed to read context file"); 4456 4457 // The contents of the context file MAY end with a newline 4458 // and MAY not have a null terminator. Handle this here. 4459 if (context[nread - 1] == '\n') 4460 context[nread - 1] = '\0'; 4461 } 4462 4463 // Similar to libselinux setcon(3), but: 4464 // - No library dependency 4465 // - No dynamic memory allocation 4466 // - Uses fail() instead of returning an error code 4467 static void setcon(const char* context) 4468 { 4469 char new_context[512]; 4470 4471 // Attempt to write the new context 4472 int fd = open(SELINUX_CONTEXT_FILE, O_WRONLY); 4473 4474 if (fd < 0) 4475 fail("setcon: could not open context file"); 4476 4477 ssize_t bytes_written = write(fd, context, strlen(context)); 4478 4479 // N.B.: We cannot reuse this file descriptor, since the target SELinux context 4480 // may not be able to read from it. 
4481 close(fd); 4482 4483 if (bytes_written != (ssize_t)strlen(context)) 4484 failmsg("setcon: could not write entire context", "wrote=%zi, expected=%zu", bytes_written, strlen(context)); 4485 4486 // Validate the transition by checking the context 4487 getcon(new_context, sizeof(new_context)); 4488 4489 if (strcmp(context, new_context) != 0) 4490 failmsg("setcon: failed to change", "want=%s, context=%s", context, new_context); 4491 } 4492 4493 // Similar to libselinux setfilecon(3), but: 4494 // - No library dependency 4495 // - No dynamic memory allocation 4496 // - Uses fail() instead of returning an error code 4497 static void setfilecon(const char* path, const char* context) 4498 { 4499 char new_context[512]; 4500 4501 if (setxattr(path, SELINUX_XATTR_NAME, context, strlen(context) + 1, 0) != 0) 4502 fail("setfilecon: setxattr failed"); 4503 if (getxattr(path, SELINUX_XATTR_NAME, new_context, sizeof(new_context)) < 0) 4504 fail("setfilecon: getxattr failed"); 4505 if (strcmp(context, new_context) != 0) 4506 failmsg("setfilecon: could not set context", "want=%s, got=%s", context, new_context); 4507 } 4508 4509 #define SYZ_HAVE_SANDBOX_ANDROID 1 4510 4511 static int do_sandbox_android(uint64 sandbox_arg) 4512 { 4513 setup_fusectl(); 4514 #if SYZ_EXECUTOR || SYZ_VHCI_INJECTION 4515 initialize_vhci(); 4516 #endif 4517 sandbox_common(); 4518 drop_caps(); 4519 4520 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 4521 initialize_netdevices_init(); 4522 #endif 4523 // CLONE_NEWNET must always happen before tun setup, because we want the tun 4524 // device in the test namespace. If we don't do this, executor will crash with 4525 // SYZFATAL: executor NUM failed NUM times: executor NUM: EOF 4526 if (unshare(CLONE_NEWNET)) { 4527 debug("unshare(CLONE_NEWNET): %d\n", errno); 4528 } 4529 // Enable access to IPPROTO_ICMP sockets, must be done after CLONE_NEWNET. 
4530 write_file("/proc/sys/net/ipv4/ping_group_range", "0 65535"); 4531 #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI 4532 initialize_devlink_pci(); 4533 #endif 4534 #if SYZ_EXECUTOR || SYZ_NET_INJECTION 4535 initialize_tun(); 4536 #endif 4537 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 4538 initialize_netdevices(); 4539 #endif 4540 uid_t uid = UNTRUSTED_APP_UID; 4541 size_t num_groups = UNTRUSTED_APP_NUM_GROUPS; 4542 const gid_t* groups = UNTRUSTED_APP_GROUPS; 4543 gid_t gid = UNTRUSTED_APP_GID; 4544 debug("executor received sandbox_arg=%llu\n", sandbox_arg); 4545 if (sandbox_arg == 1) { 4546 uid = SYSTEM_UID; 4547 num_groups = SYSTEM_NUM_GROUPS; 4548 groups = SYSTEM_GROUPS; 4549 gid = SYSTEM_GID; 4550 4551 debug("fuzzing under SYSTEM account\n"); 4552 } 4553 if (chown(".", uid, uid) != 0) 4554 failmsg("do_sandbox_android: chmod failed", "sandbox_arg=%llu", sandbox_arg); 4555 4556 if (setgroups(num_groups, groups) != 0) 4557 failmsg("do_sandbox_android: setgroups failed", "sandbox_arg=%llu", sandbox_arg); 4558 4559 if (setresgid(gid, gid, gid) != 0) 4560 failmsg("do_sandbox_android: setresgid failed", "sandbox_arg=%llu", sandbox_arg); 4561 4562 setup_binderfs(); 4563 4564 #if GOARCH_arm || GOARCH_arm64 || GOARCH_386 || GOARCH_amd64 4565 // Will fail() if anything fails. 4566 // Must be called when the new process still has CAP_SYS_ADMIN, in this case, 4567 // before changing uid from 0, which clears capabilities. 4568 int account = SCFS_RestrictedApp; 4569 if (sandbox_arg == 1) 4570 account = SCFS_SystemAccount; 4571 set_app_seccomp_filter(account); 4572 #endif 4573 4574 if (setresuid(uid, uid, uid) != 0) 4575 failmsg("do_sandbox_android: setresuid failed", "sandbox_arg=%llu", sandbox_arg); 4576 4577 // setresuid and setresgid clear the parent-death signal. 
4578 prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); 4579 4580 setfilecon(".", SELINUX_LABEL_APP_DATA_FILE); 4581 if (uid == UNTRUSTED_APP_UID) 4582 setcon(SELINUX_CONTEXT_UNTRUSTED_APP); 4583 4584 loop(); 4585 doexit(1); 4586 } 4587 #endif 4588 4589 #if SYZ_EXECUTOR || SYZ_REPEAT && SYZ_USE_TMP_DIR 4590 #include <dirent.h> 4591 #include <errno.h> 4592 #include <string.h> 4593 #include <sys/ioctl.h> 4594 #include <sys/mount.h> 4595 4596 #define FS_IOC_SETFLAGS _IOW('f', 2, long) 4597 4598 // One does not simply remove a directory. 4599 // There can be mounts, so we need to try to umount. 4600 // Moreover, a mount can be mounted several times, so we need to try to umount in a loop. 4601 // Moreover, after umount a dir can become non-empty again, so we need another loop. 4602 // Moreover, a mount can be re-mounted as read-only and then we will fail to make a dir empty. 4603 static void remove_dir(const char* dir) 4604 { 4605 int iter = 0; 4606 DIR* dp = 0; 4607 4608 #if SYZ_EXECUTOR || !SYZ_SANDBOX_ANDROID 4609 // Starting from v6.9, it does no longer make sense to use MNT_DETACH, because 4610 // a loop device may only be reused in RW mode if no mounted filesystem keeps a 4611 // reference to it. So we have to umount them synchronously. 4612 // MNT_FORCE should hopefully prevent hangs for filesystems that may require a complex cleanup. 4613 // 4614 // This declaration should not be moved under retry label, since label followed by a declaration 4615 // is not supported by old compilers. 4616 const int umount_flags = MNT_FORCE | UMOUNT_NOFOLLOW; 4617 #endif 4618 4619 retry: 4620 #if SYZ_EXECUTOR || !SYZ_SANDBOX_ANDROID 4621 #if SYZ_EXECUTOR 4622 if (!flag_sandbox_android) 4623 #endif 4624 while (umount2(dir, umount_flags) == 0) { 4625 debug("umount(%s)\n", dir); 4626 } 4627 #endif 4628 dp = opendir(dir); 4629 if (dp == NULL) { 4630 if (errno == EMFILE) { 4631 // This happens when the test process casts prlimit(NOFILE) on us. 
4632 // Ideally we somehow prevent test processes from messing with parent processes. 4633 // But full sandboxing is expensive, so let's ignore this error for now. 4634 exitf("opendir(%s) failed due to NOFILE, exiting", dir); 4635 } 4636 exitf("opendir(%s) failed", dir); 4637 } 4638 struct dirent* ep = 0; 4639 while ((ep = readdir(dp))) { 4640 if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0) 4641 continue; 4642 char filename[FILENAME_MAX]; 4643 snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name); 4644 // If it's 9p mount with broken transport, lstat will fail. 4645 // So try to umount first. 4646 #if SYZ_EXECUTOR || !SYZ_SANDBOX_ANDROID 4647 #if SYZ_EXECUTOR 4648 if (!flag_sandbox_android) 4649 #endif 4650 while (umount2(filename, umount_flags) == 0) { 4651 debug("umount(%s)\n", filename); 4652 } 4653 #endif 4654 struct stat st; 4655 if (lstat(filename, &st)) 4656 exitf("lstat(%s) failed", filename); 4657 if (S_ISDIR(st.st_mode)) { 4658 remove_dir(filename); 4659 continue; 4660 } 4661 int i; 4662 for (i = 0;; i++) { 4663 if (unlink(filename) == 0) 4664 break; 4665 if (errno == EPERM) { 4666 // Try to reset FS_XFLAG_IMMUTABLE. 
4667 int fd = open(filename, O_RDONLY); 4668 if (fd != -1) { 4669 long flags = 0; 4670 if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) { 4671 debug("reset FS_XFLAG_IMMUTABLE\n"); 4672 } 4673 close(fd); 4674 continue; 4675 } 4676 } 4677 if (errno == EROFS) { 4678 debug("ignoring EROFS\n"); 4679 break; 4680 } 4681 if (errno != EBUSY || i > 100) 4682 exitf("unlink(%s) failed", filename); 4683 #if SYZ_EXECUTOR || !SYZ_SANDBOX_ANDROID 4684 #if SYZ_EXECUTOR 4685 if (!flag_sandbox_android) { 4686 #endif 4687 debug("umount(%s)\n", filename); 4688 if (umount2(filename, umount_flags)) 4689 exitf("umount(%s) failed", filename); 4690 #if SYZ_EXECUTOR 4691 } 4692 #endif 4693 #endif 4694 } 4695 } 4696 closedir(dp); 4697 for (int i = 0;; i++) { 4698 if (rmdir(dir) == 0) 4699 break; 4700 if (i < 100) { 4701 if (errno == EPERM) { 4702 // Try to reset FS_XFLAG_IMMUTABLE. 4703 int fd = open(dir, O_RDONLY); 4704 if (fd != -1) { 4705 long flags = 0; 4706 if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) { 4707 debug("reset FS_XFLAG_IMMUTABLE\n"); 4708 } 4709 close(fd); 4710 continue; 4711 } 4712 } 4713 if (errno == EROFS) { 4714 debug("ignoring EROFS\n"); 4715 break; 4716 } 4717 if (errno == EBUSY) { 4718 #if SYZ_EXECUTOR || !SYZ_SANDBOX_ANDROID 4719 #if SYZ_EXECUTOR 4720 if (!flag_sandbox_android) { 4721 #endif 4722 debug("umount(%s)\n", dir); 4723 if (umount2(dir, umount_flags)) 4724 exitf("umount(%s) failed", dir); 4725 #if SYZ_EXECUTOR 4726 } 4727 #endif 4728 #endif 4729 continue; 4730 } 4731 if (errno == ENOTEMPTY) { 4732 if (iter < 100) { 4733 iter++; 4734 goto retry; 4735 } 4736 } 4737 } 4738 exitf("rmdir(%s) failed", dir); 4739 } 4740 } 4741 #endif 4742 4743 #if SYZ_EXECUTOR || SYZ_FAULT 4744 #include <fcntl.h> 4745 #include <string.h> 4746 #include <sys/stat.h> 4747 #include <sys/types.h> 4748 4749 static int inject_fault(int nth) 4750 { 4751 int fd; 4752 fd = open("/proc/thread-self/fail-nth", O_RDWR); 4753 // We treat errors here as temporal/non-critical because we see 4754 // 
occasional ENOENT/EACCES errors returned. It seems that fuzzer 4755 // somehow gets its hands to it. 4756 if (fd == -1) 4757 exitf("failed to open /proc/thread-self/fail-nth"); 4758 char buf[16]; 4759 sprintf(buf, "%d", nth); 4760 if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf)) 4761 exitf("failed to write /proc/thread-self/fail-nth"); 4762 return fd; 4763 } 4764 #endif 4765 4766 #if SYZ_EXECUTOR 4767 static int fault_injected(int fail_fd) 4768 { 4769 char buf[16]; 4770 int n = read(fail_fd, buf, sizeof(buf) - 1); 4771 if (n <= 0) 4772 exitf("failed to read /proc/thread-self/fail-nth"); 4773 int res = n == 2 && buf[0] == '0' && buf[1] == '\n'; 4774 buf[0] = '0'; 4775 if (write(fail_fd, buf, 1) != 1) 4776 exitf("failed to write /proc/thread-self/fail-nth"); 4777 close(fail_fd); 4778 return res; 4779 } 4780 #endif 4781 4782 #if (SYZ_EXECUTOR || SYZ_REPEAT) && SYZ_EXECUTOR_USES_FORK_SERVER 4783 #include <dirent.h> 4784 #include <errno.h> 4785 #include <fcntl.h> 4786 #include <signal.h> 4787 #include <string.h> 4788 #include <sys/stat.h> 4789 #include <sys/types.h> 4790 #include <sys/wait.h> 4791 4792 static void kill_and_wait(int pid, int* status) 4793 { 4794 kill(-pid, SIGKILL); 4795 kill(pid, SIGKILL); 4796 // First, give it up to 100 ms to surrender. 4797 for (int i = 0; i < 100; i++) { 4798 if (waitpid(-1, status, WNOHANG | __WALL) == pid) 4799 return; 4800 usleep(1000); 4801 } 4802 // Now, try to abort fuse connections as they cause deadlocks, 4803 // see Documentation/filesystems/fuse.txt for details. 4804 // There is no good way to figure out the right connections 4805 // provided that the process could use unshare(CLONE_NEWNS), 4806 // so we abort all. 
4807 debug("kill is not working\n"); 4808 DIR* dir = opendir("/sys/fs/fuse/connections"); 4809 if (dir) { 4810 for (;;) { 4811 struct dirent* ent = readdir(dir); 4812 if (!ent) 4813 break; 4814 if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) 4815 continue; 4816 char abort[300]; 4817 snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort", ent->d_name); 4818 int fd = open(abort, O_WRONLY); 4819 if (fd == -1) { 4820 debug("failed to open %s: %d\n", abort, errno); 4821 continue; 4822 } 4823 debug("aborting fuse conn %s\n", ent->d_name); 4824 if (write(fd, abort, 1) < 0) { 4825 debug("failed to abort: %d\n", errno); 4826 } 4827 close(fd); 4828 } 4829 closedir(dir); 4830 } else { 4831 debug("failed to open /sys/fs/fuse/connections: %d\n", errno); 4832 } 4833 // Now, just wait, no other options. 4834 while (waitpid(-1, status, __WALL) != pid) { 4835 } 4836 } 4837 #endif 4838 4839 #if (SYZ_EXECUTOR || SYZ_REPEAT && (SYZ_CGROUPS || SYZ_NET_RESET)) && SYZ_EXECUTOR_USES_FORK_SERVER 4840 #include <fcntl.h> 4841 #include <sys/ioctl.h> 4842 #include <sys/stat.h> 4843 #include <sys/types.h> 4844 #include <unistd.h> 4845 4846 #define SYZ_HAVE_SETUP_LOOP 1 4847 static void setup_loop() 4848 { 4849 #if SYZ_EXECUTOR || SYZ_CGROUPS 4850 setup_cgroups_loop(); 4851 #endif 4852 #if SYZ_EXECUTOR || SYZ_NET_RESET 4853 checkpoint_net_namespace(); 4854 #endif 4855 } 4856 #endif 4857 4858 #if (SYZ_EXECUTOR || SYZ_REPEAT && (SYZ_NET_RESET || __NR_syz_mount_image || __NR_syz_read_part_table)) && SYZ_EXECUTOR_USES_FORK_SERVER 4859 #define SYZ_HAVE_RESET_LOOP 1 4860 static void reset_loop() 4861 { 4862 #if SYZ_EXECUTOR || __NR_syz_mount_image || __NR_syz_read_part_table 4863 char buf[64]; 4864 snprintf(buf, sizeof(buf), "/dev/loop%llu", procid); 4865 int loopfd = open(buf, O_RDWR); 4866 if (loopfd != -1) { 4867 ioctl(loopfd, LOOP_CLR_FD, 0); 4868 close(loopfd); 4869 } 4870 #endif 4871 #if SYZ_EXECUTOR || SYZ_NET_RESET 4872 reset_net_namespace(); 4873 #endif 4874 } 
4875 #endif 4876 4877 #if (SYZ_EXECUTOR || SYZ_REPEAT) && SYZ_EXECUTOR_USES_FORK_SERVER 4878 #include <sys/prctl.h> 4879 #include <unistd.h> 4880 4881 #define SYZ_HAVE_SETUP_TEST 1 4882 static void setup_test() 4883 { 4884 prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); 4885 // We don't check for getppid() == 1 here b/c of unshare(CLONE_NEWPID), 4886 // our parent is normally pid 1. 4887 setpgrp(); 4888 #if SYZ_EXECUTOR || SYZ_CGROUPS 4889 setup_cgroups_test(); 4890 #endif 4891 // It's the leaf test process we want to be always killed first. 4892 write_file("/proc/self/oom_score_adj", "1000"); 4893 #if SYZ_EXECUTOR || SYZ_NET_INJECTION 4894 // Read all remaining packets from tun to better 4895 // isolate consequently executing programs. 4896 flush_tun(); 4897 #endif 4898 #if SYZ_EXECUTOR || SYZ_USE_TMP_DIR 4899 // Add a binderfs symlink to the tmp folder. 4900 if (symlink("/dev/binderfs", "./binderfs")) { 4901 debug("symlink(/dev/binderfs, ./binderfs) failed: %d", errno); 4902 } 4903 #endif 4904 } 4905 #endif 4906 4907 #if SYZ_EXECUTOR || SYZ_CLOSE_FDS 4908 #include <sys/syscall.h> 4909 #define SYZ_HAVE_CLOSE_FDS 1 4910 static void close_fds() 4911 { 4912 #if SYZ_EXECUTOR 4913 if (!flag_close_fds) 4914 return; 4915 #endif 4916 #ifdef SYS_close_range 4917 if (!syscall(SYS_close_range, 3, MAX_FDS, 0)) 4918 return; 4919 #endif 4920 // Keeping a 9p transport pipe open will hang the proccess dead, 4921 // so close all opened file descriptors. 4922 // Also close all USB emulation descriptors to trigger exit from USB 4923 // event loop to collect coverage. 
4924 for (int fd = 3; fd < MAX_FDS; fd++) 4925 close(fd); 4926 } 4927 #endif 4928 4929 #if SYZ_EXECUTOR || SYZ_FAULT 4930 #include <errno.h> 4931 4932 static const char* setup_fault() 4933 { 4934 int fd = open("/proc/self/make-it-fail", O_WRONLY); 4935 if (fd == -1) 4936 return "CONFIG_FAULT_INJECTION is not enabled"; 4937 close(fd); 4938 4939 fd = open("/proc/thread-self/fail-nth", O_WRONLY); 4940 if (fd == -1) 4941 return "kernel does not have systematic fault injection support"; 4942 close(fd); 4943 4944 static struct { 4945 const char* file; 4946 const char* val; 4947 bool fatal; 4948 } files[] = { 4949 {"/sys/kernel/debug/failslab/ignore-gfp-wait", "N", true}, 4950 // These are enabled by separate configs (e.g. CONFIG_FAIL_FUTEX) 4951 // and we did not check all of them in host.checkFaultInjection, so we ignore errors. 4952 {"/sys/kernel/debug/fail_futex/ignore-private", "N", false}, 4953 {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem", "N", false}, 4954 {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-wait", "N", false}, 4955 {"/sys/kernel/debug/fail_page_alloc/min-order", "0", false}, 4956 }; 4957 unsigned i; 4958 for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) { 4959 if (!write_file(files[i].file, files[i].val)) { 4960 debug("failed to write %s: %d\n", files[i].file, errno); 4961 if (files[i].fatal) 4962 return "failed to write fault injection file"; 4963 } 4964 } 4965 return NULL; 4966 } 4967 #endif 4968 4969 #if SYZ_EXECUTOR || SYZ_LEAK 4970 #include <fcntl.h> 4971 #include <stdio.h> 4972 #include <string.h> 4973 #include <sys/stat.h> 4974 #include <sys/types.h> 4975 4976 #define KMEMLEAK_FILE "/sys/kernel/debug/kmemleak" 4977 4978 static const char* setup_leak() 4979 { 4980 if (!write_file(KMEMLEAK_FILE, "scan=off")) { 4981 if (errno == EBUSY) 4982 return "KMEMLEAK disabled: increase CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE" 4983 " or unset CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF"; 4984 return "failed to write(kmemleak, \"scan=off\")"; 4985 } 4986 // 
Flush boot leaks. 4987 if (!write_file(KMEMLEAK_FILE, "scan")) 4988 return "failed to write(kmemleak, \"scan\")"; 4989 sleep(5); // account for MSECS_MIN_AGE 4990 if (!write_file(KMEMLEAK_FILE, "scan")) 4991 return "failed to write(kmemleak, \"scan\")"; 4992 if (!write_file(KMEMLEAK_FILE, "clear")) 4993 return "failed to write(kmemleak, \"clear\")"; 4994 return NULL; 4995 } 4996 4997 #define SYZ_HAVE_LEAK_CHECK 1 4998 #if SYZ_EXECUTOR 4999 static void check_leaks(char** frames, int nframes) 5000 #else 5001 static void check_leaks(void) 5002 #endif 5003 { 5004 int fd = open(KMEMLEAK_FILE, O_RDWR); 5005 if (fd == -1) 5006 fail("failed to open(kmemleak)"); 5007 // KMEMLEAK has false positives. To mitigate most of them, it checksums 5008 // potentially leaked objects, and reports them only on the next scan 5009 // iff the checksum does not change. Because of that we do the following 5010 // intricate dance: 5011 // Scan, sleep, scan again. At this point we can get some leaks. 5012 // If there are leaks, we sleep and scan again, this can remove 5013 // false leaks. Then, read kmemleak again. If we get leaks now, then 5014 // hopefully these are true positives during the previous testing cycle. 5015 uint64 start = current_time_ms(); 5016 if (write(fd, "scan", 4) != 4) 5017 fail("failed to write(kmemleak, \"scan\")"); 5018 sleep(1); 5019 // Account for MSECS_MIN_AGE 5020 // (1 second less because scanning will take at least a second). 
5021 while (current_time_ms() - start < 4 * 1000) 5022 sleep(1); 5023 if (write(fd, "scan", 4) != 4) 5024 fail("failed to write(kmemleak, \"scan\")"); 5025 static char buf[128 << 10]; 5026 ssize_t n = read(fd, buf, sizeof(buf) - 1); 5027 if (n < 0) 5028 fail("failed to read(kmemleak)"); 5029 int nleaks = 0; 5030 if (n != 0) { 5031 sleep(1); 5032 if (write(fd, "scan", 4) != 4) 5033 fail("failed to write(kmemleak, \"scan\")"); 5034 if (lseek(fd, 0, SEEK_SET) < 0) 5035 fail("failed to lseek(kmemleak)"); 5036 n = read(fd, buf, sizeof(buf) - 1); 5037 if (n < 0) 5038 fail("failed to read(kmemleak)"); 5039 buf[n] = 0; 5040 char* pos = buf; 5041 char* end = buf + n; 5042 while (pos < end) { 5043 char* next = strstr(pos + 1, "unreferenced object"); 5044 if (!next) 5045 next = end; 5046 char prev = *next; 5047 *next = 0; 5048 #if SYZ_EXECUTOR 5049 int f; 5050 for (f = 0; f < nframes; f++) { 5051 if (strstr(pos, frames[f])) 5052 break; 5053 } 5054 if (f != nframes) { 5055 *next = prev; 5056 pos = next; 5057 continue; 5058 } 5059 #endif 5060 // BUG in output should be recognized by manager. 5061 fprintf(stderr, "BUG: memory leak\n%s\n", pos); 5062 *next = prev; 5063 pos = next; 5064 nleaks++; 5065 } 5066 } 5067 if (write(fd, "clear", 5) != 5) 5068 fail("failed to write(kmemleak, \"clear\")"); 5069 close(fd); 5070 if (nleaks) 5071 doexit(1); 5072 } 5073 #endif 5074 5075 #if SYZ_EXECUTOR || SYZ_BINFMT_MISC 5076 #include <fcntl.h> 5077 #include <sys/mount.h> 5078 #include <sys/stat.h> 5079 #include <sys/types.h> 5080 5081 static const char* setup_binfmt_misc() 5082 { 5083 // EBUSY means it's already mounted here. 
5084 if (mount(0, "/proc/sys/fs/binfmt_misc", "binfmt_misc", 0, 0) && errno != EBUSY) { 5085 debug("mount(binfmt_misc) failed: %d\n", errno); 5086 return NULL; 5087 } 5088 if (!write_file("/proc/sys/fs/binfmt_misc/register", ":syz0:M:0:\x01::./file0:") || 5089 !write_file("/proc/sys/fs/binfmt_misc/register", ":syz1:M:1:\x02::./file0:POC")) 5090 return "write(/proc/sys/fs/binfmt_misc/register) failed"; 5091 return NULL; 5092 } 5093 #endif 5094 5095 #if SYZ_EXECUTOR || SYZ_KCSAN 5096 static const char* setup_kcsan() 5097 { 5098 if (!write_file("/sys/kernel/debug/kcsan", "on")) 5099 return "write(/sys/kernel/debug/kcsan, on) failed"; 5100 return NULL; 5101 } 5102 #endif 5103 5104 #if SYZ_EXECUTOR || SYZ_USB 5105 static const char* setup_usb() 5106 { 5107 if (chmod("/dev/raw-gadget", 0666)) 5108 return "failed to chmod /dev/raw-gadget"; 5109 return NULL; 5110 } 5111 #endif 5112 5113 #if SYZ_EXECUTOR || SYZ_SYSCTL 5114 #include <errno.h> 5115 #include <signal.h> 5116 #include <stdio.h> 5117 #include <string.h> 5118 #include <sys/wait.h> 5119 5120 static void setup_sysctl() 5121 { 5122 // See ctrl-alt-del comment below. 5123 int cad_pid = fork(); 5124 if (cad_pid < 0) 5125 fail("fork failed"); 5126 if (cad_pid == 0) { 5127 for (;;) 5128 sleep(100); 5129 } 5130 char tmppid[32]; 5131 snprintf(tmppid, sizeof(tmppid), "%d", cad_pid); 5132 5133 // TODO: consider moving all sysctl's into CMDLINE config later. 5134 // Kernel has support for setting sysctl's via command line since 3db978d480e28 (v5.8). 5135 struct { 5136 const char* name; 5137 const char* data; 5138 } files[] = { 5139 #if GOARCH_amd64 || GOARCH_386 5140 // nmi_check_duration() prints "INFO: NMI handler took too long" on slow debug kernels. 5141 // It happens a lot in qemu, and the messages are frequently corrupted 5142 // (intermixed with other kernel output as they are printed from NMI) 5143 // and are not matched against the suppression in pkg/report. 
5144 // This write prevents these messages from being printed. 5145 {"/sys/kernel/debug/x86/nmi_longest_ns", "10000000000"}, 5146 #endif 5147 {"/proc/sys/kernel/hung_task_check_interval_secs", "20"}, 5148 // bpf_jit_kallsyms and disabling bpf_jit_harden are required 5149 // for unwinding through bpf functions. 5150 {"/proc/sys/net/core/bpf_jit_kallsyms", "1"}, 5151 {"/proc/sys/net/core/bpf_jit_harden", "0"}, 5152 // This is to provide more useful info in crash reports. 5153 {"/proc/sys/kernel/kptr_restrict", "0"}, 5154 {"/proc/sys/kernel/softlockup_all_cpu_backtrace", "1"}, 5155 // This is to restrict effects of recursive exponential mounts, for details see 5156 // "mnt: Add a per mount namespace limit on the number of mounts" commit. 5157 {"/proc/sys/fs/mount-max", "100"}, 5158 // Dumping all tasks to console can take too long. 5159 {"/proc/sys/vm/oom_dump_tasks", "0"}, 5160 // Executor hits lots of SIGSEGVs, no point in logging them. 5161 {"/proc/sys/debug/exception-trace", "0"}, 5162 {"/proc/sys/kernel/printk", "7 4 1 3"}, 5163 // Faster gc (1 second) is intended to make tests more repeatable. 5164 {"/proc/sys/kernel/keys/gc_delay", "1"}, 5165 // We always want to prefer killing the allocating test process rather than somebody else 5166 // (sshd or another random test process). 5167 {"/proc/sys/vm/oom_kill_allocating_task", "1"}, 5168 // This blocks some of the ways the fuzzer can trigger a reboot. 5169 // ctrl-alt-del=0 tells kernel to signal cad_pid instead of rebooting. 5170 // We set cad_pid to a transient process pid ctrl-alt-del a no-op. 5171 // Note: we need to write a live process pid. 
5172 // For context see: https://groups.google.com/g/syzkaller-bugs/c/WqOY4TiRnFg/m/6P9u8lWZAQAJ 5173 {"/proc/sys/kernel/ctrl-alt-del", "0"}, 5174 {"/proc/sys/kernel/cad_pid", tmppid}, 5175 5176 }; 5177 for (size_t i = 0; i < sizeof(files) / sizeof(files[0]); i++) { 5178 if (!write_file(files[i].name, files[i].data)) { 5179 debug("write to %s failed: %s\n", files[i].name, strerror(errno)); 5180 } 5181 } 5182 kill(cad_pid, SIGKILL); 5183 while (waitpid(cad_pid, NULL, 0) != cad_pid) 5184 ; 5185 } 5186 #endif 5187 5188 #if SYZ_EXECUTOR || SYZ_802154 5189 #include <net/if.h> 5190 #include <string.h> 5191 #include <sys/socket.h> 5192 #include <sys/types.h> 5193 5194 #define NL802154_CMD_SET_SHORT_ADDR 11 5195 #define NL802154_ATTR_IFINDEX 3 5196 #define NL802154_ATTR_SHORT_ADDR 10 5197 5198 static const char* setup_802154() 5199 { 5200 const char* error = NULL; 5201 int sock_generic = -1; 5202 int sock_route = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 5203 if (sock_route == -1) { 5204 error = "socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE) failed"; 5205 goto fail; 5206 } 5207 sock_generic = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 5208 if (sock_generic == -1) { 5209 error = "socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC) failed"; 5210 goto fail; 5211 } 5212 { 5213 int nl802154_family_id = netlink_query_family_id(&nlmsg, sock_generic, "nl802154", true); 5214 if (nl802154_family_id < 0) { 5215 error = "netlink_query_family_id failed"; 5216 goto fail; 5217 } 5218 5219 for (int i = 0; i < 2; i++) { 5220 // wpan0/1 are created by CONFIG_IEEE802154_HWSIM. 5221 // sys/linux/socket_ieee802154.txt knowns about these names and consts. 
5222 char devname[] = "wpan0"; 5223 devname[strlen(devname) - 1] += i; 5224 uint64 hwaddr = 0xaaaaaaaaaaaa0002 + (i << 8); 5225 uint16 shortaddr = 0xaaa0 + i; 5226 int ifindex = if_nametoindex(devname); 5227 struct genlmsghdr genlhdr; 5228 memset(&genlhdr, 0, sizeof(genlhdr)); 5229 genlhdr.cmd = NL802154_CMD_SET_SHORT_ADDR; 5230 netlink_init(&nlmsg, nl802154_family_id, 0, &genlhdr, sizeof(genlhdr)); 5231 netlink_attr(&nlmsg, NL802154_ATTR_IFINDEX, &ifindex, sizeof(ifindex)); 5232 netlink_attr(&nlmsg, NL802154_ATTR_SHORT_ADDR, &shortaddr, sizeof(shortaddr)); 5233 if (netlink_send(&nlmsg, sock_generic) < 0) { 5234 error = "NL802154_CMD_SET_SHORT_ADDR failed"; 5235 goto fail; 5236 } 5237 netlink_device_change(&nlmsg, sock_route, devname, true, 0, &hwaddr, sizeof(hwaddr), 0); 5238 if (i == 0) { 5239 netlink_add_device_impl(&nlmsg, "lowpan", "lowpan0", false); 5240 netlink_done(&nlmsg); 5241 netlink_attr(&nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex)); 5242 if (netlink_send(&nlmsg, sock_route) < 0) { 5243 error = "netlink: adding device lowpan0 type lowpan link wpan0"; 5244 goto fail; 5245 } 5246 } 5247 } 5248 } 5249 fail: 5250 close(sock_route); 5251 close(sock_generic); 5252 return error; 5253 } 5254 #endif 5255 5256 #if GOARCH_s390x 5257 #include <sys/mman.h> 5258 // Ugly way to work around gcc's "error: function called through a non-compatible type". 5259 // Simply casting via (void*) inline does not work b/c gcc sees through a chain of casts. 5260 // The macro is used in generated C code. 
5261 #define CAST(f) ({void* p = (void*)f; p; }) 5262 #endif 5263 5264 #if SYZ_EXECUTOR || __NR_syz_fuse_handle_req 5265 #include <fcntl.h> 5266 #include <stddef.h> 5267 #include <stdio.h> 5268 #include <sys/stat.h> 5269 #include <sys/types.h> 5270 5271 // From linux/fuse.h 5272 #define FUSE_MIN_READ_BUFFER 8192 5273 5274 // From linux/fuse.h 5275 enum fuse_opcode { 5276 FUSE_LOOKUP = 1, 5277 FUSE_FORGET = 2, // no reply 5278 FUSE_GETATTR = 3, 5279 FUSE_SETATTR = 4, 5280 FUSE_READLINK = 5, 5281 FUSE_SYMLINK = 6, 5282 FUSE_MKNOD = 8, 5283 FUSE_MKDIR = 9, 5284 FUSE_UNLINK = 10, 5285 FUSE_RMDIR = 11, 5286 FUSE_RENAME = 12, 5287 FUSE_LINK = 13, 5288 FUSE_OPEN = 14, 5289 FUSE_READ = 15, 5290 FUSE_WRITE = 16, 5291 FUSE_STATFS = 17, 5292 FUSE_RELEASE = 18, 5293 FUSE_FSYNC = 20, 5294 FUSE_SETXATTR = 21, 5295 FUSE_GETXATTR = 22, 5296 FUSE_LISTXATTR = 23, 5297 FUSE_REMOVEXATTR = 24, 5298 FUSE_FLUSH = 25, 5299 FUSE_INIT = 26, 5300 FUSE_OPENDIR = 27, 5301 FUSE_READDIR = 28, 5302 FUSE_RELEASEDIR = 29, 5303 FUSE_FSYNCDIR = 30, 5304 FUSE_GETLK = 31, 5305 FUSE_SETLK = 32, 5306 FUSE_SETLKW = 33, 5307 FUSE_ACCESS = 34, 5308 FUSE_CREATE = 35, 5309 FUSE_INTERRUPT = 36, 5310 FUSE_BMAP = 37, 5311 FUSE_DESTROY = 38, 5312 FUSE_IOCTL = 39, 5313 FUSE_POLL = 40, 5314 FUSE_NOTIFY_REPLY = 41, 5315 FUSE_BATCH_FORGET = 42, 5316 FUSE_FALLOCATE = 43, 5317 FUSE_READDIRPLUS = 44, 5318 FUSE_RENAME2 = 45, 5319 FUSE_LSEEK = 46, 5320 FUSE_COPY_FILE_RANGE = 47, 5321 FUSE_SETUPMAPPING = 48, 5322 FUSE_REMOVEMAPPING = 49, 5323 FUSE_SYNCFS = 50, 5324 FUSE_TMPFILE = 51, 5325 FUSE_STATX = 52, 5326 5327 // CUSE specific operations 5328 CUSE_INIT = 4096, 5329 5330 // Reserved opcodes: helpful to detect structure endian-ness 5331 CUSE_INIT_BSWAP_RESERVED = 1048576, // CUSE_INIT << 8 5332 FUSE_INIT_BSWAP_RESERVED = 436207616, // FUSE_INIT << 24 5333 }; 5334 5335 // From linux/fuse.h 5336 struct fuse_in_header { 5337 uint32 len; 5338 uint32 opcode; 5339 uint64 unique; 5340 uint64 nodeid; 5341 uint32 uid; 5342 
uint32 gid; 5343 uint32 pid; 5344 uint32 padding; 5345 }; 5346 5347 // From linux/fuse.h 5348 struct fuse_out_header { 5349 uint32 len; 5350 // This is actually a int32_t but *_t variants fail to compile inside 5351 // the executor (it appends an additional _t for some reason) and int32 5352 // does not exist. Since we don't touch this field, defining it as 5353 // unsigned should not cause any problems. 5354 uint32 error; 5355 uint64 unique; 5356 }; 5357 5358 // Struct shared between syz_fuse_handle_req() and the fuzzer. Used to provide 5359 // a fuzzed response for each request type. 5360 struct syz_fuse_req_out { 5361 struct fuse_out_header* init; 5362 struct fuse_out_header* lseek; 5363 struct fuse_out_header* bmap; 5364 struct fuse_out_header* poll; 5365 struct fuse_out_header* getxattr; 5366 struct fuse_out_header* lk; 5367 struct fuse_out_header* statfs; 5368 struct fuse_out_header* write; 5369 struct fuse_out_header* read; 5370 struct fuse_out_header* open; 5371 struct fuse_out_header* attr; 5372 struct fuse_out_header* entry; 5373 struct fuse_out_header* dirent; 5374 struct fuse_out_header* direntplus; 5375 struct fuse_out_header* create_open; 5376 struct fuse_out_header* ioctl; 5377 struct fuse_out_header* statx; 5378 }; 5379 5380 // Link the reponse to the request and send it to /dev/fuse. 5381 static int fuse_send_response(int fd, 5382 const struct fuse_in_header* in_hdr, 5383 struct fuse_out_header* out_hdr) 5384 { 5385 if (!out_hdr) { 5386 debug("fuse_send_response: received a NULL out_hdr\n"); 5387 return -1; 5388 } 5389 5390 out_hdr->unique = in_hdr->unique; 5391 if (write(fd, out_hdr, out_hdr->len) == -1) { 5392 debug("fuse_send_response > write failed: %d\n", errno); 5393 return -1; 5394 } 5395 5396 return 0; 5397 } 5398 5399 // This function reads a request from /dev/fuse and tries to pick the correct 5400 // response from the input struct syz_fuse_req_out (a3). Responses are still 5401 // generated by the fuzzer. 
5402 static volatile long syz_fuse_handle_req(volatile long a0, // /dev/fuse fd. 5403 volatile long a1, // Read buffer. 5404 volatile long a2, // Buffer len. 5405 volatile long a3) // syz_fuse_req_out. 5406 { 5407 struct syz_fuse_req_out* req_out = (struct syz_fuse_req_out*)a3; 5408 struct fuse_out_header* out_hdr = NULL; 5409 char* buf = (char*)a1; 5410 int buf_len = (int)a2; 5411 int fd = (int)a0; 5412 5413 if (!req_out) { 5414 debug("syz_fuse_handle_req: received a NULL syz_fuse_req_out\n"); 5415 return -1; 5416 } 5417 if (buf_len < FUSE_MIN_READ_BUFFER) { 5418 debug("FUSE requires the read buffer to be at least %u\n", FUSE_MIN_READ_BUFFER); 5419 return -1; 5420 } 5421 5422 int ret = read(fd, buf, buf_len); 5423 if (ret == -1) { 5424 debug("syz_fuse_handle_req > read failed: %d\n", errno); 5425 return -1; 5426 } 5427 // Safe to do because ret > 0 (!= -1) and < FUSE_MIN_READ_BUFFER (= 8192). 5428 if ((size_t)ret < sizeof(struct fuse_in_header)) { 5429 debug("syz_fuse_handle_req: received a truncated FUSE header\n"); 5430 return -1; 5431 } 5432 5433 const struct fuse_in_header* in_hdr = (const struct fuse_in_header*)buf; 5434 debug("syz_fuse_handle_req: received opcode %d\n", in_hdr->opcode); 5435 if (in_hdr->len > (uint32)ret) { 5436 debug("syz_fuse_handle_req: received a truncated message\n"); 5437 return -1; 5438 } 5439 5440 switch (in_hdr->opcode) { 5441 case FUSE_GETATTR: 5442 case FUSE_SETATTR: 5443 out_hdr = req_out->attr; 5444 break; 5445 case FUSE_LOOKUP: 5446 case FUSE_SYMLINK: 5447 case FUSE_LINK: 5448 case FUSE_MKNOD: 5449 case FUSE_MKDIR: 5450 out_hdr = req_out->entry; 5451 break; 5452 case FUSE_OPEN: 5453 case FUSE_OPENDIR: 5454 out_hdr = req_out->open; 5455 break; 5456 case FUSE_STATFS: 5457 out_hdr = req_out->statfs; 5458 break; 5459 case FUSE_RMDIR: 5460 case FUSE_RENAME: 5461 case FUSE_RENAME2: 5462 case FUSE_FALLOCATE: 5463 case FUSE_SETXATTR: 5464 case FUSE_REMOVEXATTR: 5465 case FUSE_FSYNCDIR: 5466 case FUSE_FSYNC: 5467 case FUSE_SETLKW: 5468 
case FUSE_SETLK: 5469 case FUSE_ACCESS: 5470 case FUSE_FLUSH: 5471 case FUSE_RELEASE: 5472 case FUSE_RELEASEDIR: 5473 case FUSE_UNLINK: 5474 case FUSE_DESTROY: 5475 // These opcodes do not have any reply data. Hence, we pick 5476 // another response and only use the shared header. 5477 out_hdr = req_out->init; 5478 if (!out_hdr) { 5479 debug("syz_fuse_handle_req: received a NULL out_hdr\n"); 5480 return -1; 5481 } 5482 out_hdr->len = sizeof(struct fuse_out_header); 5483 break; 5484 case FUSE_READ: 5485 out_hdr = req_out->read; 5486 break; 5487 case FUSE_READDIR: 5488 out_hdr = req_out->dirent; 5489 break; 5490 case FUSE_READDIRPLUS: 5491 out_hdr = req_out->direntplus; 5492 break; 5493 case FUSE_INIT: 5494 out_hdr = req_out->init; 5495 break; 5496 case FUSE_LSEEK: 5497 out_hdr = req_out->lseek; 5498 break; 5499 case FUSE_GETLK: 5500 out_hdr = req_out->lk; 5501 break; 5502 case FUSE_BMAP: 5503 out_hdr = req_out->bmap; 5504 break; 5505 case FUSE_POLL: 5506 out_hdr = req_out->poll; 5507 break; 5508 case FUSE_GETXATTR: 5509 case FUSE_LISTXATTR: 5510 out_hdr = req_out->getxattr; 5511 break; 5512 case FUSE_WRITE: 5513 case FUSE_COPY_FILE_RANGE: 5514 out_hdr = req_out->write; 5515 break; 5516 case FUSE_FORGET: 5517 case FUSE_BATCH_FORGET: 5518 // FUSE_FORGET and FUSE_BATCH_FORGET expect no reply. 5519 return 0; 5520 case FUSE_CREATE: 5521 out_hdr = req_out->create_open; 5522 break; 5523 case FUSE_IOCTL: 5524 out_hdr = req_out->ioctl; 5525 break; 5526 case FUSE_STATX: 5527 out_hdr = req_out->statx; 5528 break; 5529 default: 5530 debug("syz_fuse_handle_req: unknown FUSE opcode\n"); 5531 return -1; 5532 } 5533 5534 return fuse_send_response(fd, in_hdr, out_hdr); 5535 } 5536 #endif 5537 5538 #if SYZ_EXECUTOR || __NR_syz_80211_inject_frame 5539 #include <errno.h> 5540 #include <linux/genetlink.h> 5541 #include <linux/if_ether.h> 5542 #include <linux/nl80211.h> 5543 #include <net/if.h> 5544 #include <sys/ioctl.h> 5545 5546 // This pseudo syscall performs 802.11 frame injection. 
//
// Its current implementation performs the injection by means of mac80211_hwsim.
// The procedure consists of the following steps:
// 1. Open a netlink socket
// 2. Register as an application responsible for wireless medium simulation by executing
//    HWSIM_CMD_REGISTER. This is a prerequisite for the following step. After HWSIM_CMD_REGISTER
//    is executed, mac80211_hwsim stops simulating a perfect medium.
//    It is also important to note that this command registers a specific socket, not a netlink port.
// 3. Inject a frame to the required interface by executing HWSIM_CMD_FRAME.
// 4. Close the socket. mac80211_hwsim will detect this and return to perfect medium simulation.
//
// Note that we cannot (should not) open a socket, register it once and then use it for frame injection
// throughout the lifetime of a proc. When some socket is registered, mac80211_hwsim does not broadcast
// frames to all interfaces itself. As we do not perform this activity either, a permanently registered
// socket will disrupt normal network operation.
5562 5563 #define HWSIM_ATTR_RX_RATE 5 5564 #define HWSIM_ATTR_SIGNAL 6 5565 #define HWSIM_ATTR_ADDR_RECEIVER 1 5566 #define HWSIM_ATTR_FRAME 3 5567 5568 #define WIFI_MAX_INJECT_LEN 2048 5569 5570 static int hwsim_register_socket(struct nlmsg* nlmsg, int sock, int hwsim_family) 5571 { 5572 struct genlmsghdr genlhdr; 5573 memset(&genlhdr, 0, sizeof(genlhdr)); 5574 genlhdr.cmd = HWSIM_CMD_REGISTER; 5575 netlink_init(nlmsg, hwsim_family, 0, &genlhdr, sizeof(genlhdr)); 5576 int err = netlink_send_ext(nlmsg, sock, 0, NULL, false); 5577 if (err < 0) { 5578 debug("hwsim_register_device failed: %s\n", strerror(errno)); 5579 } 5580 return err; 5581 } 5582 5583 static int hwsim_inject_frame(struct nlmsg* nlmsg, int sock, int hwsim_family, uint8* mac_addr, uint8* data, int len) 5584 { 5585 struct genlmsghdr genlhdr; 5586 uint32 rx_rate = WIFI_DEFAULT_RX_RATE; 5587 uint32 signal = WIFI_DEFAULT_SIGNAL; 5588 5589 memset(&genlhdr, 0, sizeof(genlhdr)); 5590 genlhdr.cmd = HWSIM_CMD_FRAME; 5591 netlink_init(nlmsg, hwsim_family, 0, &genlhdr, sizeof(genlhdr)); 5592 netlink_attr(nlmsg, HWSIM_ATTR_RX_RATE, &rx_rate, sizeof(rx_rate)); 5593 netlink_attr(nlmsg, HWSIM_ATTR_SIGNAL, &signal, sizeof(signal)); 5594 netlink_attr(nlmsg, HWSIM_ATTR_ADDR_RECEIVER, mac_addr, ETH_ALEN); 5595 netlink_attr(nlmsg, HWSIM_ATTR_FRAME, data, len); 5596 int err = netlink_send_ext(nlmsg, sock, 0, NULL, false); 5597 if (err < 0) { 5598 debug("hwsim_inject_frame failed: %s\n", strerror(errno)); 5599 } 5600 return err; 5601 } 5602 5603 static long syz_80211_inject_frame(volatile long a0, volatile long a1, volatile long a2) 5604 { 5605 uint8* mac_addr = (uint8*)a0; 5606 uint8* buf = (uint8*)a1; 5607 int buf_len = (int)a2; 5608 struct nlmsg tmp_msg; 5609 5610 if (buf_len < 0 || buf_len > WIFI_MAX_INJECT_LEN) { 5611 debug("syz_80211_inject_frame: wrong buffer size %d\n", buf_len); 5612 return -1; 5613 } 5614 5615 int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 5616 if (sock < 0) { 5617 
debug("syz_80211_inject_frame: socket creation failed, errno %d\n", errno); 5618 return -1; 5619 } 5620 5621 int hwsim_family_id = netlink_query_family_id(&tmp_msg, sock, "MAC80211_HWSIM", false); 5622 if (hwsim_family_id < 0) { 5623 debug("syz_80211_inject_frame: failed to query family id\n"); 5624 close(sock); 5625 return -1; 5626 } 5627 int ret = hwsim_register_socket(&tmp_msg, sock, hwsim_family_id); 5628 if (ret < 0) { 5629 debug("syz_80211_inject_frame: failed to register socket, ret %d\n", ret); 5630 close(sock); 5631 return -1; 5632 } 5633 5634 ret = hwsim_inject_frame(&tmp_msg, sock, hwsim_family_id, mac_addr, buf, buf_len); 5635 close(sock); 5636 if (ret < 0) { 5637 debug("syz_80211_inject_frame: failed to inject message, ret %d\n", ret); 5638 return -1; 5639 } 5640 5641 return 0; 5642 } 5643 5644 #endif 5645 5646 #if SYZ_EXECUTOR || __NR_syz_80211_join_ibss 5647 5648 #define WIFI_MAX_SSID_LEN 32 5649 5650 #define WIFI_JOIN_IBSS_NO_SCAN 0 5651 #define WIFI_JOIN_IBSS_BG_SCAN 1 5652 #define WIFI_JOIN_IBSS_BG_NO_SCAN 2 5653 5654 static long syz_80211_join_ibss(volatile long a0, volatile long a1, volatile long a2, volatile long a3) 5655 { 5656 char* interface = (char*)a0; 5657 uint8* ssid = (uint8*)a1; 5658 int ssid_len = (int)a2; 5659 int mode = (int)a3; // This parameter essentially determines whether it will perform a scan 5660 5661 struct nlmsg tmp_msg; 5662 uint8 bssid[ETH_ALEN] = WIFI_IBSS_BSSID; 5663 5664 if (ssid_len < 0 || ssid_len > WIFI_MAX_SSID_LEN) { 5665 debug("syz_80211_join_ibss: invalid ssid len %d\n", ssid_len); 5666 return -1; 5667 } 5668 5669 if (mode < 0 || mode > WIFI_JOIN_IBSS_BG_NO_SCAN) { 5670 debug("syz_80211_join_ibss: invalid mode %d\n", mode); 5671 return -1; 5672 } 5673 5674 int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 5675 if (sock < 0) { 5676 debug("syz_80211_join_ibss: socket creation failed, errno %d\n", errno); 5677 return -1; 5678 } 5679 5680 int nl80211_family_id = netlink_query_family_id(&tmp_msg, sock, 
"nl80211", false); 5681 if (nl80211_family_id < 0) { 5682 debug("syz_80211_join_ibss: netlink_query_family_id failed\n"); 5683 close(sock); 5684 return -1; 5685 } 5686 struct join_ibss_props ibss_props = { 5687 .wiphy_freq = WIFI_DEFAULT_FREQUENCY, 5688 .wiphy_freq_fixed = (mode == WIFI_JOIN_IBSS_NO_SCAN || mode == WIFI_JOIN_IBSS_BG_NO_SCAN), 5689 .mac = bssid, 5690 .ssid = ssid, 5691 .ssid_len = ssid_len}; 5692 5693 int ret = nl80211_setup_ibss_interface(&tmp_msg, sock, nl80211_family_id, interface, &ibss_props, false); 5694 close(sock); 5695 if (ret < 0) { 5696 debug("syz_80211_join_ibss: failed set up IBSS network for %.32s\n", interface); 5697 return -1; 5698 } 5699 5700 if (mode == WIFI_JOIN_IBSS_NO_SCAN) { 5701 ret = await_ifla_operstate(&tmp_msg, interface, IF_OPER_UP, false); 5702 if (ret < 0) { 5703 debug("syz_80211_join_ibss: await_ifla_operstate failed for %.32s, ret %d\n", interface, ret); 5704 return -1; 5705 } 5706 } 5707 5708 return 0; 5709 } 5710 5711 #endif 5712 5713 #if SYZ_EXECUTOR || __NR_syz_clone || __NR_syz_clone3 5714 #if SYZ_EXECUTOR 5715 // The slowdown multiplier is already taken into account. 5716 #define USLEEP_FORKED_CHILD (3 * syscall_timeout_ms * 1000) 5717 #else 5718 #define USLEEP_FORKED_CHILD (3 * /*{{{BASE_CALL_TIMEOUT_MS}}}*/ *1000) 5719 #endif 5720 5721 static long handle_clone_ret(long ret) 5722 { 5723 if (ret != 0) { 5724 #if SYZ_EXECUTOR || SYZ_HANDLE_SEGV 5725 __atomic_store_n(&clone_ongoing, 0, __ATOMIC_RELAXED); 5726 #endif 5727 return ret; 5728 } 5729 // Exit if we're in the child process - not all kernels provide the proper means 5730 // to prevent fork-bombs. 5731 // But first sleep for some time. This will hopefully foster IPC fuzzing. 5732 usleep(USLEEP_FORKED_CHILD); 5733 // Note that exit_group is a bad choice here because if we created just a thread, then 5734 // the whole process will be killed. A plain exit will work fine in any case. 
5735 syscall(__NR_exit, 0); 5736 while (1) { 5737 } 5738 } 5739 #endif 5740 5741 #if SYZ_EXECUTOR || __NR_syz_clone 5742 #include <sched.h> 5743 5744 // syz_clone is mostly needed on kernels which do not suport clone3. 5745 static long syz_clone(volatile long flags, volatile long stack, volatile long stack_len, 5746 volatile long ptid, volatile long ctid, volatile long tls) 5747 { 5748 // ABI requires 16-byte stack alignment. 5749 long sp = (stack + stack_len) & ~15; 5750 #if SYZ_EXECUTOR || SYZ_HANDLE_SEGV 5751 __atomic_store_n(&clone_ongoing, 1, __ATOMIC_RELAXED); 5752 #endif 5753 // Clear the CLONE_VM flag. Otherwise it'll very likely corrupt syz-executor. 5754 long ret = (long)syscall(__NR_clone, flags & ~CLONE_VM, sp, ptid, ctid, tls); 5755 return handle_clone_ret(ret); 5756 } 5757 #endif 5758 5759 #if SYZ_EXECUTOR || __NR_syz_clone3 5760 #include <linux/sched.h> 5761 #include <sched.h> 5762 5763 #define MAX_CLONE_ARGS_BYTES 256 5764 static long syz_clone3(volatile long a0, volatile long a1) 5765 { 5766 unsigned long copy_size = a1; 5767 if (copy_size < sizeof(uint64) || copy_size > MAX_CLONE_ARGS_BYTES) 5768 return -1; 5769 // The structure may have different sizes on different kernel versions, so copy it as raw bytes. 5770 char clone_args[MAX_CLONE_ARGS_BYTES]; 5771 memcpy(&clone_args, (void*)a0, copy_size); 5772 5773 // As in syz_clone, clear the CLONE_VM flag. Flags are in the first 8-byte integer field. 
5774 uint64* flags = (uint64*)&clone_args; 5775 *flags &= ~CLONE_VM; 5776 #if SYZ_EXECUTOR || SYZ_HANDLE_SEGV 5777 __atomic_store_n(&clone_ongoing, 1, __ATOMIC_RELAXED); 5778 #endif 5779 return handle_clone_ret((long)syscall(__NR_clone3, &clone_args, copy_size)); 5780 } 5781 5782 #endif 5783 5784 #if SYZ_EXECUTOR || __NR_syz_pkey_set 5785 #include <errno.h> 5786 #define RESERVED_PKEY 15 5787 // syz_pkey_set(key pkey, val flags[pkey_flags]) 5788 static long syz_pkey_set(volatile long pkey, volatile long val) 5789 { 5790 #if GOARCH_amd64 || GOARCH_386 5791 if (pkey == RESERVED_PKEY) { 5792 errno = EINVAL; 5793 return -1; 5794 } 5795 uint32 eax = 0; 5796 uint32 ecx = 0; 5797 asm volatile("rdpkru" 5798 : "=a"(eax) 5799 : "c"(ecx) 5800 : "edx"); 5801 // PKRU register contains 2 bits per key. 5802 // Max number of keys is 16. 5803 // Clear old bits for the key: 5804 eax &= ~(3 << ((pkey % 16) * 2)); 5805 // Set new bits for the key: 5806 eax |= (val & 3) << ((pkey % 16) * 2); 5807 uint32 edx = 0; 5808 asm volatile("wrpkru" ::"a"(eax), "c"(ecx), "d"(edx)); 5809 #endif 5810 return 0; 5811 } 5812 #endif 5813 5814 #if SYZ_EXECUTOR || SYZ_SWAP 5815 #include <fcntl.h> 5816 #include <linux/falloc.h> 5817 #include <stdio.h> 5818 #include <string.h> 5819 #include <sys/stat.h> 5820 #include <sys/swap.h> 5821 #include <sys/types.h> 5822 5823 #define SWAP_FILE "./swap-file" 5824 #define SWAP_FILE_SIZE (128 * 1000 * 1000) // 128 MB. 5825 5826 static const char* setup_swap() 5827 { 5828 // The call must be idempotent, so first disable swap and remove the swap file. 5829 swapoff(SWAP_FILE); 5830 unlink(SWAP_FILE); 5831 // Zero-fill the file. 5832 int fd = open(SWAP_FILE, O_CREAT | O_WRONLY | O_CLOEXEC, 0600); 5833 if (fd == -1) 5834 return "swap file open failed"; 5835 // We cannot do ftruncate -- swapon complains about this. Do fallocate instead. 5836 fallocate(fd, FALLOC_FL_ZERO_RANGE, 0, SWAP_FILE_SIZE); 5837 close(fd); 5838 // Set up the swap file. 
5839 char cmdline[64]; 5840 sprintf(cmdline, "mkswap %s", SWAP_FILE); 5841 if (runcmdline(cmdline)) 5842 return "mkswap failed"; 5843 if (swapon(SWAP_FILE, SWAP_FLAG_PREFER) == 1) 5844 return "swapon failed"; 5845 return NULL; 5846 } 5847 #endif 5848 5849 #if SYZ_EXECUTOR || __NR_syz_pidfd_open 5850 #include <sys/syscall.h> 5851 5852 // TODO: long-term we should improve our sandboxing rules since there are also 5853 // many other opportunities for a fuzzer process to access what it shouldn't. 5854 // Here we only shut down one of the recently discovered ways. 5855 static long syz_pidfd_open(volatile long pid, volatile long flags) 5856 { 5857 if (pid == 1) { 5858 // Under a PID namespace, pid=1 is the parent process. 5859 // We don't want a forked child to mangle parent syz-executor's fds. 5860 pid = 0; 5861 } 5862 return syscall(__NR_pidfd_open, pid, flags); 5863 } 5864 5865 #endif 5866 5867 #if SYZ_EXECUTOR || __NR_syz_kfuzztest_run 5868 5869 #include <fcntl.h> 5870 #include <stdint.h> 5871 #include <stdio.h> 5872 #include <stdlib.h> 5873 #include <string.h> 5874 #include <sys/stat.h> 5875 #include <sys/types.h> 5876 #include <unistd.h> 5877 5878 static long syz_kfuzztest_run(volatile long test_name_ptr, volatile long input_data, 5879 volatile long input_data_size, volatile long buffer) 5880 { 5881 const char* test_name = (const char*)test_name_ptr; 5882 if (!test_name) { 5883 debug("syz_kfuzztest_run: test name was NULL\n"); 5884 return -1; 5885 } 5886 if (!buffer) { 5887 debug("syz_kfuzztest_run: buffer was NULL\n"); 5888 return -1; 5889 } 5890 5891 char buf[256]; 5892 int ret = snprintf(buf, sizeof(buf), "/sys/kernel/debug/kfuzztest/%s/input", test_name); 5893 if (ret < 0 || (unsigned long)ret >= sizeof(buf)) { 5894 debug("syz_kfuzztest_run: constructed path is too long or snprintf failed\n"); 5895 return -1; 5896 } 5897 5898 int fd = openat(AT_FDCWD, buf, O_WRONLY, 0); 5899 if (fd < 0) { 5900 debug("syz_kfuzztest_run: failed to open %s\n", buf); 5901 return 
-1; 5902 } 5903 5904 ssize_t bytes_written = write(fd, (void*)buffer, (size_t)input_data_size); 5905 if (bytes_written != input_data_size) { 5906 debug("syz_kfuzztest_run: failed to write to %s, reason: %s\n", buf, strerror(errno)); 5907 close(fd); 5908 return -1; 5909 } 5910 5911 if (close(fd) != 0) { 5912 debug("syz_kfuzztest_run: failed to close file\n"); 5913 return -1; 5914 } 5915 5916 return 0; 5917 } 5918 5919 #endif