github.com/google/syzkaller@v0.0.0-20240517125934-c0f1611a36d6/executor/common_linux.h (about) 1 // Copyright 2016 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 // This file is shared between executor and csource package. 5 6 #include <stdlib.h> 7 #include <sys/syscall.h> 8 #include <sys/types.h> 9 #include <unistd.h> 10 11 #if SYZ_EXECUTOR 12 const int kExtraCoverSize = 256 << 10; 13 struct cover_t; 14 static void cover_reset(cover_t* cov); 15 #endif 16 17 #if SYZ_EXECUTOR || SYZ_THREADED 18 #include <linux/futex.h> 19 #include <pthread.h> 20 21 typedef struct { 22 int state; 23 } event_t; 24 25 static void event_init(event_t* ev) 26 { 27 ev->state = 0; 28 } 29 30 static void event_reset(event_t* ev) 31 { 32 ev->state = 0; 33 } 34 35 static void event_set(event_t* ev) 36 { 37 if (ev->state) 38 exitf("event already set"); 39 __atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE); 40 syscall(SYS_futex, &ev->state, FUTEX_WAKE | FUTEX_PRIVATE_FLAG, 1000000); 41 } 42 43 static void event_wait(event_t* ev) 44 { 45 while (!__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE)) 46 syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, 0); 47 } 48 49 static int event_isset(event_t* ev) 50 { 51 return __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE); 52 } 53 54 static int event_timedwait(event_t* ev, uint64 timeout) 55 { 56 uint64 start = current_time_ms(); 57 uint64 now = start; 58 for (;;) { 59 uint64 remain = timeout - (now - start); 60 struct timespec ts; 61 ts.tv_sec = remain / 1000; 62 ts.tv_nsec = (remain % 1000) * 1000 * 1000; 63 syscall(SYS_futex, &ev->state, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, 0, &ts); 64 if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE)) 65 return 1; 66 now = current_time_ms(); 67 if (now - start > timeout) 68 return 0; 69 } 70 } 71 #endif 72 73 #if SYZ_EXECUTOR || SYZ_REPEAT || SYZ_NET_INJECTION || SYZ_FAULT || SYZ_SANDBOX_NONE || \ 74 
SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID || \ 75 SYZ_FAULT || SYZ_LEAK || SYZ_BINFMT_MISC || SYZ_SYSCTL || \ 76 ((__NR_syz_usb_connect || __NR_syz_usb_connect_ath9k) && USB_DEBUG) || \ 77 __NR_syz_usbip_server_init 78 #include <errno.h> 79 #include <fcntl.h> 80 #include <stdarg.h> 81 #include <stdbool.h> 82 #include <string.h> 83 #include <sys/stat.h> 84 #include <sys/types.h> 85 86 static bool write_file(const char* file, const char* what, ...) 87 { 88 char buf[1024]; 89 va_list args; 90 va_start(args, what); 91 vsnprintf(buf, sizeof(buf), what, args); 92 va_end(args); 93 buf[sizeof(buf) - 1] = 0; 94 int len = strlen(buf); 95 96 int fd = open(file, O_WRONLY | O_CLOEXEC); 97 if (fd == -1) 98 return false; 99 if (write(fd, buf, len) != len) { 100 int err = errno; 101 close(fd); 102 debug("write(%s) failed: %d\n", file, err); 103 errno = err; 104 return false; 105 } 106 close(fd); 107 return true; 108 } 109 #endif 110 111 #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION || SYZ_DEVLINK_PCI || SYZ_WIFI || SYZ_802154 || \ 112 __NR_syz_genetlink_get_family_id || __NR_syz_80211_inject_frame || __NR_syz_80211_join_ibss || SYZ_NIC_VF 113 #include <arpa/inet.h> 114 #include <errno.h> 115 #include <net/if.h> 116 #include <netinet/in.h> 117 #include <stdbool.h> 118 #include <string.h> 119 #include <sys/socket.h> 120 #include <sys/types.h> 121 122 #include <linux/genetlink.h> 123 #include <linux/if_addr.h> 124 #include <linux/if_link.h> 125 #include <linux/in6.h> 126 #include <linux/neighbour.h> 127 #include <linux/net.h> 128 #include <linux/netlink.h> 129 #include <linux/rtnetlink.h> 130 #include <linux/veth.h> 131 132 struct nlmsg { 133 char* pos; 134 int nesting; 135 struct nlattr* nested[8]; 136 char buf[4096]; 137 }; 138 139 static void netlink_init(struct nlmsg* nlmsg, int typ, int flags, 140 const void* data, int size) 141 { 142 memset(nlmsg, 0, sizeof(*nlmsg)); 143 struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg->buf; 144 hdr->nlmsg_type 
= typ; 145 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | flags; 146 memcpy(hdr + 1, data, size); 147 nlmsg->pos = (char*)(hdr + 1) + NLMSG_ALIGN(size); 148 } 149 150 static void netlink_attr(struct nlmsg* nlmsg, int typ, 151 const void* data, int size) 152 { 153 struct nlattr* attr = (struct nlattr*)nlmsg->pos; 154 attr->nla_len = sizeof(*attr) + size; 155 attr->nla_type = typ; 156 if (size > 0) 157 memcpy(attr + 1, data, size); 158 nlmsg->pos += NLMSG_ALIGN(attr->nla_len); 159 } 160 161 #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_802154 162 static void netlink_nest(struct nlmsg* nlmsg, int typ) 163 { 164 struct nlattr* attr = (struct nlattr*)nlmsg->pos; 165 attr->nla_type = typ; 166 nlmsg->pos += sizeof(*attr); 167 nlmsg->nested[nlmsg->nesting++] = attr; 168 } 169 170 static void netlink_done(struct nlmsg* nlmsg) 171 { 172 struct nlattr* attr = nlmsg->nested[--nlmsg->nesting]; 173 attr->nla_len = nlmsg->pos - (char*)attr; 174 } 175 176 #if SYZ_EXECUTOR || SYZ_NIC_VF 177 #include <ifaddrs.h> 178 #include <linux/ethtool.h> 179 #include <linux/sockios.h> 180 #include <sys/ioctl.h> 181 182 struct vf_intf { 183 char pass_thru_intf[IFNAMSIZ]; 184 int ppid; // used by Child 185 }; 186 187 static struct vf_intf vf_intf; 188 189 static void find_vf_interface(void) 190 { 191 #if SYZ_EXECUTOR 192 if (!flag_nic_vf) 193 return; 194 #endif 195 struct ifaddrs* addresses = NULL; 196 int pid = getpid(); 197 int ret = 0; 198 199 memset(&vf_intf, 0, sizeof(struct vf_intf)); 200 201 debug("Checking for VF pass-thru interface.\n"); 202 if (getifaddrs(&addresses) == -1) { 203 debug("%s: getifaddrs() failed.\n", __func__); 204 return; 205 } 206 207 int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP); 208 209 if (fd < 0) { 210 debug("%s: socket() failed.\n", __func__); 211 return; 212 } 213 struct ifreq ifr; 214 struct ethtool_drvinfo drvinfo; 215 struct ifaddrs* address = addresses; 216 217 while (address) { 218 debug("ifa_name: %s\n", address->ifa_name); 219 memset(&ifr, 0, sizeof(struct 
ifreq)); 220 strcpy(ifr.ifr_name, address->ifa_name); 221 memset(&drvinfo, 0, sizeof(struct ethtool_drvinfo)); 222 drvinfo.cmd = ETHTOOL_GDRVINFO; 223 ifr.ifr_data = (caddr_t)&drvinfo; 224 ret = ioctl(fd, SIOCETHTOOL, &ifr); 225 226 if (ret < 0) { 227 debug("%s: ioctl() failed.\n", __func__); 228 } else if (strlen(drvinfo.bus_info)) { 229 debug("bus_info: %s, strlen(drvinfo.bus_info)=%zu\n", 230 drvinfo.bus_info, strlen(drvinfo.bus_info)); 231 if (strcmp(drvinfo.bus_info, "0000:00:11.0") == 0) { 232 if (strlen(address->ifa_name) < IFNAMSIZ) { 233 strncpy(vf_intf.pass_thru_intf, 234 address->ifa_name, IFNAMSIZ); 235 vf_intf.ppid = pid; 236 } else { 237 debug("%s: %d strlen(%s) >= IFNAMSIZ.\n", 238 __func__, pid, address->ifa_name); 239 } 240 break; 241 } 242 } 243 address = address->ifa_next; 244 } 245 freeifaddrs(addresses); 246 if (!vf_intf.ppid) { 247 memset(&vf_intf, 0, sizeof(struct vf_intf)); 248 debug("%s: %d could not find VF pass-thru interface.\n", __func__, pid); 249 return; 250 } 251 debug("%s: %d found VF pass-thru interface %s\n", 252 __func__, pid, vf_intf.pass_thru_intf); 253 } 254 #endif // SYZ_NIC_VF 255 256 #endif 257 258 static int netlink_send_ext(struct nlmsg* nlmsg, int sock, 259 uint16 reply_type, int* reply_len, bool dofail) 260 { 261 #if SYZ_EXECUTOR 262 if (in_execute_one && dofail) { 263 // We can expect different sorts of breakages during fuzzing, 264 // we should not kill the whole process because of them. 
265 failmsg("invalid netlink_send_ext arguments", "dofail is true during syscall execution"); 266 } 267 #endif 268 if (nlmsg->pos > nlmsg->buf + sizeof(nlmsg->buf) || nlmsg->nesting) 269 fail("nlmsg overflow/bad nesting"); 270 struct nlmsghdr* hdr = (struct nlmsghdr*)nlmsg->buf; 271 hdr->nlmsg_len = nlmsg->pos - nlmsg->buf; 272 struct sockaddr_nl addr; 273 memset(&addr, 0, sizeof(addr)); 274 addr.nl_family = AF_NETLINK; 275 ssize_t n = sendto(sock, nlmsg->buf, hdr->nlmsg_len, 0, (struct sockaddr*)&addr, sizeof(addr)); 276 if (n != (ssize_t)hdr->nlmsg_len) { 277 if (dofail) 278 failmsg("netlink_send_ext: short netlink write", "wrote=%zd, want=%d", n, hdr->nlmsg_len); 279 debug("netlink_send_ext: short netlink write: %zd/%d errno=%d\n", n, hdr->nlmsg_len, errno); 280 return -1; 281 } 282 n = recv(sock, nlmsg->buf, sizeof(nlmsg->buf), 0); 283 if (reply_len) 284 *reply_len = 0; 285 if (n < 0) { 286 if (dofail) 287 fail("netlink_send_ext: netlink read failed"); 288 debug("netlink_send_ext: netlink read failed: errno=%d\n", errno); 289 return -1; 290 } 291 if (n < (ssize_t)sizeof(struct nlmsghdr)) { 292 errno = EINVAL; 293 if (dofail) 294 failmsg("netlink_send_ext: short netlink read", "read=%zd", n); 295 debug("netlink_send_ext: short netlink read: %zd\n", n); 296 return -1; 297 } 298 if (hdr->nlmsg_type == NLMSG_DONE) 299 return 0; 300 if (reply_len && hdr->nlmsg_type == reply_type) { 301 *reply_len = n; 302 return 0; 303 } 304 if (n < (ssize_t)(sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr))) { 305 errno = EINVAL; 306 if (dofail) 307 failmsg("netlink_send_ext: short netlink read", "read=%zd", n); 308 debug("netlink_send_ext: short netlink read: %zd\n", n); 309 return -1; 310 } 311 if (hdr->nlmsg_type != NLMSG_ERROR) { 312 errno = EINVAL; 313 if (dofail) 314 failmsg("netlink_send_ext: bad netlink ack type", "type=%d", hdr->nlmsg_type); 315 debug("netlink_send_ext: short netlink ack: %d\n", hdr->nlmsg_type); 316 return -1; 317 } 318 errno = -((struct nlmsgerr*)(hdr + 
1))->error; 319 return -errno; 320 } 321 322 #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION || SYZ_DEVLINK_PCI || SYZ_WIFI || SYZ_802154 323 static int netlink_send(struct nlmsg* nlmsg, int sock) 324 { 325 return netlink_send_ext(nlmsg, sock, 0, NULL, true); 326 } 327 #endif 328 329 static int netlink_query_family_id(struct nlmsg* nlmsg, int sock, const char* family_name, bool dofail) 330 { 331 struct genlmsghdr genlhdr; 332 memset(&genlhdr, 0, sizeof(genlhdr)); 333 genlhdr.cmd = CTRL_CMD_GETFAMILY; 334 netlink_init(nlmsg, GENL_ID_CTRL, 0, &genlhdr, sizeof(genlhdr)); 335 netlink_attr(nlmsg, CTRL_ATTR_FAMILY_NAME, family_name, strnlen(family_name, GENL_NAMSIZ - 1) + 1); 336 int n = 0; 337 int err = netlink_send_ext(nlmsg, sock, GENL_ID_CTRL, &n, dofail); 338 if (err < 0) { 339 debug("netlink: failed to get family id for %.*s: %s\n", GENL_NAMSIZ, family_name, strerror(errno)); 340 return -1; 341 } 342 uint16 id = 0; 343 struct nlattr* attr = (struct nlattr*)(nlmsg->buf + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(genlhdr))); 344 for (; (char*)attr < nlmsg->buf + n; attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) { 345 if (attr->nla_type == CTRL_ATTR_FAMILY_ID) { 346 id = *(uint16*)(attr + 1); 347 break; 348 } 349 } 350 if (!id) { 351 debug("netlink: failed to parse family id for %.*s\n", GENL_NAMSIZ, family_name); 352 errno = EINVAL; 353 return -1; 354 } 355 recv(sock, nlmsg->buf, sizeof(nlmsg->buf), 0); // recv ack 356 357 return id; 358 } 359 360 #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_DEVLINK_PCI 361 static int netlink_next_msg(struct nlmsg* nlmsg, unsigned int offset, 362 unsigned int total_len) 363 { 364 struct nlmsghdr* hdr = (struct nlmsghdr*)(nlmsg->buf + offset); 365 366 if (offset == total_len || offset + hdr->nlmsg_len > total_len) 367 return -1; 368 return hdr->nlmsg_len; 369 } 370 #endif 371 372 #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_802154 373 static void netlink_add_device_impl(struct nlmsg* nlmsg, const char* type, 374 const 
char* name, bool up) 375 { 376 struct ifinfomsg hdr; 377 memset(&hdr, 0, sizeof(hdr)); 378 if (up) 379 hdr.ifi_flags = hdr.ifi_change = IFF_UP; 380 netlink_init(nlmsg, RTM_NEWLINK, NLM_F_EXCL | NLM_F_CREATE, &hdr, sizeof(hdr)); 381 if (name) 382 netlink_attr(nlmsg, IFLA_IFNAME, name, strlen(name)); 383 netlink_nest(nlmsg, IFLA_LINKINFO); 384 netlink_attr(nlmsg, IFLA_INFO_KIND, type, strlen(type)); 385 } 386 #endif 387 388 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 389 static void netlink_add_device(struct nlmsg* nlmsg, int sock, const char* type, 390 const char* name) 391 { 392 netlink_add_device_impl(nlmsg, type, name, false); 393 netlink_done(nlmsg); 394 int err = netlink_send(nlmsg, sock); 395 if (err < 0) { 396 debug("netlink: adding device %s type %s: %s\n", name, type, strerror(errno)); 397 } 398 } 399 400 static void netlink_add_veth(struct nlmsg* nlmsg, int sock, const char* name, 401 const char* peer) 402 { 403 netlink_add_device_impl(nlmsg, "veth", name, false); 404 netlink_nest(nlmsg, IFLA_INFO_DATA); 405 netlink_nest(nlmsg, VETH_INFO_PEER); 406 nlmsg->pos += sizeof(struct ifinfomsg); 407 netlink_attr(nlmsg, IFLA_IFNAME, peer, strlen(peer)); 408 netlink_done(nlmsg); 409 netlink_done(nlmsg); 410 netlink_done(nlmsg); 411 int err = netlink_send(nlmsg, sock); 412 if (err < 0) { 413 debug("netlink: adding device %s type veth peer %s: %s\n", name, peer, strerror(errno)); 414 } 415 } 416 417 static void netlink_add_xfrm(struct nlmsg* nlmsg, int sock, const char* name) 418 { 419 netlink_add_device_impl(nlmsg, "xfrm", name, true); 420 netlink_nest(nlmsg, IFLA_INFO_DATA); 421 int if_id = 1; 422 // This is IFLA_XFRM_IF_ID attr which is not present in older kernel headers. 
423 netlink_attr(nlmsg, 2, &if_id, sizeof(if_id)); 424 netlink_done(nlmsg); 425 netlink_done(nlmsg); 426 int err = netlink_send(nlmsg, sock); 427 if (err < 0) { 428 debug("netlink: adding device %s type xfrm if_id %d: %s\n", name, if_id, strerror(errno)); 429 } 430 } 431 432 static void netlink_add_hsr(struct nlmsg* nlmsg, int sock, const char* name, 433 const char* slave1, const char* slave2) 434 { 435 netlink_add_device_impl(nlmsg, "hsr", name, false); 436 netlink_nest(nlmsg, IFLA_INFO_DATA); 437 int ifindex1 = if_nametoindex(slave1); 438 netlink_attr(nlmsg, IFLA_HSR_SLAVE1, &ifindex1, sizeof(ifindex1)); 439 int ifindex2 = if_nametoindex(slave2); 440 netlink_attr(nlmsg, IFLA_HSR_SLAVE2, &ifindex2, sizeof(ifindex2)); 441 netlink_done(nlmsg); 442 netlink_done(nlmsg); 443 int err = netlink_send(nlmsg, sock); 444 if (err < 0) { 445 debug("netlink: adding device %s type hsr slave1 %s slave2 %s: %s\n", name, slave1, slave2, strerror(errno)); 446 } 447 } 448 449 static void netlink_add_linked(struct nlmsg* nlmsg, int sock, const char* type, const char* name, const char* link) 450 { 451 netlink_add_device_impl(nlmsg, type, name, false); 452 netlink_done(nlmsg); 453 int ifindex = if_nametoindex(link); 454 netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex)); 455 int err = netlink_send(nlmsg, sock); 456 if (err < 0) { 457 debug("netlink: adding device %s type %s link %s: %s\n", name, type, link, strerror(errno)); 458 } 459 } 460 461 static void netlink_add_vlan(struct nlmsg* nlmsg, int sock, const char* name, const char* link, uint16 id, uint16 proto) 462 { 463 netlink_add_device_impl(nlmsg, "vlan", name, false); 464 netlink_nest(nlmsg, IFLA_INFO_DATA); 465 netlink_attr(nlmsg, IFLA_VLAN_ID, &id, sizeof(id)); 466 netlink_attr(nlmsg, IFLA_VLAN_PROTOCOL, &proto, sizeof(proto)); 467 netlink_done(nlmsg); 468 netlink_done(nlmsg); 469 int ifindex = if_nametoindex(link); 470 netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex)); 471 int err = netlink_send(nlmsg, sock); 
472 if (err < 0) { 473 debug("netlink: add %s type vlan link %s id %d: %s\n", name, link, id, strerror(errno)); 474 } 475 } 476 477 static void netlink_add_macvlan(struct nlmsg* nlmsg, int sock, const char* name, const char* link) 478 { 479 netlink_add_device_impl(nlmsg, "macvlan", name, false); 480 netlink_nest(nlmsg, IFLA_INFO_DATA); 481 uint32 mode = MACVLAN_MODE_BRIDGE; 482 netlink_attr(nlmsg, IFLA_MACVLAN_MODE, &mode, sizeof(mode)); 483 netlink_done(nlmsg); 484 netlink_done(nlmsg); 485 int ifindex = if_nametoindex(link); 486 netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex)); 487 int err = netlink_send(nlmsg, sock); 488 if (err < 0) { 489 debug("netlink: add %s type macvlan link %s mode %d: %s\n", name, link, mode, strerror(errno)); 490 } 491 } 492 493 static void netlink_add_geneve(struct nlmsg* nlmsg, int sock, const char* name, uint32 vni, struct in_addr* addr4, struct in6_addr* addr6) 494 { 495 netlink_add_device_impl(nlmsg, "geneve", name, false); 496 netlink_nest(nlmsg, IFLA_INFO_DATA); 497 netlink_attr(nlmsg, IFLA_GENEVE_ID, &vni, sizeof(vni)); 498 if (addr4) 499 netlink_attr(nlmsg, IFLA_GENEVE_REMOTE, addr4, sizeof(*addr4)); 500 if (addr6) 501 netlink_attr(nlmsg, IFLA_GENEVE_REMOTE6, addr6, sizeof(*addr6)); 502 netlink_done(nlmsg); 503 netlink_done(nlmsg); 504 int err = netlink_send(nlmsg, sock); 505 if (err < 0) { 506 debug("netlink: add %s type geneve vni %u: %s\n", name, vni, strerror(errno)); 507 } 508 } 509 510 #define IFLA_IPVLAN_FLAGS 2 511 #define IPVLAN_MODE_L3S 2 512 #undef IPVLAN_F_VEPA 513 #define IPVLAN_F_VEPA 2 514 515 static void netlink_add_ipvlan(struct nlmsg* nlmsg, int sock, const char* name, const char* link, uint16 mode, uint16 flags) 516 { 517 netlink_add_device_impl(nlmsg, "ipvlan", name, false); 518 netlink_nest(nlmsg, IFLA_INFO_DATA); 519 netlink_attr(nlmsg, IFLA_IPVLAN_MODE, &mode, sizeof(mode)); 520 netlink_attr(nlmsg, IFLA_IPVLAN_FLAGS, &flags, sizeof(flags)); 521 netlink_done(nlmsg); 522 netlink_done(nlmsg); 523 int 
ifindex = if_nametoindex(link); 524 netlink_attr(nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex)); 525 int err = netlink_send(nlmsg, sock); 526 if (err < 0) { 527 debug("netlink: add %s type ipvlan link %s mode %d: %s\n", name, link, mode, strerror(errno)); 528 } 529 } 530 #endif 531 532 #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION || SYZ_DEVLINK_PCI || SYZ_802154 533 static void netlink_device_change(struct nlmsg* nlmsg, int sock, const char* name, bool up, 534 const char* master, const void* mac, int macsize, 535 const char* new_name) 536 { 537 struct ifinfomsg hdr; 538 memset(&hdr, 0, sizeof(hdr)); 539 if (up) 540 hdr.ifi_flags = hdr.ifi_change = IFF_UP; 541 hdr.ifi_index = if_nametoindex(name); 542 netlink_init(nlmsg, RTM_NEWLINK, 0, &hdr, sizeof(hdr)); 543 if (new_name) 544 netlink_attr(nlmsg, IFLA_IFNAME, new_name, strlen(new_name)); 545 if (master) { 546 int ifindex = if_nametoindex(master); 547 netlink_attr(nlmsg, IFLA_MASTER, &ifindex, sizeof(ifindex)); 548 } 549 if (macsize) 550 netlink_attr(nlmsg, IFLA_ADDRESS, mac, macsize); 551 int err = netlink_send(nlmsg, sock); 552 if (err < 0) { 553 debug("netlink: device %s up master %s: %s\n", name, master ? master : "NULL", strerror(errno)); 554 } 555 } 556 #endif 557 558 #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION 559 static int netlink_add_addr(struct nlmsg* nlmsg, int sock, const char* dev, 560 const void* addr, int addrsize) 561 { 562 struct ifaddrmsg hdr; 563 memset(&hdr, 0, sizeof(hdr)); 564 hdr.ifa_family = addrsize == 4 ? AF_INET : AF_INET6; 565 hdr.ifa_prefixlen = addrsize == 4 ? 
24 : 120; 566 hdr.ifa_scope = RT_SCOPE_UNIVERSE; 567 hdr.ifa_index = if_nametoindex(dev); 568 netlink_init(nlmsg, RTM_NEWADDR, NLM_F_CREATE | NLM_F_REPLACE, &hdr, sizeof(hdr)); 569 netlink_attr(nlmsg, IFA_LOCAL, addr, addrsize); 570 netlink_attr(nlmsg, IFA_ADDRESS, addr, addrsize); 571 return netlink_send(nlmsg, sock); 572 } 573 574 static void netlink_add_addr4(struct nlmsg* nlmsg, int sock, 575 const char* dev, const char* addr) 576 { 577 struct in_addr in_addr; 578 inet_pton(AF_INET, addr, &in_addr); 579 int err = netlink_add_addr(nlmsg, sock, dev, &in_addr, sizeof(in_addr)); 580 if (err < 0) { 581 debug("netlink: add addr %s dev %s: %s\n", addr, dev, strerror(errno)); 582 } 583 } 584 585 static void netlink_add_addr6(struct nlmsg* nlmsg, int sock, 586 const char* dev, const char* addr) 587 { 588 struct in6_addr in6_addr; 589 inet_pton(AF_INET6, addr, &in6_addr); 590 int err = netlink_add_addr(nlmsg, sock, dev, &in6_addr, sizeof(in6_addr)); 591 if (err < 0) { 592 debug("netlink: add addr %s dev %s: %s\n", addr, dev, strerror(errno)); 593 } 594 } 595 #endif 596 597 #if SYZ_EXECUTOR || SYZ_NET_INJECTION 598 static void netlink_add_neigh(struct nlmsg* nlmsg, int sock, const char* name, 599 const void* addr, int addrsize, const void* mac, int macsize) 600 { 601 struct ndmsg hdr; 602 memset(&hdr, 0, sizeof(hdr)); 603 hdr.ndm_family = addrsize == 4 ? 
AF_INET : AF_INET6; 604 hdr.ndm_ifindex = if_nametoindex(name); 605 hdr.ndm_state = NUD_PERMANENT; 606 netlink_init(nlmsg, RTM_NEWNEIGH, NLM_F_EXCL | NLM_F_CREATE, &hdr, sizeof(hdr)); 607 netlink_attr(nlmsg, NDA_DST, addr, addrsize); 608 netlink_attr(nlmsg, NDA_LLADDR, mac, macsize); 609 int err = netlink_send(nlmsg, sock); 610 if (err < 0) { 611 debug("netlink: add neigh %s addr %d lladdr %d: %s\n", name, addrsize, macsize, strerror(errno)); 612 } 613 } 614 #endif 615 #endif 616 617 #if SYZ_EXECUTOR || SYZ_NET_DEVICES || SYZ_NET_INJECTION || SYZ_DEVLINK_PCI || SYZ_WIFI || SYZ_802154 618 static struct nlmsg nlmsg; 619 #endif 620 621 #if SYZ_EXECUTOR || SYZ_NET_INJECTION 622 #include <arpa/inet.h> 623 #include <errno.h> 624 #include <fcntl.h> 625 #include <net/if.h> 626 #include <net/if_arp.h> 627 #include <stdarg.h> 628 #include <stdbool.h> 629 #include <sys/ioctl.h> 630 #include <sys/stat.h> 631 632 #include <linux/if_ether.h> 633 #include <linux/if_tun.h> 634 #include <linux/ip.h> 635 #include <linux/tcp.h> 636 637 static int tunfd = -1; 638 639 #define TUN_IFACE "syz_tun" 640 #define LOCAL_MAC 0xaaaaaaaaaaaa 641 #define REMOTE_MAC 0xaaaaaaaaaabb 642 #define LOCAL_IPV4 "172.20.20.170" 643 #define REMOTE_IPV4 "172.20.20.187" 644 #define LOCAL_IPV6 "fe80::aa" 645 #define REMOTE_IPV6 "fe80::bb" 646 647 #ifndef IFF_NAPI 648 #define IFF_NAPI 0x0010 649 #endif 650 #if ENABLE_NAPI_FRAGS 651 static int tun_frags_enabled; 652 #ifndef IFF_NAPI_FRAGS 653 #define IFF_NAPI_FRAGS 0x0020 654 #endif 655 #endif 656 657 static void initialize_tun(void) 658 { 659 #if SYZ_EXECUTOR 660 if (!flag_net_injection) 661 return; 662 #endif 663 tunfd = open("/dev/net/tun", O_RDWR | O_NONBLOCK); 664 if (tunfd == -1) { 665 #if SYZ_EXECUTOR 666 fail("tun: can't open /dev/net/tun"); 667 #else 668 printf("tun: can't open /dev/net/tun: please enable CONFIG_TUN=y\n"); 669 printf("otherwise fuzzing or reproducing might not work as intended\n"); 670 return; 671 #endif 672 } 673 // Remap tun onto 
higher fd number to hide it from fuzzer and to keep 674 // fd numbers stable regardless of whether tun is opened or not (also see kMaxFd). 675 const int kTunFd = 200; 676 if (dup2(tunfd, kTunFd) < 0) 677 fail("dup2(tunfd, kTunFd) failed"); 678 close(tunfd); 679 tunfd = kTunFd; 680 681 struct ifreq ifr; 682 memset(&ifr, 0, sizeof(ifr)); 683 strncpy(ifr.ifr_name, TUN_IFACE, IFNAMSIZ); 684 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 685 // Note: SYZ_ENABLE_NAPI_FRAGS is never enabled. This is code is only for reference 686 // in case we figure out how IFF_NAPI_FRAGS works. With IFF_NAPI_FRAGS packets 687 // don't reach destinations and bail out in udp_gro_receive (see #1594). 688 // Also IFF_NAPI_FRAGS does not work with sandbox_namespace (see comment there). 689 #if ENABLE_NAPI_FRAGS 690 ifr.ifr_flags |= IFF_NAPI | IFF_NAPI_FRAGS; 691 #endif 692 if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0) { 693 #if ENABLE_NAPI_FRAGS 694 // IFF_NAPI_FRAGS requires root, so try without it. 695 ifr.ifr_flags = IFF_TAP | IFF_NO_PI; 696 if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0) 697 #endif 698 fail("tun: ioctl(TUNSETIFF) failed"); 699 } 700 #if ENABLE_NAPI_FRAGS 701 // If IFF_NAPI_FRAGS is not supported it will be silently dropped, 702 // so query the effective flags. 703 if (ioctl(tunfd, TUNGETIFF, (void*)&ifr) < 0) 704 fail("tun: ioctl(TUNGETIFF) failed"); 705 tun_frags_enabled = (ifr.ifr_flags & IFF_NAPI_FRAGS) != 0; 706 debug("tun_frags_enabled=%d\n", tun_frags_enabled); 707 #endif 708 709 // Disable IPv6 DAD, otherwise the address remains unusable until DAD completes. 710 // Don't panic because this is an optional config. 711 char sysctl[64]; 712 sprintf(sysctl, "/proc/sys/net/ipv6/conf/%s/accept_dad", TUN_IFACE); 713 write_file(sysctl, "0"); 714 // Disable IPv6 router solicitation to prevent IPv6 spam. 715 // Don't panic because this is an optional config. 
716 sprintf(sysctl, "/proc/sys/net/ipv6/conf/%s/router_solicitations", TUN_IFACE); 717 write_file(sysctl, "0"); 718 // There seems to be no way to disable IPv6 MTD to prevent more IPv6 spam. 719 720 int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 721 if (sock == -1) 722 fail("socket(AF_NETLINK) failed"); 723 724 netlink_add_addr4(&nlmsg, sock, TUN_IFACE, LOCAL_IPV4); 725 netlink_add_addr6(&nlmsg, sock, TUN_IFACE, LOCAL_IPV6); 726 uint64 macaddr = REMOTE_MAC; 727 struct in_addr in_addr; 728 inet_pton(AF_INET, REMOTE_IPV4, &in_addr); 729 netlink_add_neigh(&nlmsg, sock, TUN_IFACE, &in_addr, sizeof(in_addr), &macaddr, ETH_ALEN); 730 struct in6_addr in6_addr; 731 inet_pton(AF_INET6, REMOTE_IPV6, &in6_addr); 732 netlink_add_neigh(&nlmsg, sock, TUN_IFACE, &in6_addr, sizeof(in6_addr), &macaddr, ETH_ALEN); 733 macaddr = LOCAL_MAC; 734 netlink_device_change(&nlmsg, sock, TUN_IFACE, true, 0, &macaddr, ETH_ALEN, NULL); 735 close(sock); 736 } 737 #endif 738 739 #if SYZ_EXECUTOR || __NR_syz_init_net_socket || SYZ_DEVLINK_PCI || __NR_syz_socket_connect_nvme_tcp 740 const int kInitNetNsFd = 201; // see kMaxFd 741 #endif 742 743 #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI || SYZ_NET_DEVICES 744 745 #include <linux/genetlink.h> 746 #include <stdbool.h> 747 748 #define DEVLINK_FAMILY_NAME "devlink" 749 750 #define DEVLINK_CMD_PORT_GET 5 751 #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI 752 #define DEVLINK_CMD_RELOAD 37 753 #endif 754 #define DEVLINK_ATTR_BUS_NAME 1 755 #define DEVLINK_ATTR_DEV_NAME 2 756 #define DEVLINK_ATTR_NETDEV_NAME 7 757 #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI 758 #define DEVLINK_ATTR_NETNS_FD 138 759 #endif 760 761 #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI 762 static void netlink_devlink_netns_move(const char* bus_name, const char* dev_name, int netns_fd) 763 { 764 struct genlmsghdr genlhdr; 765 int sock; 766 int id, err; 767 768 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 769 if (sock == -1) 770 fail("socket(AF_NETLINK) failed"); 771 772 id = 
netlink_query_family_id(&nlmsg, sock, DEVLINK_FAMILY_NAME, true); 773 if (id == -1) 774 goto error; 775 776 memset(&genlhdr, 0, sizeof(genlhdr)); 777 genlhdr.cmd = DEVLINK_CMD_RELOAD; 778 netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr)); 779 netlink_attr(&nlmsg, DEVLINK_ATTR_BUS_NAME, bus_name, strlen(bus_name) + 1); 780 netlink_attr(&nlmsg, DEVLINK_ATTR_DEV_NAME, dev_name, strlen(dev_name) + 1); 781 netlink_attr(&nlmsg, DEVLINK_ATTR_NETNS_FD, &netns_fd, sizeof(netns_fd)); 782 err = netlink_send(&nlmsg, sock); 783 if (err < 0) { 784 debug("netlink: failed to move devlink instance %s/%s into network namespace: %s\n", 785 bus_name, dev_name, strerror(errno)); 786 } 787 error: 788 close(sock); 789 } 790 #endif 791 792 static struct nlmsg nlmsg2; 793 794 static void initialize_devlink_ports(const char* bus_name, const char* dev_name, 795 const char* netdev_prefix) 796 { 797 struct genlmsghdr genlhdr; 798 int len, total_len, id, err, offset; 799 uint16 netdev_index; 800 801 int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 802 if (sock == -1) 803 fail("socket(AF_NETLINK) failed"); 804 805 int rtsock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 806 if (rtsock == -1) 807 fail("socket(AF_NETLINK) failed"); 808 809 id = netlink_query_family_id(&nlmsg, sock, DEVLINK_FAMILY_NAME, true); 810 if (id == -1) 811 goto error; 812 813 memset(&genlhdr, 0, sizeof(genlhdr)); 814 genlhdr.cmd = DEVLINK_CMD_PORT_GET; 815 netlink_init(&nlmsg, id, NLM_F_DUMP, &genlhdr, sizeof(genlhdr)); 816 netlink_attr(&nlmsg, DEVLINK_ATTR_BUS_NAME, bus_name, strlen(bus_name) + 1); 817 netlink_attr(&nlmsg, DEVLINK_ATTR_DEV_NAME, dev_name, strlen(dev_name) + 1); 818 819 err = netlink_send_ext(&nlmsg, sock, id, &total_len, true); 820 if (err < 0) { 821 debug("netlink: failed to get port get reply: %s\n", strerror(errno)); 822 goto error; 823 } 824 825 offset = 0; 826 netdev_index = 0; 827 while ((len = netlink_next_msg(&nlmsg, offset, total_len)) != -1) { 828 struct nlattr* attr = (struct 
nlattr*)(nlmsg.buf + offset + NLMSG_HDRLEN + NLMSG_ALIGN(sizeof(genlhdr))); 829 for (; (char*)attr < nlmsg.buf + offset + len; attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) { 830 if (attr->nla_type == DEVLINK_ATTR_NETDEV_NAME) { 831 char* port_name; 832 char netdev_name[IFNAMSIZ]; 833 port_name = (char*)(attr + 1); 834 snprintf(netdev_name, sizeof(netdev_name), "%s%d", netdev_prefix, netdev_index); 835 netlink_device_change(&nlmsg2, rtsock, port_name, true, 0, 0, 0, netdev_name); 836 break; 837 } 838 } 839 offset += len; 840 netdev_index++; 841 } 842 error: 843 close(rtsock); 844 close(sock); 845 } 846 847 #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI 848 #include <fcntl.h> 849 #include <sched.h> 850 851 static void initialize_devlink_pci(void) 852 { 853 #if SYZ_EXECUTOR 854 if (!flag_devlink_pci) 855 return; 856 #endif 857 int netns = open("/proc/self/ns/net", O_RDONLY); 858 if (netns == -1) 859 fail("open(/proc/self/ns/net) failed"); 860 int ret = setns(kInitNetNsFd, 0); 861 if (ret == -1) 862 fail("set_ns(init_netns_fd) failed"); 863 netlink_devlink_netns_move("pci", "0000:00:10.0", netns); 864 ret = setns(netns, 0); 865 if (ret == -1) 866 fail("set_ns(this_netns_fd) failed"); 867 close(netns); 868 869 initialize_devlink_ports("pci", "0000:00:10.0", "netpci"); 870 } 871 #endif 872 #endif 873 874 #if SYZ_EXECUTOR || SYZ_WIFI || __NR_syz_80211_inject_frame || __NR_syz_80211_join_ibss 875 876 #define WIFI_INITIAL_DEVICE_COUNT 2 877 #define WIFI_MAC_BASE \ 878 { \ 879 0x08, 0x02, 0x11, 0x00, 0x00, 0x00 \ 880 } 881 #define WIFI_IBSS_BSSID \ 882 { \ 883 0x50, 0x50, 0x50, 0x50, 0x50, 0x50 \ 884 } 885 #define WIFI_IBSS_SSID \ 886 { \ 887 0x10, 0x10, 0x10, 0x10, 0x10, 0x10 \ 888 } 889 #define WIFI_DEFAULT_FREQUENCY 2412 890 #define WIFI_DEFAULT_SIGNAL 0 891 #define WIFI_DEFAULT_RX_RATE 1 892 893 // consts from drivers/net/wireless/mac80211_hwsim.h 894 #define HWSIM_CMD_REGISTER 1 895 #define HWSIM_CMD_FRAME 2 896 #define HWSIM_CMD_NEW_RADIO 4 897 #define 
HWSIM_ATTR_SUPPORT_P2P_DEVICE 14 898 #define HWSIM_ATTR_PERM_ADDR 22 899 900 #endif 901 902 #if SYZ_EXECUTOR || SYZ_WIFI || __NR_syz_80211_join_ibss 903 #include <linux/genetlink.h> 904 #include <linux/if_ether.h> 905 #include <linux/nl80211.h> 906 #include <linux/rtnetlink.h> 907 #include <net/if.h> 908 #include <stdbool.h> 909 #include <sys/ioctl.h> 910 911 // From linux/if.h, but we cannot include the file as it conflicts with net/if.h 912 #define IF_OPER_UP 6 913 914 // IBSS parameters for nl80211_join_ibss 915 struct join_ibss_props { 916 int wiphy_freq; 917 bool wiphy_freq_fixed; 918 uint8* mac; 919 uint8* ssid; 920 int ssid_len; 921 }; 922 923 static int set_interface_state(const char* interface_name, int on) 924 { 925 struct ifreq ifr; 926 int sock = socket(AF_INET, SOCK_DGRAM, 0); 927 if (sock < 0) { 928 debug("set_interface_state: failed to open socket, errno %d\n", errno); 929 return -1; 930 } 931 932 memset(&ifr, 0, sizeof(ifr)); 933 strcpy(ifr.ifr_name, interface_name); 934 int ret = ioctl(sock, SIOCGIFFLAGS, &ifr); 935 if (ret < 0) { 936 debug("set_interface_state: failed to execute SIOCGIFFLAGS, ret %d\n", ret); 937 close(sock); 938 return -1; 939 } 940 941 if (on) 942 ifr.ifr_flags |= IFF_UP; 943 else 944 ifr.ifr_flags &= ~IFF_UP; 945 946 ret = ioctl(sock, SIOCSIFFLAGS, &ifr); 947 close(sock); 948 if (ret < 0) { 949 debug("set_interface_state: failed to execute SIOCSIFFLAGS, ret %d\n", ret); 950 return -1; 951 } 952 return 0; 953 } 954 955 static int nl80211_set_interface(struct nlmsg* nlmsg, int sock, int nl80211_family, uint32 ifindex, 956 uint32 iftype, bool dofail) 957 { 958 struct genlmsghdr genlhdr; 959 960 memset(&genlhdr, 0, sizeof(genlhdr)); 961 genlhdr.cmd = NL80211_CMD_SET_INTERFACE; 962 netlink_init(nlmsg, nl80211_family, 0, &genlhdr, sizeof(genlhdr)); 963 netlink_attr(nlmsg, NL80211_ATTR_IFINDEX, &ifindex, sizeof(ifindex)); 964 netlink_attr(nlmsg, NL80211_ATTR_IFTYPE, &iftype, sizeof(iftype)); 965 int err = netlink_send_ext(nlmsg, sock, 
0, NULL, dofail); 966 if (err < 0) { 967 debug("nl80211_set_interface failed: %s\n", strerror(errno)); 968 } 969 return err; 970 } 971 972 static int nl80211_join_ibss(struct nlmsg* nlmsg, int sock, int nl80211_family, uint32 ifindex, 973 struct join_ibss_props* props, bool dofail) 974 { 975 struct genlmsghdr genlhdr; 976 977 memset(&genlhdr, 0, sizeof(genlhdr)); 978 genlhdr.cmd = NL80211_CMD_JOIN_IBSS; 979 netlink_init(nlmsg, nl80211_family, 0, &genlhdr, sizeof(genlhdr)); 980 netlink_attr(nlmsg, NL80211_ATTR_IFINDEX, &ifindex, sizeof(ifindex)); 981 netlink_attr(nlmsg, NL80211_ATTR_SSID, props->ssid, props->ssid_len); 982 netlink_attr(nlmsg, NL80211_ATTR_WIPHY_FREQ, &(props->wiphy_freq), sizeof(props->wiphy_freq)); 983 if (props->mac) 984 netlink_attr(nlmsg, NL80211_ATTR_MAC, props->mac, ETH_ALEN); 985 if (props->wiphy_freq_fixed) 986 netlink_attr(nlmsg, NL80211_ATTR_FREQ_FIXED, NULL, 0); 987 int err = netlink_send_ext(nlmsg, sock, 0, NULL, dofail); 988 if (err < 0) { 989 debug("nl80211_join_ibss failed: %s\n", strerror(errno)); 990 } 991 return err; 992 } 993 994 static int get_ifla_operstate(struct nlmsg* nlmsg, int ifindex, bool dofail) 995 { 996 struct ifinfomsg info; 997 memset(&info, 0, sizeof(info)); 998 info.ifi_family = AF_UNSPEC; 999 info.ifi_index = ifindex; 1000 1001 int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 1002 if (sock == -1) { 1003 debug("get_ifla_operstate: socket failed: %d\n", errno); 1004 return -1; 1005 } 1006 1007 netlink_init(nlmsg, RTM_GETLINK, 0, &info, sizeof(info)); 1008 int n; 1009 int err = netlink_send_ext(nlmsg, sock, RTM_NEWLINK, &n, dofail); 1010 close(sock); 1011 1012 if (err) { 1013 debug("get_ifla_operstate: failed to query: %s\n", strerror(errno)); 1014 return -1; 1015 } 1016 1017 struct rtattr* attr = IFLA_RTA(NLMSG_DATA(nlmsg->buf)); 1018 for (; RTA_OK(attr, n); attr = RTA_NEXT(attr, n)) { 1019 if (attr->rta_type == IFLA_OPERSTATE) 1020 return *((int32_t*)RTA_DATA(attr)); 1021 } 1022 1023 return -1; 1024 } 1025 
1026 static int await_ifla_operstate(struct nlmsg* nlmsg, char* interface, int operstate, bool dofail) 1027 { 1028 int ifindex = if_nametoindex(interface); 1029 while (true) { 1030 usleep(1000); // 1 ms 1031 int ret = get_ifla_operstate(nlmsg, ifindex, dofail); 1032 if (ret < 0) 1033 return ret; 1034 if (ret == operstate) 1035 return 0; 1036 } 1037 return 0; 1038 } 1039 1040 static int nl80211_setup_ibss_interface(struct nlmsg* nlmsg, int sock, int nl80211_family_id, char* interface, 1041 struct join_ibss_props* ibss_props, bool dofail) 1042 { 1043 int ifindex = if_nametoindex(interface); 1044 if (ifindex == 0) { 1045 debug("nl80211_setup_ibss_interface: if_nametoindex failed for %.32s, ret 0\n", interface); 1046 return -1; 1047 } 1048 1049 int ret = nl80211_set_interface(nlmsg, sock, nl80211_family_id, ifindex, NL80211_IFTYPE_ADHOC, dofail); 1050 if (ret < 0) { 1051 debug("nl80211_setup_ibss_interface: nl80211_set_interface failed for %.32s, ret %d\n", interface, ret); 1052 return -1; 1053 } 1054 1055 ret = set_interface_state(interface, 1); 1056 if (ret < 0) { 1057 debug("nl80211_setup_ibss_interface: set_interface_state failed for %.32s, ret %d\n", interface, ret); 1058 return -1; 1059 } 1060 1061 ret = nl80211_join_ibss(nlmsg, sock, nl80211_family_id, ifindex, ibss_props, dofail); 1062 if (ret < 0) { 1063 debug("nl80211_setup_ibss_interface: nl80211_join_ibss failed for %.32s, ret %d\n", interface, ret); 1064 return -1; 1065 } 1066 1067 return 0; 1068 } 1069 #endif 1070 1071 #if SYZ_EXECUTOR || SYZ_WIFI 1072 #include <fcntl.h> 1073 #include <linux/rfkill.h> 1074 #include <sys/stat.h> 1075 #include <sys/types.h> 1076 1077 static int hwsim80211_create_device(struct nlmsg* nlmsg, int sock, int hwsim_family, uint8 mac_addr[ETH_ALEN]) 1078 { 1079 struct genlmsghdr genlhdr; 1080 memset(&genlhdr, 0, sizeof(genlhdr)); 1081 genlhdr.cmd = HWSIM_CMD_NEW_RADIO; 1082 netlink_init(nlmsg, hwsim_family, 0, &genlhdr, sizeof(genlhdr)); 1083 netlink_attr(nlmsg, 
HWSIM_ATTR_SUPPORT_P2P_DEVICE, NULL, 0); 1084 netlink_attr(nlmsg, HWSIM_ATTR_PERM_ADDR, mac_addr, ETH_ALEN); 1085 int err = netlink_send(nlmsg, sock); 1086 if (err < 0) { 1087 debug("hwsim80211_create_device failed: %s\n", strerror(errno)); 1088 } 1089 return err; 1090 } 1091 1092 static void initialize_wifi_devices(void) 1093 { 1094 // Set up virtual wifi devices and join them into an IBSS network. 1095 // An IBSS network is created here in order to put these devices in an operable state right from 1096 // the beginning. It has the following positive effects. 1097 // 1. Frame injection becomes possible from the very start. 1098 // 2. A number of nl80211 commands expect their target wireless interface to be in an operable state. 1099 // 3. Simplification of reproducer generation - in many cases the reproducer will not have to spend time 1100 // selecting system calls that set up the environment. 1101 // 1102 // IBSS network was chosen as the simplest network type to begin with. 1103 1104 #if SYZ_EXECUTOR 1105 if (!flag_wifi) 1106 return; 1107 #endif 1108 int rfkill = open("/dev/rfkill", O_RDWR); 1109 if (rfkill == -1) 1110 fail("open(/dev/rfkill) failed"); 1111 struct rfkill_event event = {0}; 1112 event.type = RFKILL_TYPE_ALL; 1113 event.op = RFKILL_OP_CHANGE_ALL; 1114 if (write(rfkill, &event, sizeof(event)) != (ssize_t)(sizeof(event))) 1115 fail("write(/dev/rfkill) failed"); 1116 close(rfkill); 1117 1118 uint8 mac_addr[6] = WIFI_MAC_BASE; 1119 int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 1120 if (sock < 0) 1121 fail("initialize_wifi_devices: failed to create socket"); 1122 int hwsim_family_id = netlink_query_family_id(&nlmsg, sock, "MAC80211_HWSIM", true); 1123 int nl80211_family_id = netlink_query_family_id(&nlmsg, sock, "nl80211", true); 1124 uint8 ssid[] = WIFI_IBSS_SSID; 1125 uint8 bssid[] = WIFI_IBSS_BSSID; 1126 struct join_ibss_props ibss_props = { 1127 .wiphy_freq = WIFI_DEFAULT_FREQUENCY, .wiphy_freq_fixed = true, .mac = bssid, .ssid = ssid, 
.ssid_len = sizeof(ssid)}; 1128 1129 for (int device_id = 0; device_id < WIFI_INITIAL_DEVICE_COUNT; device_id++) { 1130 // Virtual wifi devices will have consequtive mac addresses 1131 mac_addr[5] = device_id; 1132 int ret = hwsim80211_create_device(&nlmsg, sock, hwsim_family_id, mac_addr); 1133 if (ret < 0) 1134 failmsg("initialize_wifi_devices: failed to create device", "device=%d", device_id); 1135 1136 // For each device, unless HWSIM_ATTR_NO_VIF is passed, a network interface is created 1137 // automatically. Such interfaces are named "wlan0", "wlan1" and so on. 1138 char interface[6] = "wlan0"; 1139 interface[4] += device_id; 1140 1141 if (nl80211_setup_ibss_interface(&nlmsg, sock, nl80211_family_id, interface, &ibss_props, true) < 0) 1142 failmsg("initialize_wifi_devices: failed set up IBSS network", "device=%d", device_id); 1143 } 1144 1145 // Wait for all devices to join the IBSS network 1146 for (int device_id = 0; device_id < WIFI_INITIAL_DEVICE_COUNT; device_id++) { 1147 char interface[6] = "wlan0"; 1148 interface[4] += device_id; 1149 int ret = await_ifla_operstate(&nlmsg, interface, IF_OPER_UP, true); 1150 if (ret < 0) 1151 failmsg("initialize_wifi_devices: get_ifla_operstate failed", 1152 "device=%d, ret=%d", device_id, ret); 1153 } 1154 1155 close(sock); 1156 } 1157 #endif 1158 1159 #if SYZ_EXECUTOR || (SYZ_NET_DEVICES && SYZ_NIC_VF) || SYZ_SWAP 1160 static int runcmdline(char* cmdline) 1161 { 1162 debug("%s\n", cmdline); 1163 int ret = system(cmdline); 1164 if (ret) { 1165 debug("FAIL: %s\n", cmdline); 1166 } 1167 return ret; 1168 } 1169 #endif 1170 1171 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 1172 #include <arpa/inet.h> 1173 #include <errno.h> 1174 #include <fcntl.h> 1175 #include <net/if.h> 1176 #include <net/if_arp.h> 1177 #include <stdarg.h> 1178 #include <stdbool.h> 1179 #include <sys/ioctl.h> 1180 #include <sys/stat.h> 1181 #include <sys/uio.h> 1182 1183 #include <linux/if_ether.h> 1184 #include <linux/if_tun.h> 1185 #include <linux/ip.h> 1186 
#include <linux/tcp.h> 1187 1188 // Addresses are chosen to be in the same subnet as tun addresses. 1189 #define DEV_IPV4 "172.20.20.%d" 1190 #define DEV_IPV6 "fe80::%02x" 1191 #define DEV_MAC 0x00aaaaaaaaaa 1192 1193 static void netdevsim_add(unsigned int addr, unsigned int port_count) 1194 { 1195 // These devices are sticky and are not deleted on net namespace destruction. 1196 // So try to delete the previous version of the device. 1197 write_file("/sys/bus/netdevsim/del_device", "%u", addr); 1198 if (write_file("/sys/bus/netdevsim/new_device", "%u %u", addr, port_count)) { 1199 char buf[32]; 1200 snprintf(buf, sizeof(buf), "netdevsim%d", addr); 1201 initialize_devlink_ports("netdevsim", buf, "netdevsim"); 1202 } 1203 } 1204 1205 #define WG_GENL_NAME "wireguard" 1206 enum wg_cmd { 1207 WG_CMD_GET_DEVICE, 1208 WG_CMD_SET_DEVICE, 1209 }; 1210 enum wgdevice_attribute { 1211 WGDEVICE_A_UNSPEC, 1212 WGDEVICE_A_IFINDEX, 1213 WGDEVICE_A_IFNAME, 1214 WGDEVICE_A_PRIVATE_KEY, 1215 WGDEVICE_A_PUBLIC_KEY, 1216 WGDEVICE_A_FLAGS, 1217 WGDEVICE_A_LISTEN_PORT, 1218 WGDEVICE_A_FWMARK, 1219 WGDEVICE_A_PEERS, 1220 }; 1221 enum wgpeer_attribute { 1222 WGPEER_A_UNSPEC, 1223 WGPEER_A_PUBLIC_KEY, 1224 WGPEER_A_PRESHARED_KEY, 1225 WGPEER_A_FLAGS, 1226 WGPEER_A_ENDPOINT, 1227 WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, 1228 WGPEER_A_LAST_HANDSHAKE_TIME, 1229 WGPEER_A_RX_BYTES, 1230 WGPEER_A_TX_BYTES, 1231 WGPEER_A_ALLOWEDIPS, 1232 WGPEER_A_PROTOCOL_VERSION, 1233 }; 1234 enum wgallowedip_attribute { 1235 WGALLOWEDIP_A_UNSPEC, 1236 WGALLOWEDIP_A_FAMILY, 1237 WGALLOWEDIP_A_IPADDR, 1238 WGALLOWEDIP_A_CIDR_MASK, 1239 }; 1240 1241 static void netlink_wireguard_setup(void) 1242 { 1243 const char ifname_a[] = "wg0"; 1244 const char ifname_b[] = "wg1"; 1245 const char ifname_c[] = "wg2"; 1246 const char private_a[] = "\xa0\x5c\xa8\x4f\x6c\x9c\x8e\x38\x53\xe2\xfd\x7a\x70\xae\x0f\xb2\x0f\xa1\x52\x60\x0c\xb0\x08\x45\x17\x4f\x08\x07\x6f\x8d\x78\x43"; 1247 const char private_b[] = 
"\xb0\x80\x73\xe8\xd4\x4e\x91\xe3\xda\x92\x2c\x22\x43\x82\x44\xbb\x88\x5c\x69\xe2\x69\xc8\xe9\xd8\x35\xb1\x14\x29\x3a\x4d\xdc\x6e"; 1248 const char private_c[] = "\xa0\xcb\x87\x9a\x47\xf5\xbc\x64\x4c\x0e\x69\x3f\xa6\xd0\x31\xc7\x4a\x15\x53\xb6\xe9\x01\xb9\xff\x2f\x51\x8c\x78\x04\x2f\xb5\x42"; 1249 const char public_a[] = "\x97\x5c\x9d\x81\xc9\x83\xc8\x20\x9e\xe7\x81\x25\x4b\x89\x9f\x8e\xd9\x25\xae\x9f\x09\x23\xc2\x3c\x62\xf5\x3c\x57\xcd\xbf\x69\x1c"; 1250 const char public_b[] = "\xd1\x73\x28\x99\xf6\x11\xcd\x89\x94\x03\x4d\x7f\x41\x3d\xc9\x57\x63\x0e\x54\x93\xc2\x85\xac\xa4\x00\x65\xcb\x63\x11\xbe\x69\x6b"; 1251 const char public_c[] = "\xf4\x4d\xa3\x67\xa8\x8e\xe6\x56\x4f\x02\x02\x11\x45\x67\x27\x08\x2f\x5c\xeb\xee\x8b\x1b\xf5\xeb\x73\x37\x34\x1b\x45\x9b\x39\x22"; 1252 const uint16 listen_a = 20001; 1253 const uint16 listen_b = 20002; 1254 const uint16 listen_c = 20003; 1255 const uint16 af_inet = AF_INET; 1256 const uint16 af_inet6 = AF_INET6; 1257 // Unused, but useful in case we change this: 1258 // const struct sockaddr_in endpoint_a_v4 = { 1259 // .sin_family = AF_INET, 1260 // .sin_port = htons(listen_a), 1261 // .sin_addr = {htonl(INADDR_LOOPBACK)}}; 1262 const struct sockaddr_in endpoint_b_v4 = { 1263 .sin_family = AF_INET, 1264 .sin_port = htons(listen_b), 1265 .sin_addr = {htonl(INADDR_LOOPBACK)}}; 1266 const struct sockaddr_in endpoint_c_v4 = { 1267 .sin_family = AF_INET, 1268 .sin_port = htons(listen_c), 1269 .sin_addr = {htonl(INADDR_LOOPBACK)}}; 1270 struct sockaddr_in6 endpoint_a_v6 = { 1271 .sin6_family = AF_INET6, 1272 .sin6_port = htons(listen_a)}; 1273 endpoint_a_v6.sin6_addr = in6addr_loopback; 1274 // Unused, but useful in case we change this: 1275 // const struct sockaddr_in6 endpoint_b_v6 = { 1276 // .sin6_family = AF_INET6, 1277 // .sin6_port = htons(listen_b)}; 1278 // endpoint_b_v6.sin6_addr = in6addr_loopback; 1279 struct sockaddr_in6 endpoint_c_v6 = { 1280 .sin6_family = AF_INET6, 1281 .sin6_port = htons(listen_c)}; 1282 
endpoint_c_v6.sin6_addr = in6addr_loopback; 1283 const struct in_addr first_half_v4 = {0}; 1284 const struct in_addr second_half_v4 = {(uint32)htonl(128 << 24)}; 1285 const struct in6_addr first_half_v6 = {{{0}}}; 1286 const struct in6_addr second_half_v6 = {{{0x80}}}; 1287 const uint8 half_cidr = 1; 1288 const uint16 persistent_keepalives[] = {1, 3, 7, 9, 14, 19}; 1289 1290 struct genlmsghdr genlhdr = { 1291 .cmd = WG_CMD_SET_DEVICE, 1292 .version = 1}; 1293 int sock; 1294 int id, err; 1295 1296 sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 1297 if (sock == -1) { 1298 debug("socket(AF_NETLINK) failed: %s\n", strerror(errno)); 1299 return; 1300 } 1301 1302 id = netlink_query_family_id(&nlmsg, sock, WG_GENL_NAME, true); 1303 if (id == -1) 1304 goto error; 1305 1306 netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr)); 1307 netlink_attr(&nlmsg, WGDEVICE_A_IFNAME, ifname_a, strlen(ifname_a) + 1); 1308 netlink_attr(&nlmsg, WGDEVICE_A_PRIVATE_KEY, private_a, 32); 1309 netlink_attr(&nlmsg, WGDEVICE_A_LISTEN_PORT, &listen_a, 2); 1310 netlink_nest(&nlmsg, NLA_F_NESTED | WGDEVICE_A_PEERS); 1311 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1312 netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_b, 32); 1313 netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_b_v4, sizeof(endpoint_b_v4)); 1314 netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[0], 2); 1315 netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); 1316 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1317 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); 1318 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v4, sizeof(first_half_v4)); 1319 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1320 netlink_done(&nlmsg); 1321 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1322 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); 1323 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v6, sizeof(first_half_v6)); 1324 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, 
&half_cidr, 1); 1325 netlink_done(&nlmsg); 1326 netlink_done(&nlmsg); 1327 netlink_done(&nlmsg); 1328 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1329 netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_c, 32); 1330 netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_c_v6, sizeof(endpoint_c_v6)); 1331 netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[1], 2); 1332 netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); 1333 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1334 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); 1335 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v4, sizeof(second_half_v4)); 1336 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1337 netlink_done(&nlmsg); 1338 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1339 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); 1340 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v6, sizeof(second_half_v6)); 1341 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1342 netlink_done(&nlmsg); 1343 netlink_done(&nlmsg); 1344 netlink_done(&nlmsg); 1345 netlink_done(&nlmsg); 1346 err = netlink_send(&nlmsg, sock); 1347 if (err < 0) { 1348 debug("netlink: failed to setup wireguard instance: %s\n", strerror(errno)); 1349 } 1350 1351 netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr)); 1352 netlink_attr(&nlmsg, WGDEVICE_A_IFNAME, ifname_b, strlen(ifname_b) + 1); 1353 netlink_attr(&nlmsg, WGDEVICE_A_PRIVATE_KEY, private_b, 32); 1354 netlink_attr(&nlmsg, WGDEVICE_A_LISTEN_PORT, &listen_b, 2); 1355 netlink_nest(&nlmsg, NLA_F_NESTED | WGDEVICE_A_PEERS); 1356 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1357 netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_a, 32); 1358 netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_a_v6, sizeof(endpoint_a_v6)); 1359 netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[2], 2); 1360 netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); 1361 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1362 
netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); 1363 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v4, sizeof(first_half_v4)); 1364 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1365 netlink_done(&nlmsg); 1366 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1367 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); 1368 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v6, sizeof(first_half_v6)); 1369 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1370 netlink_done(&nlmsg); 1371 netlink_done(&nlmsg); 1372 netlink_done(&nlmsg); 1373 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1374 netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_c, 32); 1375 netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_c_v4, sizeof(endpoint_c_v4)); 1376 netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[3], 2); 1377 netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); 1378 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1379 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); 1380 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v4, sizeof(second_half_v4)); 1381 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1382 netlink_done(&nlmsg); 1383 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1384 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); 1385 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v6, sizeof(second_half_v6)); 1386 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1387 netlink_done(&nlmsg); 1388 netlink_done(&nlmsg); 1389 netlink_done(&nlmsg); 1390 netlink_done(&nlmsg); 1391 err = netlink_send(&nlmsg, sock); 1392 if (err < 0) { 1393 debug("netlink: failed to setup wireguard instance: %s\n", strerror(errno)); 1394 } 1395 1396 netlink_init(&nlmsg, id, 0, &genlhdr, sizeof(genlhdr)); 1397 netlink_attr(&nlmsg, WGDEVICE_A_IFNAME, ifname_c, strlen(ifname_c) + 1); 1398 netlink_attr(&nlmsg, WGDEVICE_A_PRIVATE_KEY, private_c, 32); 1399 netlink_attr(&nlmsg, 
WGDEVICE_A_LISTEN_PORT, &listen_c, 2); 1400 netlink_nest(&nlmsg, NLA_F_NESTED | WGDEVICE_A_PEERS); 1401 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1402 netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_a, 32); 1403 netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_a_v6, sizeof(endpoint_a_v6)); 1404 netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[4], 2); 1405 netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); 1406 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1407 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); 1408 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v4, sizeof(first_half_v4)); 1409 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1410 netlink_done(&nlmsg); 1411 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1412 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); 1413 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &first_half_v6, sizeof(first_half_v6)); 1414 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1415 netlink_done(&nlmsg); 1416 netlink_done(&nlmsg); 1417 netlink_done(&nlmsg); 1418 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1419 netlink_attr(&nlmsg, WGPEER_A_PUBLIC_KEY, public_b, 32); 1420 netlink_attr(&nlmsg, WGPEER_A_ENDPOINT, &endpoint_b_v4, sizeof(endpoint_b_v4)); 1421 netlink_attr(&nlmsg, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL, &persistent_keepalives[5], 2); 1422 netlink_nest(&nlmsg, NLA_F_NESTED | WGPEER_A_ALLOWEDIPS); 1423 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1424 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet, 2); 1425 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v4, sizeof(second_half_v4)); 1426 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1427 netlink_done(&nlmsg); 1428 netlink_nest(&nlmsg, NLA_F_NESTED | 0); 1429 netlink_attr(&nlmsg, WGALLOWEDIP_A_FAMILY, &af_inet6, 2); 1430 netlink_attr(&nlmsg, WGALLOWEDIP_A_IPADDR, &second_half_v6, sizeof(second_half_v6)); 1431 netlink_attr(&nlmsg, WGALLOWEDIP_A_CIDR_MASK, &half_cidr, 1); 1432 
netlink_done(&nlmsg); 1433 netlink_done(&nlmsg); 1434 netlink_done(&nlmsg); 1435 netlink_done(&nlmsg); 1436 err = netlink_send(&nlmsg, sock); 1437 if (err < 0) { 1438 debug("netlink: failed to setup wireguard instance: %s\n", strerror(errno)); 1439 } 1440 1441 error: 1442 close(sock); 1443 } 1444 1445 #if SYZ_EXECUTOR || SYZ_NIC_VF 1446 1447 static void netlink_nicvf_setup(void) 1448 { 1449 char cmdline[256]; 1450 1451 #if SYZ_EXECUTOR 1452 if (!flag_nic_vf) 1453 return; 1454 #endif 1455 if (!vf_intf.ppid) 1456 return; 1457 1458 debug("ppid = %d, vf_intf.pass_thru_intf: %s\n", 1459 vf_intf.ppid, vf_intf.pass_thru_intf); 1460 1461 sprintf(cmdline, "nsenter -t 1 -n ip link set %s netns %d", 1462 vf_intf.pass_thru_intf, getpid()); 1463 if (runcmdline(cmdline)) 1464 failmsg("failed to run command", "%s", cmdline); 1465 sprintf(cmdline, "ip a s %s", vf_intf.pass_thru_intf); 1466 if (runcmdline(cmdline)) 1467 failmsg("failed to run command", "%s", cmdline); 1468 sprintf(cmdline, "ip link set %s down", vf_intf.pass_thru_intf); 1469 if (runcmdline(cmdline)) 1470 failmsg("failed to run command", "%s", cmdline); 1471 sprintf(cmdline, "ip link set %s name nicvf0", vf_intf.pass_thru_intf); 1472 if (runcmdline(cmdline)) 1473 failmsg("failed to run command", "%s", cmdline); 1474 debug("nicvf0 VF pass-through setup complete.\n"); 1475 } 1476 #endif // SYZ_NIC_VF 1477 1478 // We test in a separate namespace, which does not have any network devices initially (even lo). 1479 // Create/up as many as we can. 1480 static void initialize_netdevices(void) 1481 { 1482 #if SYZ_EXECUTOR 1483 if (!flag_net_devices) 1484 return; 1485 #endif 1486 // TODO: add the following devices: 1487 // - vxlan 1488 // - ipip 1489 // - lowpan (requires link to device of type IEEE802154, e.g. wpan0) 1490 // - ipoib (requires link to device of type ARPHRD_INFINIBAND) 1491 // - vrf 1492 // - rmnet 1493 // - openvswitch 1494 // Naive attempts to add devices of these types fail with various errors. 
1495 // Also init namespace contains the following devices (which presumably can't be 1496 // created in non-init namespace), can we use them somehow? 1497 // - ifb0/1 1498 // - teql0 1499 // - eql 1500 // Note: netdevsim devices can't have the same name even in different namespaces. 1501 char netdevsim[16]; 1502 sprintf(netdevsim, "netdevsim%d", (int)procid); 1503 struct { 1504 const char* type; 1505 const char* dev; 1506 } devtypes[] = { 1507 // Note: ip6erspan device can't be added if ip6gretap exists in the same namespace. 1508 {"ip6gretap", "ip6gretap0"}, 1509 {"bridge", "bridge0"}, 1510 {"vcan", "vcan0"}, 1511 {"bond", "bond0"}, 1512 {"team", "team0"}, 1513 {"dummy", "dummy0"}, 1514 #if SYZ_EXECUTOR || SYZ_NIC_VF 1515 {"nicvf", "nicvf0"}, 1516 #endif 1517 {"nlmon", "nlmon0"}, 1518 {"caif", "caif0"}, 1519 {"batadv", "batadv0"}, 1520 // Note: this adds vxcan0/vxcan1 pair, similar to veth (creating vxcan0 would fail). 1521 {"vxcan", "vxcan1"}, 1522 // This adds connected veth0 and veth1 devices. 1523 {"veth", 0}, 1524 {"wireguard", "wg0"}, 1525 {"wireguard", "wg1"}, 1526 {"wireguard", "wg2"}, 1527 }; 1528 const char* devmasters[] = {"bridge", "bond", "team", "batadv"}; 1529 // If you extend this array, also update netdev_addr_id in vnet.txt 1530 // and devnames in socket.txt. 
1531 struct { 1532 const char* name; 1533 int macsize; 1534 bool noipv6; 1535 } devices[] = { 1536 {"lo", ETH_ALEN}, 1537 {"sit0", 0}, 1538 {"bridge0", ETH_ALEN}, 1539 {"vcan0", 0, true}, 1540 {"tunl0", 0}, 1541 {"gre0", 0}, 1542 {"gretap0", ETH_ALEN}, 1543 {"ip_vti0", 0}, 1544 {"ip6_vti0", 0}, 1545 {"ip6tnl0", 0}, 1546 {"ip6gre0", 0}, 1547 {"ip6gretap0", ETH_ALEN}, 1548 {"erspan0", ETH_ALEN}, 1549 {"bond0", ETH_ALEN}, 1550 {"veth0", ETH_ALEN}, 1551 {"veth1", ETH_ALEN}, 1552 {"team0", ETH_ALEN}, 1553 {"veth0_to_bridge", ETH_ALEN}, 1554 {"veth1_to_bridge", ETH_ALEN}, 1555 {"veth0_to_bond", ETH_ALEN}, 1556 {"veth1_to_bond", ETH_ALEN}, 1557 {"veth0_to_team", ETH_ALEN}, 1558 {"veth1_to_team", ETH_ALEN}, 1559 {"veth0_to_hsr", ETH_ALEN}, 1560 {"veth1_to_hsr", ETH_ALEN}, 1561 {"hsr0", 0}, 1562 {"dummy0", ETH_ALEN}, 1563 #if SYZ_EXECUTOR || SYZ_NIC_VF 1564 {"nicvf0", 0, true}, 1565 #endif 1566 {"nlmon0", 0}, 1567 {"vxcan0", 0, true}, 1568 {"vxcan1", 0, true}, 1569 {"caif0", ETH_ALEN}, // TODO: up'ing caif fails with ENODEV 1570 {"batadv0", ETH_ALEN}, 1571 {netdevsim, ETH_ALEN}, 1572 {"xfrm0", ETH_ALEN}, 1573 {"veth0_virt_wifi", ETH_ALEN}, 1574 {"veth1_virt_wifi", ETH_ALEN}, 1575 {"virt_wifi0", ETH_ALEN}, 1576 {"veth0_vlan", ETH_ALEN}, 1577 {"veth1_vlan", ETH_ALEN}, 1578 {"vlan0", ETH_ALEN}, 1579 {"vlan1", ETH_ALEN}, 1580 {"macvlan0", ETH_ALEN}, 1581 {"macvlan1", ETH_ALEN}, 1582 {"ipvlan0", ETH_ALEN}, 1583 {"ipvlan1", ETH_ALEN}, 1584 {"veth0_macvtap", ETH_ALEN}, 1585 {"veth1_macvtap", ETH_ALEN}, 1586 {"macvtap0", ETH_ALEN}, 1587 {"macsec0", ETH_ALEN}, 1588 {"veth0_to_batadv", ETH_ALEN}, 1589 {"veth1_to_batadv", ETH_ALEN}, 1590 {"batadv_slave_0", ETH_ALEN}, 1591 {"batadv_slave_1", ETH_ALEN}, 1592 {"geneve0", ETH_ALEN}, 1593 {"geneve1", ETH_ALEN}, 1594 {"wg0", 0}, 1595 {"wg1", 0}, 1596 {"wg2", 0}, 1597 }; 1598 int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 1599 if (sock == -1) 1600 fail("socket(AF_NETLINK) failed"); 1601 unsigned i; 1602 for (i = 0; i < 
sizeof(devtypes) / sizeof(devtypes[0]); i++) 1603 netlink_add_device(&nlmsg, sock, devtypes[i].type, devtypes[i].dev); 1604 // This creates connected bridge/bond/team_slave devices of type veth, 1605 // and makes them slaves of bridge/bond/team devices, respectively. 1606 // Note: slave devices don't need MAC/IP addresses, only master devices. 1607 // veth0_to_* is not slave devices, which still need ip addresses. 1608 for (i = 0; i < sizeof(devmasters) / (sizeof(devmasters[0])); i++) { 1609 char master[32], slave0[32], veth0[32], slave1[32], veth1[32]; 1610 sprintf(slave0, "%s_slave_0", devmasters[i]); 1611 sprintf(veth0, "veth0_to_%s", devmasters[i]); 1612 netlink_add_veth(&nlmsg, sock, slave0, veth0); 1613 sprintf(slave1, "%s_slave_1", devmasters[i]); 1614 sprintf(veth1, "veth1_to_%s", devmasters[i]); 1615 netlink_add_veth(&nlmsg, sock, slave1, veth1); 1616 sprintf(master, "%s0", devmasters[i]); 1617 netlink_device_change(&nlmsg, sock, slave0, false, master, 0, 0, NULL); 1618 netlink_device_change(&nlmsg, sock, slave1, false, master, 0, 0, NULL); 1619 } 1620 netlink_add_xfrm(&nlmsg, sock, "xfrm0"); 1621 1622 // bond/team_slave_* will set up automatically when set their master. 1623 // But bridge_slave_* need to set up manually. 1624 netlink_device_change(&nlmsg, sock, "bridge_slave_0", true, 0, 0, 0, NULL); 1625 netlink_device_change(&nlmsg, sock, "bridge_slave_1", true, 0, 0, 0, NULL); 1626 1627 // Setup hsr device (slightly different from what we do for devmasters). 
1628 netlink_add_veth(&nlmsg, sock, "hsr_slave_0", "veth0_to_hsr"); 1629 netlink_add_veth(&nlmsg, sock, "hsr_slave_1", "veth1_to_hsr"); 1630 netlink_add_hsr(&nlmsg, sock, "hsr0", "hsr_slave_0", "hsr_slave_1"); 1631 netlink_device_change(&nlmsg, sock, "hsr_slave_0", true, 0, 0, 0, NULL); 1632 netlink_device_change(&nlmsg, sock, "hsr_slave_1", true, 0, 0, 0, NULL); 1633 1634 netlink_add_veth(&nlmsg, sock, "veth0_virt_wifi", "veth1_virt_wifi"); 1635 netlink_add_linked(&nlmsg, sock, "virt_wifi", "virt_wifi0", "veth1_virt_wifi"); 1636 1637 netlink_add_veth(&nlmsg, sock, "veth0_vlan", "veth1_vlan"); 1638 netlink_add_vlan(&nlmsg, sock, "vlan0", "veth0_vlan", 0, htons(ETH_P_8021Q)); 1639 netlink_add_vlan(&nlmsg, sock, "vlan1", "veth0_vlan", 1, htons(ETH_P_8021AD)); 1640 netlink_add_macvlan(&nlmsg, sock, "macvlan0", "veth1_vlan"); 1641 netlink_add_macvlan(&nlmsg, sock, "macvlan1", "veth1_vlan"); 1642 netlink_add_ipvlan(&nlmsg, sock, "ipvlan0", "veth0_vlan", IPVLAN_MODE_L2, 0); 1643 netlink_add_ipvlan(&nlmsg, sock, "ipvlan1", "veth0_vlan", IPVLAN_MODE_L3S, IPVLAN_F_VEPA); 1644 1645 netlink_add_veth(&nlmsg, sock, "veth0_macvtap", "veth1_macvtap"); 1646 netlink_add_linked(&nlmsg, sock, "macvtap", "macvtap0", "veth0_macvtap"); 1647 netlink_add_linked(&nlmsg, sock, "macsec", "macsec0", "veth1_macvtap"); 1648 1649 char addr[32]; 1650 sprintf(addr, DEV_IPV4, 14 + 10); // should point to veth0 1651 struct in_addr geneve_addr4; 1652 if (inet_pton(AF_INET, addr, &geneve_addr4) <= 0) 1653 fail("geneve0 inet_pton failed"); 1654 struct in6_addr geneve_addr6; 1655 // Must not be link local (our device addresses are link local). 
1656 if (inet_pton(AF_INET6, "fc00::01", &geneve_addr6) <= 0) 1657 fail("geneve1 inet_pton failed"); 1658 netlink_add_geneve(&nlmsg, sock, "geneve0", 0, &geneve_addr4, 0); 1659 netlink_add_geneve(&nlmsg, sock, "geneve1", 1, 0, &geneve_addr6); 1660 1661 netdevsim_add((int)procid, 4); // Number of port is in sync with value in sys/linux/socket_netlink_generic_devlink.txt 1662 1663 netlink_wireguard_setup(); 1664 1665 #if SYZ_EXECUTOR || SYZ_NIC_VF 1666 netlink_nicvf_setup(); 1667 #endif 1668 1669 for (i = 0; i < sizeof(devices) / (sizeof(devices[0])); i++) { 1670 // Assign some unique address to devices. Some devices won't up without this. 1671 // Shift addresses by 10 because 0 subnet address can mean special things. 1672 char addr[32]; 1673 sprintf(addr, DEV_IPV4, i + 10); 1674 netlink_add_addr4(&nlmsg, sock, devices[i].name, addr); 1675 if (!devices[i].noipv6) { 1676 sprintf(addr, DEV_IPV6, i + 10); 1677 netlink_add_addr6(&nlmsg, sock, devices[i].name, addr); 1678 } 1679 uint64 macaddr = DEV_MAC + ((i + 10ull) << 40); 1680 netlink_device_change(&nlmsg, sock, devices[i].name, true, 0, &macaddr, devices[i].macsize, NULL); 1681 } 1682 close(sock); 1683 } 1684 1685 // Same as initialize_netdevices, but called in init net namespace. 
1686 static void initialize_netdevices_init(void) 1687 { 1688 #if SYZ_EXECUTOR 1689 if (!flag_net_devices) 1690 return; 1691 #endif 1692 int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 1693 if (sock == -1) 1694 fail("socket(AF_NETLINK) failed"); 1695 struct { 1696 const char* type; 1697 int macsize; 1698 bool noipv6; 1699 bool noup; 1700 } devtypes[] = { 1701 // NETROM device, see net/netrom/{af_netrom,nr_dev}.c 1702 {"nr", 7, true}, 1703 // ROSE device, see net/rose/{af_rose,rose_dev}.c 1704 // We don't up it yet because it crashes kernel right away: 1705 // https://groups.google.com/d/msg/syzkaller/v-4B3zoBC-4/02SCKEzJBwAJ 1706 {"rose", 5, true, true}, 1707 }; 1708 unsigned i; 1709 for (i = 0; i < sizeof(devtypes) / sizeof(devtypes[0]); i++) { 1710 char dev[32], addr[32]; 1711 sprintf(dev, "%s%d", devtypes[i].type, (int)procid); 1712 // Note: syscall descriptions know these addresses. 1713 sprintf(addr, "172.30.%d.%d", i, (int)procid + 1); 1714 netlink_add_addr4(&nlmsg, sock, dev, addr); 1715 if (!devtypes[i].noipv6) { 1716 sprintf(addr, "fe88::%02x:%02x", i, (int)procid + 1); 1717 netlink_add_addr6(&nlmsg, sock, dev, addr); 1718 } 1719 int macsize = devtypes[i].macsize; 1720 uint64 macaddr = 0xbbbbbb + ((unsigned long long)i << (8 * (macsize - 2))) + 1721 (procid << (8 * (macsize - 1))); 1722 netlink_device_change(&nlmsg, sock, dev, !devtypes[i].noup, 0, &macaddr, macsize, NULL); 1723 } 1724 close(sock); 1725 1726 #if SYZ_EXECUTOR || SYZ_NIC_VF 1727 find_vf_interface(); 1728 #endif 1729 } 1730 #endif 1731 1732 #if SYZ_EXECUTOR || SYZ_NET_INJECTION && (__NR_syz_extract_tcp_res || SYZ_REPEAT) 1733 #include <errno.h> 1734 1735 static int read_tun(char* data, int size) 1736 { 1737 if (tunfd < 0) 1738 return -1; 1739 1740 int rv = read(tunfd, data, size); 1741 if (rv < 0) { 1742 // EBADF can be returned if the test closes tunfd with close_range syscall. 1743 // Tun sometimes returns EBADFD, unclear if it's a kernel bug or not. 
1744 if (errno == EAGAIN || errno == EBADF || errno == EBADFD) 1745 return -1; 1746 fail("tun read failed"); 1747 } 1748 return rv; 1749 } 1750 #endif 1751 1752 #if SYZ_EXECUTOR || __NR_syz_emit_ethernet && SYZ_NET_INJECTION 1753 #include <stdbool.h> 1754 #include <sys/uio.h> 1755 1756 #if ENABLE_NAPI_FRAGS 1757 #define MAX_FRAGS 4 1758 struct vnet_fragmentation { 1759 uint32 full; 1760 uint32 count; 1761 uint32 frags[MAX_FRAGS]; 1762 }; 1763 #endif 1764 1765 static long syz_emit_ethernet(volatile long a0, volatile long a1, volatile long a2) 1766 { 1767 // syz_emit_ethernet(len len[packet], packet ptr[in, eth_packet], frags ptr[in, vnet_fragmentation, opt]) 1768 // vnet_fragmentation { 1769 // full int32[0:1] 1770 // count int32[1:4] 1771 // frags array[int32[0:4096], 4] 1772 // } 1773 if (tunfd < 0) 1774 return (uintptr_t)-1; 1775 1776 uint32 length = a0; 1777 char* data = (char*)a1; 1778 debug_dump_data(data, length); 1779 1780 #if ENABLE_NAPI_FRAGS 1781 struct vnet_fragmentation* frags = (struct vnet_fragmentation*)a2; 1782 struct iovec vecs[MAX_FRAGS + 1]; 1783 uint32 nfrags = 0; 1784 if (!tun_frags_enabled || frags == NULL) { 1785 vecs[nfrags].iov_base = data; 1786 vecs[nfrags].iov_len = length; 1787 nfrags++; 1788 } else { 1789 bool full = frags->full; 1790 uint32 count = frags->count; 1791 if (count > MAX_FRAGS) 1792 count = MAX_FRAGS; 1793 uint32 i; 1794 for (i = 0; i < count && length != 0; i++) { 1795 uint32 size = frags->frags[i]; 1796 if (size > length) 1797 size = length; 1798 vecs[nfrags].iov_base = data; 1799 vecs[nfrags].iov_len = size; 1800 nfrags++; 1801 data += size; 1802 length -= size; 1803 } 1804 if (length != 0 && (full || nfrags == 0)) { 1805 vecs[nfrags].iov_base = data; 1806 vecs[nfrags].iov_len = length; 1807 nfrags++; 1808 } 1809 } 1810 return writev(tunfd, vecs, nfrags); 1811 #else 1812 return write(tunfd, data, length); 1813 #endif 1814 } 1815 #endif 1816 1817 #if SYZ_EXECUTOR || __NR_syz_io_uring_submit || __NR_syz_io_uring_complete 
|| __NR_syz_io_uring_setup 1818 1819 #define SIZEOF_IO_URING_SQE 64 1820 #define SIZEOF_IO_URING_CQE 16 1821 1822 // Once a io_uring is set up by calling io_uring_setup, the offsets to the member fields 1823 // to be used on the mmap'ed area are set in structs io_sqring_offsets and io_cqring_offsets. 1824 // Except io_sqring_offsets.array, the offsets are static while all depend on how struct io_rings 1825 // is organized in code. The offsets can be marked as resources in syzkaller descriptions but 1826 // this makes it difficult to generate correct programs by the fuzzer. Thus, the offsets are 1827 // hard-coded here (and in the descriptions), and array offset is later computed once the number 1828 // of entries is available. Another way to obtain the offsets is to setup another io_uring here 1829 // and use what it returns. It is slower but might be more maintainable. 1830 #define SQ_HEAD_OFFSET 0 1831 #define SQ_TAIL_OFFSET 64 1832 #define SQ_RING_MASK_OFFSET 256 1833 #define SQ_RING_ENTRIES_OFFSET 264 1834 #define SQ_FLAGS_OFFSET 276 1835 #define SQ_DROPPED_OFFSET 272 1836 #define CQ_HEAD_OFFSET 128 1837 #define CQ_TAIL_OFFSET 192 1838 #define CQ_RING_MASK_OFFSET 260 1839 #define CQ_RING_ENTRIES_OFFSET 268 1840 #define CQ_RING_OVERFLOW_OFFSET 284 1841 #define CQ_FLAGS_OFFSET 280 1842 #define CQ_CQES_OFFSET 320 1843 1844 #if SYZ_EXECUTOR || __NR_syz_io_uring_complete 1845 1846 // From linux/io_uring.h 1847 struct io_uring_cqe { 1848 uint64 user_data; 1849 uint32 res; 1850 uint32 flags; 1851 }; 1852 1853 static long syz_io_uring_complete(volatile long a0) 1854 { 1855 // syzlang: syz_io_uring_complete(ring_ptr ring_ptr) 1856 // C: syz_io_uring_complete(char* ring_ptr) 1857 1858 // It is not checked if the ring is empty 1859 1860 // Cast to original 1861 char* ring_ptr = (char*)a0; 1862 1863 // Compute the head index and the next head value 1864 uint32 cq_ring_mask = *(uint32*)(ring_ptr + CQ_RING_MASK_OFFSET); 1865 uint32* cq_head_ptr = (uint32*)(ring_ptr + 
CQ_HEAD_OFFSET); 1866 uint32 cq_head = *cq_head_ptr & cq_ring_mask; 1867 uint32 cq_head_next = *cq_head_ptr + 1; 1868 1869 // Compute the ptr to the src cq entry on the ring 1870 char* cqe_src = ring_ptr + CQ_CQES_OFFSET + cq_head * SIZEOF_IO_URING_CQE; 1871 1872 // Get the cq entry from the ring 1873 struct io_uring_cqe cqe; 1874 memcpy(&cqe, cqe_src, sizeof(cqe)); 1875 1876 // Advance the head. Head is a free-flowing integer and relies on natural wrapping. 1877 // Ensure that the kernel will never see a head update without the preceeding CQE 1878 // stores being done. 1879 __atomic_store_n(cq_head_ptr, cq_head_next, __ATOMIC_RELEASE); 1880 1881 // In the descriptions (sys/linux/io_uring.txt), openat and openat2 are passed 1882 // with a unique range of sqe.user_data (0x12345 and 0x23456) to identify the operations 1883 // which produces an fd instance. Check cqe.user_data, which should be the same 1884 // as sqe.user_data for that operation. If it falls in that unique range, return 1885 // cqe.res as fd. Otherwise, just return an invalid fd. 1886 return (cqe.user_data == 0x12345 || cqe.user_data == 0x23456) ? 
(long)cqe.res : (long)-1; 1887 } 1888 1889 #endif 1890 1891 #if SYZ_EXECUTOR || __NR_syz_io_uring_setup 1892 1893 struct io_sqring_offsets { 1894 uint32 head; 1895 uint32 tail; 1896 uint32 ring_mask; 1897 uint32 ring_entries; 1898 uint32 flags; 1899 uint32 dropped; 1900 uint32 array; 1901 uint32 resv1; 1902 uint64 resv2; 1903 }; 1904 1905 struct io_cqring_offsets { 1906 uint32 head; 1907 uint32 tail; 1908 uint32 ring_mask; 1909 uint32 ring_entries; 1910 uint32 overflow; 1911 uint32 cqes; 1912 uint64 resv[2]; 1913 }; 1914 1915 struct io_uring_params { 1916 uint32 sq_entries; 1917 uint32 cq_entries; 1918 uint32 flags; 1919 uint32 sq_thread_cpu; 1920 uint32 sq_thread_idle; 1921 uint32 features; 1922 uint32 resv[4]; 1923 struct io_sqring_offsets sq_off; 1924 struct io_cqring_offsets cq_off; 1925 }; 1926 1927 #define IORING_OFF_SQ_RING 0 1928 #define IORING_OFF_SQES 0x10000000ULL 1929 #define IORING_SETUP_SQE128 (1U << 10) 1930 #define IORING_SETUP_CQE32 (1U << 11) 1931 1932 #include <sys/mman.h> 1933 #include <unistd.h> 1934 1935 // Wrapper for io_uring_setup and the subsequent mmap calls that map the ring and the sqes 1936 static long syz_io_uring_setup(volatile long a0, volatile long a1, volatile long a2, volatile long a3) 1937 { 1938 // syzlang: syz_io_uring_setup(entries int32[1:IORING_MAX_ENTRIES], params ptr[inout, io_uring_params], ring_ptr ptr[out, ring_ptr], sqes_ptr ptr[out, sqes_ptr]) fd_io_uring 1939 // C: syz_io_uring_setup(uint32 entries, struct io_uring_params* params, void** ring_ptr_out, void** sqes_ptr_out) // returns uint32 fd_io_uring 1940 1941 // Cast to original 1942 uint32 entries = (uint32)a0; 1943 struct io_uring_params* setup_params = (struct io_uring_params*)a1; 1944 void** ring_ptr_out = (void**)a2; 1945 void** sqes_ptr_out = (void**)a3; 1946 // Temporarily disable IORING_SETUP_CQE32 and IORING_SETUP_SQE128 that may change SIZEOF_IO_URING_CQE and SIZEOF_IO_URING_SQE. 1947 // Tracking bug: https://github.com/google/syzkaller/issues/4531. 
1948 setup_params->flags &= ~(IORING_SETUP_CQE32 | IORING_SETUP_SQE128); 1949 uint32 fd_io_uring = syscall(__NR_io_uring_setup, entries, setup_params); 1950 1951 // Compute the ring sizes 1952 uint32 sq_ring_sz = setup_params->sq_off.array + setup_params->sq_entries * sizeof(uint32); 1953 uint32 cq_ring_sz = setup_params->cq_off.cqes + setup_params->cq_entries * SIZEOF_IO_URING_CQE; 1954 1955 // Asssumed IORING_FEAT_SINGLE_MMAP, which is always the case with the current implementation 1956 // The implication is that the sq_ring_ptr and the cq_ring_ptr are the same but the 1957 // difference is in the offsets to access the fields of these rings. 1958 uint32 ring_sz = sq_ring_sz > cq_ring_sz ? sq_ring_sz : cq_ring_sz; 1959 *ring_ptr_out = mmap(0, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd_io_uring, IORING_OFF_SQ_RING); 1960 1961 uint32 sqes_sz = setup_params->sq_entries * SIZEOF_IO_URING_SQE; 1962 *sqes_ptr_out = mmap(0, sqes_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd_io_uring, IORING_OFF_SQES); 1963 1964 uint32* array = (uint32*)((uintptr_t)*ring_ptr_out + setup_params->sq_off.array); 1965 for (uint32 index = 0; index < entries; index++) 1966 array[index] = index; 1967 1968 return fd_io_uring; 1969 } 1970 1971 #endif 1972 1973 #if SYZ_EXECUTOR || __NR_syz_io_uring_submit 1974 1975 static long syz_io_uring_submit(volatile long a0, volatile long a1, volatile long a2) 1976 { 1977 // syzlang: syz_io_uring_submit(ring_ptr ring_ptr, sqes_ptr sqes_ptr, sqe ptr[in, io_uring_sqe]) 1978 // C: syz_io_uring_submit(char* ring_ptr, io_uring_sqe* sqes_ptr, io_uring_sqe* sqe) 1979 1980 // It is not checked if the ring is full 1981 1982 // Cast to original 1983 char* ring_ptr = (char*)a0; // This will be exposed to offsets in bytes 1984 char* sqes_ptr = (char*)a1; 1985 1986 char* sqe = (char*)a2; 1987 1988 uint32 sq_ring_mask = *(uint32*)(ring_ptr + SQ_RING_MASK_OFFSET); 1989 uint32* sq_tail_ptr = (uint32*)(ring_ptr + SQ_TAIL_OFFSET); 1990 uint32 
sq_tail = *sq_tail_ptr & sq_ring_mask; 1991 1992 // Get the ptr to the destination for the sqe 1993 char* sqe_dest = sqes_ptr + sq_tail * SIZEOF_IO_URING_SQE; 1994 1995 // Write the sqe entry to its destination in sqes 1996 memcpy(sqe_dest, sqe, SIZEOF_IO_URING_SQE); 1997 1998 // Write the index to the sqe array 1999 uint32 sq_tail_next = *sq_tail_ptr + 1; 2000 2001 // Advance the tail. Tail is a free-flowing integer and relies on natural wrapping. 2002 // Ensure that the kernel will never see a tail update without the preceeding SQE 2003 // stores being done. 2004 __atomic_store_n(sq_tail_ptr, sq_tail_next, __ATOMIC_RELEASE); 2005 2006 // Now the application is free to call io_uring_enter() to submit the sqe 2007 return 0; 2008 } 2009 2010 #endif 2011 2012 #endif 2013 2014 #if SYZ_EXECUTOR || __NR_syz_usbip_server_init 2015 2016 #include <errno.h> 2017 #include <fcntl.h> 2018 #include <linux/usb/ch9.h> 2019 #include <stdbool.h> 2020 #include <stdio.h> 2021 #include <stdlib.h> 2022 #include <string.h> 2023 #include <sys/socket.h> 2024 #include <unistd.h> 2025 2026 // This should be coherent with CONFIG_USBIP_VHCI_HC_PORTS. 2027 #define VHCI_HC_PORTS 8 2028 #define VHCI_PORTS (VHCI_HC_PORTS * 2) 2029 2030 static long syz_usbip_server_init(volatile long a0) 2031 { 2032 // port_alloc[0] corresponds to ports which can be used by usb2 and 2033 // port_alloc[1] corresponds to ports which can be used by usb3. 2034 static int port_alloc[2]; 2035 2036 int speed = (int)a0; 2037 bool usb3 = (speed == USB_SPEED_SUPER); 2038 2039 int socket_pair[2]; 2040 if (socketpair(AF_UNIX, SOCK_STREAM, 0, socket_pair)) { 2041 // This can happen if the test calls prlimit(RLIMIT_AS). 
2042 debug("syz_usbip_server_init: socketpair failed (%d)\n", errno); 2043 return -1; 2044 } 2045 2046 int client_fd = socket_pair[0]; 2047 int server_fd = socket_pair[1]; 2048 2049 int available_port_num = __atomic_fetch_add(&port_alloc[usb3], 1, __ATOMIC_RELAXED); 2050 if (available_port_num > VHCI_HC_PORTS) { 2051 debug("syz_usbip_server_init : no more available port for : %d\n", available_port_num); 2052 return -1; 2053 } 2054 2055 // Each port number corresponds to a particular vhci_hcd (USB/IP Virtual Host Controller) and it is used by either 2056 // an usb2 device or usb3 device. There are 16 ports available in each vhci_hcd. 2057 // (VHCI_PORTS = 16 in our case.) When they are occupied, the following vhci_hcd's ports are used. 2058 // First 16 ports correspond to vhci_hcd0, next 16 ports correspond to 2059 // vhci_hcd1 etc. In a vhci_hcd, first 8 ports are used by usb2 devices and last 8 are used by usb3 devices. 2060 int port_num = procid * VHCI_PORTS + usb3 * VHCI_HC_PORTS + available_port_num; 2061 2062 // Under normal USB/IP usage, devid represents the device ID on the server. 2063 // When fuzzing with syzkaller we don't have an actual server or an actual device, so use 0 for devid. 2064 char buffer[100]; 2065 sprintf(buffer, "%d %d %s %d", port_num, client_fd, "0", speed); 2066 2067 write_file("/sys/devices/platform/vhci_hcd.0/attach", buffer); 2068 return server_fd; 2069 } 2070 2071 #endif 2072 2073 #if SYZ_EXECUTOR || __NR_syz_btf_id_by_name 2074 2075 #include <errno.h> 2076 #include <fcntl.h> 2077 #include <stdbool.h> 2078 #include <stddef.h> 2079 #include <stdio.h> 2080 #include <stdlib.h> 2081 #include <string.h> 2082 #include <sys/stat.h> 2083 #include <unistd.h> 2084 2085 // Some items in linux/btf.h are relatively new, so we copy them here for 2086 // backward compatibility. 
2087 #define BTF_MAGIC 0xeB9F 2088 2089 struct btf_header { 2090 __u16 magic; 2091 __u8 version; 2092 __u8 flags; 2093 __u32 hdr_len; 2094 __u32 type_off; 2095 __u32 type_len; 2096 __u32 str_off; 2097 __u32 str_len; 2098 }; 2099 2100 #define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f) 2101 #define BTF_INFO_VLEN(info) ((info)&0xffff) 2102 2103 #define BTF_KIND_INT 1 2104 #define BTF_KIND_ARRAY 3 2105 #define BTF_KIND_STRUCT 4 2106 #define BTF_KIND_UNION 5 2107 #define BTF_KIND_ENUM 6 2108 #define BTF_KIND_FUNC_PROTO 13 2109 #define BTF_KIND_VAR 14 2110 #define BTF_KIND_DATASEC 15 2111 2112 struct btf_type { 2113 __u32 name_off; 2114 __u32 info; 2115 union { 2116 __u32 size; 2117 __u32 type; 2118 }; 2119 }; 2120 2121 struct btf_enum { 2122 __u32 name_off; 2123 __s32 val; 2124 }; 2125 2126 struct btf_array { 2127 __u32 type; 2128 __u32 index_type; 2129 __u32 nelems; 2130 }; 2131 2132 struct btf_member { 2133 __u32 name_off; 2134 __u32 type; 2135 __u32 offset; 2136 }; 2137 2138 struct btf_param { 2139 __u32 name_off; 2140 __u32 type; 2141 }; 2142 2143 struct btf_var { 2144 __u32 linkage; 2145 }; 2146 2147 struct btf_var_secinfo { 2148 __u32 type; 2149 __u32 offset; 2150 __u32 size; 2151 }; 2152 2153 // Set the limit on the maximum size of btf/vmlinux to be 10 MiB. 2154 #define VMLINUX_MAX_SUPPORT_SIZE (10 * 1024 * 1024) 2155 2156 // Read out all the content of /sys/kernel/btf/vmlinux to the fixed address 2157 // buffer and return it. Return NULL if failed. 2158 static char* read_btf_vmlinux() 2159 { 2160 static bool is_read = false; 2161 static char buf[VMLINUX_MAX_SUPPORT_SIZE]; 2162 2163 // There could be a race condition here, but it should not be harmful. 
2164 if (is_read) 2165 return buf; 2166 2167 int fd = open("/sys/kernel/btf/vmlinux", O_RDONLY); 2168 if (fd < 0) 2169 return NULL; 2170 2171 unsigned long bytes_read = 0; 2172 for (;;) { 2173 ssize_t ret = read(fd, buf + bytes_read, 2174 VMLINUX_MAX_SUPPORT_SIZE - bytes_read); 2175 2176 if (ret < 0 || bytes_read + ret == VMLINUX_MAX_SUPPORT_SIZE) 2177 return NULL; 2178 2179 if (ret == 0) 2180 break; 2181 2182 bytes_read += ret; 2183 } 2184 2185 is_read = true; 2186 return buf; 2187 } 2188 2189 // Given a pointer to a C-string as the only argument a0, return the 2190 // corresponding btf ID for this name. Return -1 if there is an error when 2191 // opening the vmlinux file or the name is not found in vmlinux. 2192 static long syz_btf_id_by_name(volatile long a0) 2193 { 2194 // syzlang: syz_btf_id_by_name(name ptr[in, string]) btf_id 2195 // C: syz_btf_id_by_name(char* name) 2196 char* target = (char*)a0; 2197 2198 char* vmlinux = read_btf_vmlinux(); 2199 if (vmlinux == NULL) 2200 return -1; 2201 2202 struct btf_header* btf_header = (struct btf_header*)vmlinux; 2203 if (btf_header->magic != BTF_MAGIC) 2204 return -1; 2205 // These offsets are bytes relative to the end of the header. 2206 char* btf_type_sec = vmlinux + btf_header->hdr_len + btf_header->type_off; 2207 char* btf_str_sec = vmlinux + btf_header->hdr_len + btf_header->str_off; 2208 // Scan through the btf type section, and find a type description that 2209 // matches the provided name. 2210 unsigned int bytes_parsed = 0; 2211 // BTF index starts at 1. 2212 long idx = 1; 2213 while (bytes_parsed < btf_header->type_len) { 2214 struct btf_type* btf_type = (struct btf_type*)(btf_type_sec + bytes_parsed); 2215 uint32 kind = BTF_INFO_KIND(btf_type->info); 2216 uint32 vlen = BTF_INFO_VLEN(btf_type->info); 2217 char* name = btf_str_sec + btf_type->name_off; 2218 2219 if (strcmp(name, target) == 0) 2220 return idx; 2221 2222 // From /include/uapi/linux/btf.h, some kinds of types are 2223 // followed by extra data. 
2224 size_t skip; 2225 switch (kind) { 2226 case BTF_KIND_INT: 2227 skip = sizeof(uint32); 2228 break; 2229 case BTF_KIND_ENUM: 2230 skip = sizeof(struct btf_enum) * vlen; 2231 break; 2232 case BTF_KIND_ARRAY: 2233 skip = sizeof(struct btf_array); 2234 break; 2235 case BTF_KIND_STRUCT: 2236 case BTF_KIND_UNION: 2237 skip = sizeof(struct btf_member) * vlen; 2238 break; 2239 case BTF_KIND_FUNC_PROTO: 2240 skip = sizeof(struct btf_param) * vlen; 2241 break; 2242 case BTF_KIND_VAR: 2243 skip = sizeof(struct btf_var); 2244 break; 2245 case BTF_KIND_DATASEC: 2246 skip = sizeof(struct btf_var_secinfo) * vlen; 2247 break; 2248 default: 2249 skip = 0; 2250 } 2251 2252 bytes_parsed += sizeof(struct btf_type) + skip; 2253 idx++; 2254 } 2255 2256 return -1; 2257 } 2258 2259 #endif // SYZ_EXECUTOR || __NR_syz_btf_id_by_name 2260 2261 // Same as memcpy except that it accepts offset to dest and src. 2262 #if SYZ_EXECUTOR || __NR_syz_memcpy_off 2263 static long syz_memcpy_off(volatile long a0, volatile long a1, volatile long a2, volatile long a3, volatile long a4) 2264 { 2265 // C: syz_memcpy_off(void* dest, uint32 dest_off, void* src, uint32 src_off, size_t n) 2266 2267 // Cast to original 2268 char* dest = (char*)a0; 2269 uint32 dest_off = (uint32)a1; 2270 char* src = (char*)a2; 2271 uint32 src_off = (uint32)a3; 2272 size_t n = (size_t)a4; 2273 2274 return (long)memcpy(dest + dest_off, src + src_off, n); 2275 } 2276 #endif 2277 2278 #if (SYZ_EXECUTOR || SYZ_REPEAT && SYZ_NET_INJECTION) && SYZ_EXECUTOR_USES_FORK_SERVER 2279 static void flush_tun() 2280 { 2281 #if SYZ_EXECUTOR 2282 if (!flag_net_injection) 2283 return; 2284 #endif 2285 char data[1000]; 2286 while (read_tun(&data[0], sizeof(data)) != -1) { 2287 } 2288 } 2289 #endif 2290 2291 #if SYZ_EXECUTOR || __NR_syz_extract_tcp_res && SYZ_NET_INJECTION 2292 #ifndef __ANDROID__ 2293 // Can't include <linux/ipv6.h>, since it causes 2294 // conflicts due to some structs redefinition. 
2295 struct ipv6hdr { 2296 __u8 priority : 4, 2297 version : 4; 2298 __u8 flow_lbl[3]; 2299 2300 __be16 payload_len; 2301 __u8 nexthdr; 2302 __u8 hop_limit; 2303 2304 struct in6_addr saddr; 2305 struct in6_addr daddr; 2306 }; 2307 #endif 2308 2309 struct tcp_resources { 2310 uint32 seq; 2311 uint32 ack; 2312 }; 2313 2314 static long syz_extract_tcp_res(volatile long a0, volatile long a1, volatile long a2) 2315 { 2316 // syz_extract_tcp_res(res ptr[out, tcp_resources], seq_inc int32, ack_inc int32) 2317 2318 if (tunfd < 0) 2319 return (uintptr_t)-1; 2320 2321 // We just need this to be large enough to hold headers that we parse (ethernet/ip/tcp). 2322 // Rest of the packet (if any) will be silently truncated which is fine. 2323 char data[1000]; 2324 int rv = read_tun(&data[0], sizeof(data)); 2325 if (rv == -1) 2326 return (uintptr_t)-1; 2327 size_t length = rv; 2328 debug_dump_data(data, length); 2329 2330 if (length < sizeof(struct ethhdr)) 2331 return (uintptr_t)-1; 2332 struct ethhdr* ethhdr = (struct ethhdr*)&data[0]; 2333 2334 struct tcphdr* tcphdr = 0; 2335 if (ethhdr->h_proto == htons(ETH_P_IP)) { 2336 if (length < sizeof(struct ethhdr) + sizeof(struct iphdr)) 2337 return (uintptr_t)-1; 2338 struct iphdr* iphdr = (struct iphdr*)&data[sizeof(struct ethhdr)]; 2339 if (iphdr->protocol != IPPROTO_TCP) 2340 return (uintptr_t)-1; 2341 if (length < sizeof(struct ethhdr) + iphdr->ihl * 4 + sizeof(struct tcphdr)) 2342 return (uintptr_t)-1; 2343 tcphdr = (struct tcphdr*)&data[sizeof(struct ethhdr) + iphdr->ihl * 4]; 2344 } else { 2345 if (length < sizeof(struct ethhdr) + sizeof(struct ipv6hdr)) 2346 return (uintptr_t)-1; 2347 struct ipv6hdr* ipv6hdr = (struct ipv6hdr*)&data[sizeof(struct ethhdr)]; 2348 // TODO: parse and skip extension headers. 
2349 if (ipv6hdr->nexthdr != IPPROTO_TCP) 2350 return (uintptr_t)-1; 2351 if (length < sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) 2352 return (uintptr_t)-1; 2353 tcphdr = (struct tcphdr*)&data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr)]; 2354 } 2355 2356 struct tcp_resources* res = (struct tcp_resources*)a0; 2357 res->seq = htonl((ntohl(tcphdr->seq) + (uint32)a1)); 2358 res->ack = htonl((ntohl(tcphdr->ack_seq) + (uint32)a2)); 2359 2360 debug("extracted seq: %08x\n", res->seq); 2361 debug("extracted ack: %08x\n", res->ack); 2362 2363 return 0; 2364 } 2365 #endif 2366 2367 #if SYZ_EXECUTOR || SYZ_CLOSE_FDS || __NR_syz_usb_connect || __NR_syz_usb_connect_ath9k 2368 #define MAX_FDS 30 2369 #endif 2370 2371 #if SYZ_EXECUTOR || __NR_syz_usb_connect || __NR_syz_usb_connect_ath9k || \ 2372 __NR_syz_usb_ep_write || __NR_syz_usb_ep_read || __NR_syz_usb_control_io || \ 2373 __NR_syz_usb_disconnect 2374 #include <errno.h> 2375 #include <fcntl.h> 2376 #include <linux/usb/ch9.h> 2377 #include <stdarg.h> 2378 #include <stdbool.h> 2379 #include <stddef.h> 2380 #include <stdio.h> 2381 #include <sys/mount.h> 2382 #include <sys/stat.h> 2383 #include <sys/types.h> 2384 2385 #include "common_usb_linux.h" 2386 #endif 2387 2388 #if SYZ_EXECUTOR || __NR_syz_open_dev 2389 #include <fcntl.h> 2390 #include <string.h> 2391 #include <sys/stat.h> 2392 #include <sys/types.h> 2393 2394 static long syz_open_dev(volatile long a0, volatile long a1, volatile long a2) 2395 { 2396 if (a0 == 0xc || a0 == 0xb) { 2397 // syz_open_dev$char(dev const[0xc], major intptr, minor intptr) fd 2398 // syz_open_dev$block(dev const[0xb], major intptr, minor intptr) fd 2399 char buf[128]; 2400 sprintf(buf, "/dev/%s/%d:%d", a0 == 0xc ? 
"char" : "block", (uint8)a1, (uint8)a2); 2401 return open(buf, O_RDWR, 0); 2402 } else { 2403 // syz_open_dev(dev strconst, id intptr, flags flags[open_flags]) fd 2404 char buf[1024]; 2405 char* hash; 2406 strncpy(buf, (char*)a0, sizeof(buf) - 1); 2407 buf[sizeof(buf) - 1] = 0; 2408 while ((hash = strchr(buf, '#'))) { 2409 *hash = '0' + (char)(a1 % 10); // 10 devices should be enough for everyone. 2410 a1 /= 10; 2411 } 2412 return open(buf, a2, 0); 2413 } 2414 } 2415 #endif 2416 2417 #if SYZ_EXECUTOR || __NR_syz_open_procfs 2418 #include <fcntl.h> 2419 #include <string.h> 2420 #include <sys/stat.h> 2421 #include <sys/types.h> 2422 2423 static long syz_open_procfs(volatile long a0, volatile long a1) 2424 { 2425 // syz_open_procfs(pid pid, file ptr[in, string[procfs_file]]) fd 2426 2427 char buf[128]; 2428 memset(buf, 0, sizeof(buf)); 2429 if (a0 == 0) { 2430 snprintf(buf, sizeof(buf), "/proc/self/%s", (char*)a1); 2431 } else if (a0 == -1) { 2432 snprintf(buf, sizeof(buf), "/proc/thread-self/%s", (char*)a1); 2433 } else { 2434 snprintf(buf, sizeof(buf), "/proc/self/task/%d/%s", (int)a0, (char*)a1); 2435 } 2436 int fd = open(buf, O_RDWR); 2437 if (fd == -1) 2438 fd = open(buf, O_RDONLY); 2439 return fd; 2440 } 2441 #endif 2442 2443 #if SYZ_EXECUTOR || __NR_syz_open_pts 2444 #include <fcntl.h> 2445 #include <sys/ioctl.h> 2446 #include <sys/stat.h> 2447 #include <sys/types.h> 2448 2449 static long syz_open_pts(volatile long a0, volatile long a1) 2450 { 2451 // syz_openpts(fd fd[tty], flags flags[open_flags]) fd[tty] 2452 int ptyno = 0; 2453 if (ioctl(a0, TIOCGPTN, &ptyno)) 2454 return -1; 2455 char buf[128]; 2456 sprintf(buf, "/dev/pts/%d", ptyno); 2457 return open(buf, a1, 0); 2458 } 2459 #endif 2460 2461 #if SYZ_EXECUTOR || __NR_syz_init_net_socket 2462 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID 2463 #include <fcntl.h> 2464 #include <sched.h> 2465 #include <sys/stat.h> 2466 #include <sys/types.h> 2467 
#include <unistd.h> 2468 2469 // syz_init_net_socket opens a socket in init net namespace. 2470 // Used for families that can only be created in init net namespace. 2471 static long syz_init_net_socket(volatile long domain, volatile long type, volatile long proto) 2472 { 2473 int netns = open("/proc/self/ns/net", O_RDONLY); 2474 if (netns == -1) 2475 return netns; 2476 if (setns(kInitNetNsFd, 0)) 2477 return -1; 2478 int sock = syscall(__NR_socket, domain, type, proto); 2479 int err = errno; 2480 if (setns(netns, 0)) { 2481 // The operation may fail if the fd is closed by 2482 // a syscall from another thread. 2483 exitf("setns(netns) failed"); 2484 } 2485 close(netns); 2486 errno = err; 2487 return sock; 2488 } 2489 #else 2490 static long syz_init_net_socket(volatile long domain, volatile long type, volatile long proto) 2491 { 2492 return syscall(__NR_socket, domain, type, proto); 2493 } 2494 #endif 2495 #endif 2496 2497 #if SYZ_EXECUTOR || __NR_syz_socket_connect_nvme_tcp 2498 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE 2499 #include <arpa/inet.h> 2500 #include <fcntl.h> 2501 #include <netinet/in.h> 2502 #include <sched.h> 2503 #include <sys/socket.h> 2504 #include <sys/stat.h> 2505 #include <sys/types.h> 2506 #include <unistd.h> 2507 2508 static long syz_socket_connect_nvme_tcp() 2509 { 2510 struct sockaddr_in nvme_local_address; 2511 int netns = open("/proc/self/ns/net", O_RDONLY); 2512 if (netns == -1) 2513 return netns; 2514 if (setns(kInitNetNsFd, 0)) 2515 return -1; 2516 int sock = syscall(__NR_socket, AF_INET, SOCK_STREAM, 0x0); 2517 int err = errno; 2518 if (setns(netns, 0)) { 2519 // The operation may fail if the fd is closed by 2520 // a syscall from another thread. 
2521 exitf("setns(netns) failed"); 2522 } 2523 close(netns); 2524 errno = err; 2525 // We only connect to an NVMe-oF/TCP server on 127.0.0.1:4420 2526 nvme_local_address.sin_family = AF_INET; 2527 nvme_local_address.sin_port = htobe16(4420); 2528 nvme_local_address.sin_addr.s_addr = htobe32(0x7f000001); 2529 err = syscall(__NR_connect, sock, &nvme_local_address, sizeof(nvme_local_address)); 2530 if (err != 0) { 2531 close(sock); 2532 return -1; 2533 } 2534 return sock; 2535 } 2536 #else 2537 static long syz_socket_connect_nvme_tcp() 2538 { 2539 return syscall(__NR_socket, -1, 0, 0); 2540 } 2541 #endif 2542 #endif 2543 2544 #if SYZ_EXECUTOR || SYZ_VHCI_INJECTION 2545 #include <errno.h> 2546 #include <fcntl.h> 2547 #include <linux/rfkill.h> 2548 #include <pthread.h> 2549 #include <sys/epoll.h> 2550 #include <sys/ioctl.h> 2551 #include <sys/socket.h> 2552 #include <sys/uio.h> 2553 2554 #define BTPROTO_HCI 1 2555 #define ACL_LINK 1 2556 #define SCAN_PAGE 2 2557 2558 typedef struct { 2559 uint8 b[6]; 2560 } __attribute__((packed)) bdaddr_t; 2561 2562 #define HCI_COMMAND_PKT 1 2563 #define HCI_EVENT_PKT 4 2564 #define HCI_VENDOR_PKT 0xff 2565 2566 struct hci_command_hdr { 2567 uint16 opcode; 2568 uint8 plen; 2569 } __attribute__((packed)); 2570 2571 struct hci_event_hdr { 2572 uint8 evt; 2573 uint8 plen; 2574 } __attribute__((packed)); 2575 2576 #define HCI_EV_CONN_COMPLETE 0x03 2577 struct hci_ev_conn_complete { 2578 uint8 status; 2579 uint16 handle; 2580 bdaddr_t bdaddr; 2581 uint8 link_type; 2582 uint8 encr_mode; 2583 } __attribute__((packed)); 2584 2585 #define HCI_EV_CONN_REQUEST 0x04 2586 struct hci_ev_conn_request { 2587 bdaddr_t bdaddr; 2588 uint8 dev_class[3]; 2589 uint8 link_type; 2590 } __attribute__((packed)); 2591 2592 #define HCI_EV_REMOTE_FEATURES 0x0b 2593 struct hci_ev_remote_features { 2594 uint8 status; 2595 uint16 handle; 2596 uint8 features[8]; 2597 } __attribute__((packed)); 2598 2599 #define HCI_EV_CMD_COMPLETE 0x0e 2600 struct hci_ev_cmd_complete 
{ 2601 uint8 ncmd; 2602 uint16 opcode; 2603 } __attribute__((packed)); 2604 2605 #define HCI_OP_WRITE_SCAN_ENABLE 0x0c1a 2606 2607 #define HCI_OP_READ_BUFFER_SIZE 0x1005 2608 struct hci_rp_read_buffer_size { 2609 uint8 status; 2610 uint16 acl_mtu; 2611 uint8 sco_mtu; 2612 uint16 acl_max_pkt; 2613 uint16 sco_max_pkt; 2614 } __attribute__((packed)); 2615 2616 #define HCI_OP_READ_BD_ADDR 0x1009 2617 struct hci_rp_read_bd_addr { 2618 uint8 status; 2619 bdaddr_t bdaddr; 2620 } __attribute__((packed)); 2621 2622 #define HCI_EV_LE_META 0x3e 2623 struct hci_ev_le_meta { 2624 uint8 subevent; 2625 } __attribute__((packed)); 2626 2627 #define HCI_EV_LE_CONN_COMPLETE 0x01 2628 struct hci_ev_le_conn_complete { 2629 uint8 status; 2630 uint16 handle; 2631 uint8 role; 2632 uint8 bdaddr_type; 2633 bdaddr_t bdaddr; 2634 uint16 interval; 2635 uint16 latency; 2636 uint16 supervision_timeout; 2637 uint8 clk_accurancy; 2638 } __attribute__((packed)); 2639 2640 struct hci_dev_req { 2641 uint16 dev_id; 2642 uint32 dev_opt; 2643 }; 2644 2645 struct vhci_vendor_pkt_request { 2646 uint8 type; 2647 uint8 opcode; 2648 } __attribute__((packed)); 2649 2650 struct vhci_pkt { 2651 uint8 type; 2652 union { 2653 struct { 2654 uint8 opcode; 2655 uint16 id; 2656 } __attribute__((packed)) vendor_pkt; 2657 struct hci_command_hdr command_hdr; 2658 }; 2659 } __attribute__((packed)); 2660 2661 #define HCIDEVUP _IOW('H', 201, int) 2662 #define HCISETSCAN _IOW('H', 221, int) 2663 2664 static int vhci_fd = -1; 2665 2666 static void rfkill_unblock_all() 2667 { 2668 int fd = open("/dev/rfkill", O_WRONLY); 2669 if (fd < 0) 2670 fail("open /dev/rfkill failed"); 2671 struct rfkill_event event = {0}; 2672 event.idx = 0; 2673 event.type = RFKILL_TYPE_ALL; 2674 event.op = RFKILL_OP_CHANGE_ALL; 2675 event.soft = 0; 2676 event.hard = 0; 2677 if (write(fd, &event, sizeof(event)) < 0) 2678 fail("write rfkill event failed"); 2679 close(fd); 2680 } 2681 2682 static void hci_send_event_packet(int fd, uint8 evt, void* data, 
size_t data_len) 2683 { 2684 struct iovec iv[3]; 2685 2686 struct hci_event_hdr hdr; 2687 hdr.evt = evt; 2688 hdr.plen = data_len; 2689 2690 uint8 type = HCI_EVENT_PKT; 2691 2692 iv[0].iov_base = &type; 2693 iv[0].iov_len = sizeof(type); 2694 iv[1].iov_base = &hdr; 2695 iv[1].iov_len = sizeof(hdr); 2696 iv[2].iov_base = data; 2697 iv[2].iov_len = data_len; 2698 2699 if (writev(fd, iv, sizeof(iv) / sizeof(struct iovec)) < 0) 2700 fail("writev failed"); 2701 } 2702 2703 static void hci_send_event_cmd_complete(int fd, uint16 opcode, void* data, size_t data_len) 2704 { 2705 struct iovec iv[4]; 2706 2707 struct hci_event_hdr hdr; 2708 hdr.evt = HCI_EV_CMD_COMPLETE; 2709 hdr.plen = sizeof(struct hci_ev_cmd_complete) + data_len; 2710 2711 struct hci_ev_cmd_complete evt_hdr; 2712 evt_hdr.ncmd = 1; 2713 evt_hdr.opcode = opcode; 2714 2715 uint8 type = HCI_EVENT_PKT; 2716 2717 iv[0].iov_base = &type; 2718 iv[0].iov_len = sizeof(type); 2719 iv[1].iov_base = &hdr; 2720 iv[1].iov_len = sizeof(hdr); 2721 iv[2].iov_base = &evt_hdr; 2722 iv[2].iov_len = sizeof(evt_hdr); 2723 iv[3].iov_base = data; 2724 iv[3].iov_len = data_len; 2725 2726 if (writev(fd, iv, sizeof(iv) / sizeof(struct iovec)) < 0) 2727 fail("writev failed"); 2728 } 2729 2730 static bool process_command_pkt(int fd, char* buf, ssize_t buf_size) 2731 { 2732 struct hci_command_hdr* hdr = (struct hci_command_hdr*)buf; 2733 if (buf_size < (ssize_t)sizeof(struct hci_command_hdr) || 2734 hdr->plen != buf_size - sizeof(struct hci_command_hdr)) 2735 failmsg("process_command_pkt: invalid size", "suze=%zx", buf_size); 2736 2737 switch (hdr->opcode) { 2738 case HCI_OP_WRITE_SCAN_ENABLE: { 2739 uint8 status = 0; 2740 hci_send_event_cmd_complete(fd, hdr->opcode, &status, sizeof(status)); 2741 return true; 2742 } 2743 case HCI_OP_READ_BD_ADDR: { 2744 struct hci_rp_read_bd_addr rp = {0}; 2745 rp.status = 0; 2746 memset(&rp.bdaddr, 0xaa, 6); 2747 hci_send_event_cmd_complete(fd, hdr->opcode, &rp, sizeof(rp)); 2748 return false; 2749 } 
2750 case HCI_OP_READ_BUFFER_SIZE: { 2751 struct hci_rp_read_buffer_size rp = {0}; 2752 rp.status = 0; 2753 rp.acl_mtu = 1021; 2754 rp.sco_mtu = 96; 2755 rp.acl_max_pkt = 4; 2756 rp.sco_max_pkt = 6; 2757 hci_send_event_cmd_complete(fd, hdr->opcode, &rp, sizeof(rp)); 2758 return false; 2759 } 2760 } 2761 2762 char dummy[0xf9] = {0}; 2763 hci_send_event_cmd_complete(fd, hdr->opcode, dummy, sizeof(dummy)); 2764 return false; 2765 } 2766 2767 static void* event_thread(void* arg) 2768 { 2769 while (1) { 2770 char buf[1024] = {0}; 2771 ssize_t buf_size = read(vhci_fd, buf, sizeof(buf)); 2772 if (buf_size < 0) 2773 fail("read failed"); 2774 debug_dump_data(buf, buf_size); 2775 if (buf_size > 0 && buf[0] == HCI_COMMAND_PKT) { 2776 if (process_command_pkt(vhci_fd, buf + 1, buf_size - 1)) 2777 break; 2778 } 2779 } 2780 return NULL; 2781 } 2782 2783 // Matches hci_handles in sys/linux/dev_vhci.txt. 2784 #define HCI_HANDLE_1 200 2785 #define HCI_HANDLE_2 201 2786 2787 #define HCI_PRIMARY 0 2788 #define HCI_OP_RESET 0x0c03 2789 2790 static void initialize_vhci() 2791 { 2792 #if SYZ_EXECUTOR 2793 if (!flag_vhci_injection) 2794 return; 2795 #endif 2796 2797 int hci_sock = socket(AF_BLUETOOTH, SOCK_RAW, BTPROTO_HCI); 2798 if (hci_sock < 0) 2799 fail("socket(AF_BLUETOOTH, SOCK_RAW, BTPROTO_HCI) failed"); 2800 2801 vhci_fd = open("/dev/vhci", O_RDWR); 2802 if (vhci_fd == -1) 2803 fail("open /dev/vhci failed"); 2804 2805 // Remap vhci onto higher fd number to hide it from fuzzer and to keep 2806 // fd numbers stable regardless of whether vhci is opened or not (also see kMaxFd). 
2807 const int kVhciFd = 202; 2808 if (dup2(vhci_fd, kVhciFd) < 0) 2809 fail("dup2(vhci_fd, kVhciFd) failed"); 2810 close(vhci_fd); 2811 vhci_fd = kVhciFd; 2812 2813 struct vhci_vendor_pkt_request vendor_pkt_req = {HCI_VENDOR_PKT, HCI_PRIMARY}; 2814 if (write(vhci_fd, &vendor_pkt_req, sizeof(vendor_pkt_req)) != sizeof(vendor_pkt_req)) 2815 fail("vendor_pkt_req write failed"); 2816 2817 struct vhci_pkt vhci_pkt; 2818 if (read(vhci_fd, &vhci_pkt, sizeof(vhci_pkt)) != sizeof(vhci_pkt)) 2819 fail("vhci_pkt read failed"); 2820 2821 if (vhci_pkt.type == HCI_COMMAND_PKT && vhci_pkt.command_hdr.opcode == HCI_OP_RESET) { 2822 char response[1] = {0}; 2823 hci_send_event_cmd_complete(vhci_fd, HCI_OP_RESET, response, sizeof(response)); 2824 2825 if (read(vhci_fd, &vhci_pkt, sizeof(vhci_pkt)) != sizeof(vhci_pkt)) 2826 fail("vhci_pkt read failed"); 2827 } 2828 2829 if (vhci_pkt.type != HCI_VENDOR_PKT) 2830 fail("wrong response packet"); 2831 2832 int dev_id = vhci_pkt.vendor_pkt.id; 2833 debug("hci dev id: %x\n", dev_id); 2834 2835 pthread_t th; 2836 if (pthread_create(&th, NULL, event_thread, NULL)) 2837 fail("pthread_create failed"); 2838 2839 // Bring hci device up 2840 int ret = ioctl(hci_sock, HCIDEVUP, dev_id); 2841 if (ret) { 2842 if (errno == ERFKILL) { 2843 rfkill_unblock_all(); 2844 ret = ioctl(hci_sock, HCIDEVUP, dev_id); 2845 } 2846 2847 if (ret && errno != EALREADY) 2848 fail("ioctl(HCIDEVUP) failed"); 2849 } 2850 2851 // Activate page scanning mode which is required to fake a connection. 2852 struct hci_dev_req dr = {0}; 2853 dr.dev_id = dev_id; 2854 dr.dev_opt = SCAN_PAGE; 2855 if (ioctl(hci_sock, HCISETSCAN, &dr)) 2856 fail("ioctl(HCISETSCAN) failed"); 2857 2858 // Fake a connection with bd address 10:aa:aa:aa:aa:aa. 2859 // This is a fixed address used in sys/linux/socket_bluetooth.txt. 
2860 struct hci_ev_conn_request request; 2861 memset(&request, 0, sizeof(request)); 2862 memset(&request.bdaddr, 0xaa, 6); 2863 *(uint8*)&request.bdaddr.b[5] = 0x10; 2864 request.link_type = ACL_LINK; 2865 hci_send_event_packet(vhci_fd, HCI_EV_CONN_REQUEST, &request, sizeof(request)); 2866 2867 struct hci_ev_conn_complete complete; 2868 memset(&complete, 0, sizeof(complete)); 2869 complete.status = 0; 2870 complete.handle = HCI_HANDLE_1; 2871 memset(&complete.bdaddr, 0xaa, 6); 2872 *(uint8*)&complete.bdaddr.b[5] = 0x10; 2873 complete.link_type = ACL_LINK; 2874 complete.encr_mode = 0; 2875 hci_send_event_packet(vhci_fd, HCI_EV_CONN_COMPLETE, &complete, sizeof(complete)); 2876 2877 struct hci_ev_remote_features features; 2878 memset(&features, 0, sizeof(features)); 2879 features.status = 0; 2880 features.handle = HCI_HANDLE_1; 2881 hci_send_event_packet(vhci_fd, HCI_EV_REMOTE_FEATURES, &features, sizeof(features)); 2882 2883 // Fake a low-energy connection with bd address 11:aa:aa:aa:aa:aa. 2884 // This is a fixed address used in sys/linux/socket_bluetooth.txt. 
2885 struct { 2886 struct hci_ev_le_meta le_meta; 2887 struct hci_ev_le_conn_complete le_conn; 2888 } le_conn; 2889 memset(&le_conn, 0, sizeof(le_conn)); 2890 le_conn.le_meta.subevent = HCI_EV_LE_CONN_COMPLETE; 2891 memset(&le_conn.le_conn.bdaddr, 0xaa, 6); 2892 *(uint8*)&le_conn.le_conn.bdaddr.b[5] = 0x11; 2893 le_conn.le_conn.role = 1; 2894 le_conn.le_conn.handle = HCI_HANDLE_2; 2895 hci_send_event_packet(vhci_fd, HCI_EV_LE_META, &le_conn, sizeof(le_conn)); 2896 2897 pthread_join(th, NULL); 2898 close(hci_sock); 2899 } 2900 #endif 2901 2902 #if SYZ_EXECUTOR || __NR_syz_emit_vhci && SYZ_VHCI_INJECTION 2903 static long syz_emit_vhci(volatile long a0, volatile long a1) 2904 { 2905 if (vhci_fd < 0) 2906 return (uintptr_t)-1; 2907 2908 char* data = (char*)a0; 2909 uint32 length = a1; 2910 2911 return write(vhci_fd, data, length); 2912 } 2913 #endif 2914 2915 #if SYZ_EXECUTOR || __NR_syz_genetlink_get_family_id 2916 #include <errno.h> 2917 #include <sys/socket.h> 2918 2919 static long syz_genetlink_get_family_id(volatile long name, volatile long sock_arg) 2920 { 2921 debug("syz_genetlink_get_family_id(%s, %d)\n", (char*)name, (int)sock_arg); 2922 int fd = sock_arg; 2923 if (fd < 0) { 2924 fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 2925 if (fd == -1) { 2926 debug("syz_genetlink_get_family_id: socket failed: %d\n", errno); 2927 return -1; 2928 } 2929 } 2930 struct nlmsg nlmsg_tmp; 2931 int ret = netlink_query_family_id(&nlmsg_tmp, fd, (char*)name, false); 2932 if ((int)sock_arg < 0) 2933 close(fd); 2934 if (ret < 0) { 2935 debug("syz_genetlink_get_family_id: netlink_query_family_id failed: %d\n", ret); 2936 return -1; 2937 } 2938 2939 return ret; 2940 } 2941 #endif 2942 2943 #if SYZ_EXECUTOR || __NR_syz_mount_image || __NR_syz_read_part_table 2944 #include "common_zlib.h" 2945 #include <errno.h> 2946 #include <fcntl.h> 2947 #include <linux/loop.h> 2948 #include <stdbool.h> 2949 #include <sys/ioctl.h> 2950 #include <sys/stat.h> 2951 #include <sys/types.h> 2952 
2953 // Setup the loop device needed for mounting a filesystem image. Takes care of 2954 // creating and initializing the underlying file backing the loop device and 2955 // returns the fds to the file and device. 2956 // Returns 0 on success, -1 otherwise. 2957 static int setup_loop_device(unsigned char* data, unsigned long size, const char* loopname, int* loopfd_p) 2958 { 2959 int err = 0, loopfd = -1; 2960 int memfd = syscall(__NR_memfd_create, "syzkaller", 0); 2961 if (memfd == -1) { 2962 err = errno; 2963 goto error; 2964 } 2965 if (puff_zlib_to_file(data, size, memfd)) { 2966 err = errno; 2967 debug("setup_loop_device: could not decompress data: %d\n", errno); 2968 goto error_close_memfd; 2969 } 2970 2971 loopfd = open(loopname, O_RDWR); 2972 if (loopfd == -1) { 2973 err = errno; 2974 debug("setup_loop_device: open failed: %d\n", errno); 2975 goto error_close_memfd; 2976 } 2977 if (ioctl(loopfd, LOOP_SET_FD, memfd)) { 2978 if (errno != EBUSY) { 2979 err = errno; 2980 goto error_close_loop; 2981 } 2982 ioctl(loopfd, LOOP_CLR_FD, 0); 2983 usleep(1000); 2984 if (ioctl(loopfd, LOOP_SET_FD, memfd)) { 2985 err = errno; 2986 goto error_close_loop; 2987 } 2988 } 2989 2990 close(memfd); 2991 *loopfd_p = loopfd; 2992 return 0; 2993 2994 error_close_loop: 2995 close(loopfd); 2996 error_close_memfd: 2997 close(memfd); 2998 error: 2999 errno = err; 3000 return -1; 3001 } 3002 3003 #if SYZ_EXECUTOR || __NR_syz_mount_image 3004 3005 static void reset_loop_device(const char* loopname) 3006 { 3007 int loopfd = open(loopname, O_RDWR); 3008 if (loopfd == -1) { 3009 debug("reset_loop_device: open failed: %d\n", errno); 3010 return; 3011 } 3012 if (ioctl(loopfd, LOOP_CLR_FD, 0)) { 3013 debug("reset_loop_device: LOOP_CLR_FD failed: %d\n", errno); 3014 } 3015 close(loopfd); 3016 } 3017 3018 #endif 3019 3020 #endif 3021 3022 #if SYZ_EXECUTOR || __NR_syz_read_part_table 3023 // syz_read_part_table(size len[img], img ptr[in, compressed_image]) 3024 static long 
syz_read_part_table(volatile unsigned long size, volatile long image) 3025 { 3026 unsigned char* data = (unsigned char*)image; 3027 int err = 0, res = -1, loopfd = -1; 3028 char loopname[64]; 3029 3030 snprintf(loopname, sizeof(loopname), "/dev/loop%llu", procid); 3031 if (setup_loop_device(data, size, loopname, &loopfd) == -1) 3032 return -1; 3033 3034 struct loop_info64 info; 3035 if (ioctl(loopfd, LOOP_GET_STATUS64, &info)) { 3036 err = errno; 3037 goto error_clear_loop; 3038 } 3039 #if SYZ_EXECUTOR 3040 cover_reset(0); 3041 #endif 3042 info.lo_flags |= LO_FLAGS_PARTSCAN; 3043 if (ioctl(loopfd, LOOP_SET_STATUS64, &info)) { 3044 err = errno; 3045 goto error_clear_loop; 3046 } 3047 res = 0; 3048 // If we managed to parse some partitions, symlink them into our work dir. 3049 for (unsigned long i = 1, j = 0; i < 8; i++) { 3050 snprintf(loopname, sizeof(loopname), "/dev/loop%llup%d", procid, (int)i); 3051 struct stat statbuf; 3052 if (stat(loopname, &statbuf) == 0) { 3053 char linkname[64]; 3054 snprintf(linkname, sizeof(linkname), "./file%d", (int)j++); 3055 if (symlink(loopname, linkname)) { 3056 debug("syz_read_part_table: symlink(%s, %s) failed: %d\n", loopname, linkname, errno); 3057 } 3058 } 3059 } 3060 error_clear_loop: 3061 if (res) 3062 ioctl(loopfd, LOOP_CLR_FD, 0); 3063 close(loopfd); 3064 errno = err; 3065 return res; 3066 } 3067 #endif 3068 3069 #if SYZ_EXECUTOR || __NR_syz_mount_image 3070 #include <stddef.h> 3071 #include <string.h> 3072 #include <sys/mount.h> 3073 3074 // syz_mount_image( 3075 // fs ptr[in, string[fs]], 3076 // dir ptr[in, filename], 3077 // flags flags[mount_flags], 3078 // opts ptr[in, fs_options], 3079 // chdir bool8, 3080 // size len[img], 3081 // img ptr[in, compressed_image] 3082 // ) fd_dir 3083 static long syz_mount_image( 3084 volatile long fsarg, 3085 volatile long dir, 3086 volatile long flags, 3087 volatile long optsarg, 3088 volatile long change_dir, 3089 volatile unsigned long size, 3090 volatile long image) 3091 { 3092 
unsigned char* data = (unsigned char*)image; 3093 int res = -1, err = 0, need_loop_device = !!size; 3094 char* mount_opts = (char*)optsarg; 3095 char* target = (char*)dir; 3096 char* fs = (char*)fsarg; 3097 char* source = NULL; 3098 char loopname[64]; 3099 3100 if (need_loop_device) { 3101 int loopfd; 3102 // Some filesystems (e.g. FUSE) do not need a backing device or 3103 // filesystem image. 3104 memset(loopname, 0, sizeof(loopname)); 3105 snprintf(loopname, sizeof(loopname), "/dev/loop%llu", procid); 3106 if (setup_loop_device(data, size, loopname, &loopfd) == -1) 3107 return -1; 3108 // If BLK_DEV_WRITE_MOUNTED is set, we won't be able to mount() 3109 // while holding the loop device fd. 3110 close(loopfd); 3111 source = loopname; 3112 } 3113 3114 mkdir(target, 0777); 3115 char opts[256]; 3116 memset(opts, 0, sizeof(opts)); 3117 // Leave some space for the additional options we append below. 3118 if (strlen(mount_opts) > (sizeof(opts) - 32)) { 3119 debug("ERROR: syz_mount_image parameter optsarg bigger than internal opts\n"); 3120 } 3121 strncpy(opts, mount_opts, sizeof(opts) - 32); 3122 if (strcmp(fs, "iso9660") == 0) { 3123 flags |= MS_RDONLY; 3124 } else if (strncmp(fs, "ext", 3) == 0) { 3125 // For ext2/3/4 we have to have errors=continue because the image 3126 // can contain errors=panic flag and can legally crash kernel. 3127 bool has_remount_ro = false; 3128 char* remount_ro_start = strstr(opts, "errors=remount-ro"); 3129 if (remount_ro_start != NULL) { 3130 // syzkaller can sometimes break the options format, so we have to make sure this option can really be parsed. 3131 char after = *(remount_ro_start + strlen("errors=remount-ro")); 3132 char before = remount_ro_start == opts ? 
'\0' : *(remount_ro_start - 1); 3133 has_remount_ro = ((before == '\0' || before == ',') && (after == '\0' || after == ',')); 3134 } 3135 if (strstr(opts, "errors=panic") || !has_remount_ro) 3136 strcat(opts, ",errors=continue"); 3137 } else if (strcmp(fs, "xfs") == 0) { 3138 // For xfs we need nouuid because xfs has a global uuids table 3139 // and if two parallel executors mounts fs with the same uuid, second mount fails. 3140 strcat(opts, ",nouuid"); 3141 } 3142 debug("syz_mount_image: size=%llu loop='%s' dir='%s' fs='%s' flags=%llu opts='%s'\n", (uint64)size, loopname, target, fs, (uint64)flags, opts); 3143 #if SYZ_EXECUTOR 3144 cover_reset(0); 3145 #endif 3146 res = mount(source, target, fs, flags, opts); 3147 if (res == -1) { 3148 debug("syz_mount_image > mount error: %d\n", errno); 3149 err = errno; 3150 goto error_clear_loop; 3151 } 3152 res = open(target, O_RDONLY | O_DIRECTORY); 3153 if (res == -1) { 3154 debug("syz_mount_image > open error: %d\n", errno); 3155 err = errno; 3156 goto error_clear_loop; 3157 } 3158 if (change_dir) { 3159 res = chdir(target); 3160 if (res == -1) { 3161 debug("syz_mount_image > chdir error: %d\n", errno); 3162 err = errno; 3163 } 3164 } 3165 3166 error_clear_loop: 3167 if (need_loop_device) 3168 reset_loop_device(loopname); 3169 errno = err; 3170 return res; 3171 } 3172 #endif 3173 3174 #if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu 3175 // KVM is not yet supported on RISC-V 3176 #if !GOARCH_riscv64 && !GOARCH_arm 3177 #include <errno.h> 3178 #include <fcntl.h> 3179 #include <linux/kvm.h> 3180 #include <stdarg.h> 3181 #include <stddef.h> 3182 #include <sys/ioctl.h> 3183 #include <sys/stat.h> 3184 3185 #if GOARCH_amd64 3186 #include "common_kvm_amd64.h" 3187 #elif GOARCH_arm64 3188 #include "common_kvm_arm64.h" 3189 #elif GOARCH_ppc64 || GOARCH_ppc64le 3190 #include "common_kvm_ppc64.h" 3191 #elif !GOARCH_arm 3192 static volatile long syz_kvm_setup_cpu(volatile long a0, volatile long a1, volatile long a2, volatile long a3, 
volatile long a4, volatile long a5, volatile long a6, volatile long a7)
{
	// Stub for architectures without a dedicated common_kvm_*.h: does nothing.
	return 0;
}
#endif
#endif
#endif

#if (SYZ_EXECUTOR || SYZ_NET_RESET) && SYZ_EXECUTOR_USES_FORK_SERVER
#include <errno.h>
#include <net/if.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>

#include <linux/net.h>

// checkpoint/reset_net_namespace partially resets net namespace to initial state
// after each test. Currently it resets only netfilter state: the ebtables,
// arptables and IPv4/IPv6 iptables tables listed below.
// Ideally, we just create a new net namespace for each test,
// however it's too slow (1-1.5 seconds per namespace, not parallelizable).

// Linux headers do not compile for C++, so we have to define the structs manually.

// Size in bytes of the buffer reserved for a table's entries.
#define XT_TABLE_SIZE 1536
// Capacity of the on-stack counters array used when a table is replaced.
#define XT_MAX_ENTRIES 10

struct xt_counters {
	uint64 pcnt, bcnt;
};

// Argument of the IPT_SO_GET_INFO getsockopt.
struct ipt_getinfo {
	char name[32];
	unsigned int valid_hooks;
	unsigned int hook_entry[5];
	unsigned int underflow[5];
	unsigned int num_entries;
	unsigned int size;
};

// Argument of the IPT_SO_GET_ENTRIES getsockopt. Only the first `size` bytes of
// entrytable are requested.
struct ipt_get_entries {
	char name[32];
	unsigned int size;
	uint64 entrytable[XT_TABLE_SIZE / sizeof(uint64)];
};

// Argument of the IPT_SO_SET_REPLACE setsockopt.
struct ipt_replace {
	char name[32];
	unsigned int valid_hooks;
	unsigned int num_entries;
	unsigned int size;
	unsigned int hook_entry[5];
	unsigned int underflow[5];
	unsigned int num_counters;
	struct xt_counters* counters;
	uint64 entrytable[XT_TABLE_SIZE / sizeof(uint64)];
};

// Checkpoint of one iptables table: `info` is the header captured at checkpoint
// time, `replace` is the ready-to-send request that restores the table.
struct ipt_table_desc {
	const char* name;
	struct ipt_getinfo info;
	struct ipt_replace replace;
};

static struct ipt_table_desc ipv4_tables[] = {
    {.name = "filter"},
    {.name = "nat"},
    {.name = "mangle"},
    {.name = "raw"},
    {.name = "security"},
};

static struct ipt_table_desc ipv6_tables[] = {
    {.name = "filter"},
{.name = "nat"},
    {.name = "mangle"},
    {.name = "raw"},
    {.name = "security"},
};

// get/setsockopt option numbers. Get and set options are numbered independently,
// so IPT_SO_SET_REPLACE and IPT_SO_GET_INFO share a value.
#define IPT_BASE_CTL 64
#define IPT_SO_SET_REPLACE (IPT_BASE_CTL)
#define IPT_SO_GET_INFO (IPT_BASE_CTL)
#define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1)

// arptables counterparts of the ipt_* structures above. They differ only in
// having 3 hooks instead of 5.
struct arpt_getinfo {
	char name[32];
	unsigned int valid_hooks;
	unsigned int hook_entry[3];
	unsigned int underflow[3];
	unsigned int num_entries;
	unsigned int size;
};

struct arpt_get_entries {
	char name[32];
	unsigned int size;
	uint64 entrytable[XT_TABLE_SIZE / sizeof(uint64)];
};

struct arpt_replace {
	char name[32];
	unsigned int valid_hooks;
	unsigned int num_entries;
	unsigned int size;
	unsigned int hook_entry[3];
	unsigned int underflow[3];
	unsigned int num_counters;
	struct xt_counters* counters;
	uint64 entrytable[XT_TABLE_SIZE / sizeof(uint64)];
};

// Checkpoint of one arptables table (same roles as in ipt_table_desc).
struct arpt_table_desc {
	const char* name;
	struct arpt_getinfo info;
	struct arpt_replace replace;
};

static struct arpt_table_desc arpt_tables[] = {
    {.name = "filter"},
};

#define ARPT_BASE_CTL 96
#define ARPT_SO_SET_REPLACE (ARPT_BASE_CTL)
#define ARPT_SO_GET_INFO (ARPT_BASE_CTL)
#define ARPT_SO_GET_ENTRIES (ARPT_BASE_CTL + 1)

// Records the current contents of each table in `tables` (num_tables elements)
// so that reset_iptables() can restore them later.
// `family`/`level` select IPv4 (AF_INET/SOL_IP) or IPv6 (AF_INET6/SOL_IPV6).
// Tables the kernel refuses to report (EPERM/ENOENT/ENOPROTOOPT) are skipped and
// keep info.valid_hooks == 0. Any other failure is reported through failmsg().
static void checkpoint_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level)
{
	int fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
	if (fd == -1) {
		switch (errno) {
		case EAFNOSUPPORT:
		case ENOPROTOOPT:
		// ENOENT can be returned if smack lsm is used. Smack tries to apply netlbl to created sockets,
		// but the fuzzer can manage to remove netlbl entry for SOCK_STREAM/IPPROTO_TCP using
		// NLBL_MGMT_C_REMOVE, which is unfortunately global (not part of net namespace). In this state
		// creation of such sockets will fail all the time in all processes (so in some sense the machine
		// is indeed broken), but ignoring the error is still probably the best option given we allow
		// the fuzzer to invoke NLBL_MGMT_C_REMOVE in the first place.
		case ENOENT:
			return;
		}
		failmsg("iptable checkpoint: socket(SOCK_STREAM, IPPROTO_TCP) failed", "family=%d", family);
	}
	for (int i = 0; i < num_tables; i++) {
		struct ipt_table_desc* table = &tables[i];
		strcpy(table->info.name, table->name);
		strcpy(table->replace.name, table->name);
		socklen_t optlen = sizeof(table->info);
		if (getsockopt(fd, level, IPT_SO_GET_INFO, &table->info, &optlen)) {
			switch (errno) {
			case EPERM:
			case ENOENT:
			case ENOPROTOOPT:
				continue;
			}
			failmsg("iptable checkpoint: getsockopt(IPT_SO_GET_INFO) failed",
				"table=%s, family=%d", table->name, family);
		}
		debug("iptable checkpoint %s/%d: checkpoint entries=%d hooks=%x size=%d\n",
		      table->name, family, table->info.num_entries,
		      table->info.valid_hooks, table->info.size);
		// The table has to fit into the fixed-size buffers used here.
		if (table->info.size > sizeof(table->replace.entrytable))
			failmsg("iptable checkpoint: table size is too large", "table=%s, family=%d, size=%u",
				table->name, family, table->info.size);
		if (table->info.num_entries > XT_MAX_ENTRIES)
			failmsg("iptable checkpoint: too many counters", "table=%s, family=%d, counters=%d",
				table->name, family, table->info.num_entries);
		struct ipt_get_entries entries;
		memset(&entries, 0, sizeof(entries));
		strcpy(entries.name, table->name);
		entries.size = table->info.size;
		// Header plus exactly info.size bytes of entries.
		optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size;
		if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
			failmsg("iptable checkpoint: getsockopt(IPT_SO_GET_ENTRIES) failed",
				"table=%s, family=%d", table->name, family);
		// Pre-build the replace request that restores this snapshot.
		table->replace.valid_hooks = table->info.valid_hooks;
		table->replace.num_entries = table->info.num_entries;
		table->replace.size = table->info.size;
		memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry));
		memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow));
		memcpy(table->replace.entrytable, entries.entrytable, table->info.size);
	}
	close(fd);
}

// Restores every checkpointed table in `tables` whose header or entries differ
// from the snapshot taken by checkpoint_iptables(). Tables that were not
// checkpointed (info.valid_hooks == 0) are left alone.
static void reset_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level)
{
	int fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
	if (fd == -1) {
		switch (errno) {
		case EAFNOSUPPORT:
		case ENOPROTOOPT:
		case ENOENT:
			return;
		}
		failmsg("iptable: socket(SOCK_STREAM, IPPROTO_TCP) failed", "family=%d", family);
	}
	for (int i = 0; i < num_tables; i++) {
		struct ipt_table_desc* table = &tables[i];
		if (table->info.valid_hooks == 0)
			continue;
		struct ipt_getinfo info;
		memset(&info, 0, sizeof(info));
		strcpy(info.name, table->name);
		socklen_t optlen = sizeof(info);
		if (getsockopt(fd, level, IPT_SO_GET_INFO, &info, &optlen))
			failmsg("iptable: getsockopt(IPT_SO_GET_INFO) failed",
				"table=%s, family=%d", table->name, family);
		// Same header: compare the entries too before deciding to rewrite.
		if (memcmp(&table->info, &info, sizeof(table->info)) == 0) {
			struct ipt_get_entries entries;
			memset(&entries, 0, sizeof(entries));
			strcpy(entries.name, table->name);
			entries.size = table->info.size;
			optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size;
			if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
				failmsg("iptable: getsockopt(IPT_SO_GET_ENTRIES) failed",
					"table=%s, family=%d", table->name, family);
			if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0)
				continue;
		}
		debug("iptable %s/%d: resetting\n", table->name, family);
		struct xt_counters counters[XT_MAX_ENTRIES];
table->replace.num_counters = info.num_entries; 3416 table->replace.counters = counters; 3417 optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size; 3418 if (setsockopt(fd, level, IPT_SO_SET_REPLACE, &table->replace, optlen)) 3419 failmsg("iptable: setsockopt(IPT_SO_SET_REPLACE) failed", 3420 "table=%s, family=%d", table->name, family); 3421 } 3422 close(fd); 3423 } 3424 3425 static void checkpoint_arptables(void) 3426 { 3427 int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 3428 if (fd == -1) { 3429 switch (errno) { 3430 case EAFNOSUPPORT: 3431 case ENOPROTOOPT: 3432 case ENOENT: 3433 return; 3434 } 3435 fail("arptable checkpoint: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP) failed"); 3436 } 3437 for (unsigned i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) { 3438 struct arpt_table_desc* table = &arpt_tables[i]; 3439 strcpy(table->info.name, table->name); 3440 strcpy(table->replace.name, table->name); 3441 socklen_t optlen = sizeof(table->info); 3442 if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &table->info, &optlen)) { 3443 switch (errno) { 3444 case EPERM: 3445 case ENOENT: 3446 case ENOPROTOOPT: 3447 continue; 3448 } 3449 failmsg("arptable checkpoint: getsockopt(ARPT_SO_GET_INFO) failed", "table=%s", table->name); 3450 } 3451 debug("arptable checkpoint %s: entries=%d hooks=%x size=%d\n", 3452 table->name, table->info.num_entries, table->info.valid_hooks, table->info.size); 3453 if (table->info.size > sizeof(table->replace.entrytable)) 3454 failmsg("arptable checkpoint: table size is too large", 3455 "table=%s, size=%u", table->name, table->info.size); 3456 if (table->info.num_entries > XT_MAX_ENTRIES) 3457 failmsg("arptable checkpoint: too many counters", 3458 "table=%s, counters=%u", table->name, table->info.num_entries); 3459 struct arpt_get_entries entries; 3460 memset(&entries, 0, sizeof(entries)); 3461 strcpy(entries.name, table->name); 3462 entries.size = table->info.size; 3463 optlen = sizeof(entries) - 
sizeof(entries.entrytable) + table->info.size; 3464 if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen)) 3465 failmsg("arptable checkpoint: getsockopt(ARPT_SO_GET_ENTRIES) failed", "table=%s", table->name); 3466 table->replace.valid_hooks = table->info.valid_hooks; 3467 table->replace.num_entries = table->info.num_entries; 3468 table->replace.size = table->info.size; 3469 memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry)); 3470 memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow)); 3471 memcpy(table->replace.entrytable, entries.entrytable, table->info.size); 3472 } 3473 close(fd); 3474 } 3475 3476 static void reset_arptables() 3477 { 3478 int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 3479 if (fd == -1) { 3480 switch (errno) { 3481 case EAFNOSUPPORT: 3482 case ENOPROTOOPT: 3483 case ENOENT: 3484 return; 3485 } 3486 fail("arptable: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)"); 3487 } 3488 for (unsigned i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) { 3489 struct arpt_table_desc* table = &arpt_tables[i]; 3490 if (table->info.valid_hooks == 0) 3491 continue; 3492 struct arpt_getinfo info; 3493 memset(&info, 0, sizeof(info)); 3494 strcpy(info.name, table->name); 3495 socklen_t optlen = sizeof(info); 3496 if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &info, &optlen)) 3497 failmsg("arptable: getsockopt(ARPT_SO_GET_INFO) failed", "table=%s", table->name); 3498 if (memcmp(&table->info, &info, sizeof(table->info)) == 0) { 3499 struct arpt_get_entries entries; 3500 memset(&entries, 0, sizeof(entries)); 3501 strcpy(entries.name, table->name); 3502 entries.size = table->info.size; 3503 optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size; 3504 if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen)) 3505 failmsg("arptable: getsockopt(ARPT_SO_GET_ENTRIES) failed", "table=%s", table->name); 3506 if (memcmp(table->replace.entrytable, 
entries.entrytable, table->info.size) == 0) 3507 continue; 3508 debug("arptable %s: data changed\n", table->name); 3509 } else { 3510 debug("arptable %s: header changed\n", table->name); 3511 } 3512 debug("arptable %s: resetting\n", table->name); 3513 struct xt_counters counters[XT_MAX_ENTRIES]; 3514 table->replace.num_counters = info.num_entries; 3515 table->replace.counters = counters; 3516 optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size; 3517 if (setsockopt(fd, SOL_IP, ARPT_SO_SET_REPLACE, &table->replace, optlen)) 3518 failmsg("arptable: setsockopt(ARPT_SO_SET_REPLACE) failed", 3519 "table=%s", table->name); 3520 } 3521 close(fd); 3522 } 3523 3524 // ebtables.h is broken too: 3525 // ebtables.h: In function ‘ebt_entry_target* ebt_get_target(ebt_entry*)’: 3526 // ebtables.h:197:19: error: invalid conversion from ‘void*’ to ‘ebt_entry_target*’ 3527 3528 #define NF_BR_NUMHOOKS 6 3529 #define EBT_TABLE_MAXNAMELEN 32 3530 #define EBT_CHAIN_MAXNAMELEN 32 3531 #define EBT_BASE_CTL 128 3532 #define EBT_SO_SET_ENTRIES (EBT_BASE_CTL) 3533 #define EBT_SO_GET_INFO (EBT_BASE_CTL) 3534 #define EBT_SO_GET_ENTRIES (EBT_SO_GET_INFO + 1) 3535 #define EBT_SO_GET_INIT_INFO (EBT_SO_GET_ENTRIES + 1) 3536 #define EBT_SO_GET_INIT_ENTRIES (EBT_SO_GET_INIT_INFO + 1) 3537 3538 struct ebt_replace { 3539 char name[EBT_TABLE_MAXNAMELEN]; 3540 unsigned int valid_hooks; 3541 unsigned int nentries; 3542 unsigned int entries_size; 3543 struct ebt_entries* hook_entry[NF_BR_NUMHOOKS]; 3544 unsigned int num_counters; 3545 struct ebt_counter* counters; 3546 char* entries; 3547 }; 3548 3549 struct ebt_entries { 3550 unsigned int distinguisher; 3551 char name[EBT_CHAIN_MAXNAMELEN]; 3552 unsigned int counter_offset; 3553 int policy; 3554 unsigned int nentries; 3555 char data[0] __attribute__((aligned(__alignof__(struct ebt_replace)))); 3556 }; 3557 3558 struct ebt_table_desc { 3559 const char* name; 3560 struct ebt_replace replace; 3561 char 
entrytable[XT_TABLE_SIZE]; 3562 }; 3563 3564 static struct ebt_table_desc ebt_tables[] = { 3565 {.name = "filter"}, 3566 {.name = "nat"}, 3567 {.name = "broute"}, 3568 }; 3569 3570 static void checkpoint_ebtables(void) 3571 { 3572 int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 3573 if (fd == -1) { 3574 switch (errno) { 3575 case EAFNOSUPPORT: 3576 case ENOPROTOOPT: 3577 case ENOENT: 3578 return; 3579 } 3580 fail("ebtable checkpoint: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)"); 3581 } 3582 for (size_t i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) { 3583 struct ebt_table_desc* table = &ebt_tables[i]; 3584 strcpy(table->replace.name, table->name); 3585 socklen_t optlen = sizeof(table->replace); 3586 if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_INFO, &table->replace, &optlen)) { 3587 switch (errno) { 3588 case EPERM: 3589 case ENOENT: 3590 case ENOPROTOOPT: 3591 continue; 3592 } 3593 failmsg("ebtable checkpoint: getsockopt(EBT_SO_GET_INIT_INFO) failed", 3594 "table=%s", table->name); 3595 } 3596 debug("ebtable checkpoint %s: entries=%d hooks=%x size=%d\n", 3597 table->name, table->replace.nentries, table->replace.valid_hooks, 3598 table->replace.entries_size); 3599 if (table->replace.entries_size > sizeof(table->entrytable)) 3600 failmsg("ebtable checkpoint: table size is too large", "table=%s, size=%u", 3601 table->name, table->replace.entries_size); 3602 table->replace.num_counters = 0; 3603 table->replace.entries = table->entrytable; 3604 optlen = sizeof(table->replace) + table->replace.entries_size; 3605 if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_ENTRIES, &table->replace, &optlen)) 3606 failmsg("ebtable checkpoint: getsockopt(EBT_SO_GET_INIT_ENTRIES) failed", 3607 "table=%s", table->name); 3608 } 3609 close(fd); 3610 } 3611 3612 static void reset_ebtables() 3613 { 3614 int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 3615 if (fd == -1) { 3616 switch (errno) { 3617 case EAFNOSUPPORT: 3618 case ENOPROTOOPT: 3619 case ENOENT: 3620 return; 3621 } 
// Any errno other than the three tolerated above is unexpected.
		fail("ebtable: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
	}
	for (unsigned i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) {
		struct ebt_table_desc* table = &ebt_tables[i];
		// Never checkpointed: nothing to restore.
		if (table->replace.valid_hooks == 0)
			continue;
		struct ebt_replace replace;
		memset(&replace, 0, sizeof(replace));
		strcpy(replace.name, table->name);
		socklen_t optlen = sizeof(replace);
		if (getsockopt(fd, SOL_IP, EBT_SO_GET_INFO, &replace, &optlen))
			failmsg("ebtable: getsockopt(EBT_SO_GET_INFO)", "table=%s", table->name);
		// Zero the pointer/counter fields on the saved side so the memcmp() below
		// compares the remaining header fields against a freshly zeroed struct.
		replace.num_counters = 0;
		table->replace.entries = 0;
		for (unsigned h = 0; h < NF_BR_NUMHOOKS; h++)
			table->replace.hook_entry[h] = 0;
		// Same header: compare the entries too before deciding to rewrite.
		if (memcmp(&table->replace, &replace, sizeof(table->replace)) == 0) {
			char entrytable[XT_TABLE_SIZE];
			memset(&entrytable, 0, sizeof(entrytable));
			replace.entries = entrytable;
			optlen = sizeof(replace) + replace.entries_size;
			if (getsockopt(fd, SOL_IP, EBT_SO_GET_ENTRIES, &replace, &optlen))
				failmsg("ebtable: getsockopt(EBT_SO_GET_ENTRIES) failed", "table=%s", table->name);
			if (memcmp(table->entrytable, entrytable, replace.entries_size) == 0)
				continue;
		}
		debug("ebtable %s: resetting\n", table->name);
		// Kernel does not seem to return actual entry points (wat?).
		// Rebuild them: the j-th valid hook points at the j-th struct ebt_entries
		// slot from the start of the saved entry table.
		for (unsigned j = 0, h = 0; h < NF_BR_NUMHOOKS; h++) {
			if (table->replace.valid_hooks & (1 << h)) {
				table->replace.hook_entry[h] = (struct ebt_entries*)table->entrytable + j;
				j++;
			}
		}
		table->replace.entries = table->entrytable;
		optlen = sizeof(table->replace) + table->replace.entries_size;
		if (setsockopt(fd, SOL_IP, EBT_SO_SET_ENTRIES, &table->replace, optlen))
			failmsg("ebtable: setsockopt(EBT_SO_SET_ENTRIES) failed", "table=%s", table->name);
	}
	close(fd);
}

// Snapshots ebtables, arptables and the IPv4/IPv6 iptables tables.
// In the executor this is a no-op unless flag_net_reset is set and
// flag_sandbox_setuid is clear.
static void checkpoint_net_namespace(void)
{
#if SYZ_EXECUTOR
	if (!flag_net_reset || flag_sandbox_setuid)
		return;
#endif
	checkpoint_ebtables();
	checkpoint_arptables();
	checkpoint_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP);
	checkpoint_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6);
}

// Restores the tables snapshotted by checkpoint_net_namespace(), under the same
// flag conditions.
static void reset_net_namespace(void)
{
#if SYZ_EXECUTOR
	if (!flag_net_reset || flag_sandbox_setuid)
		return;
#endif
	reset_ebtables();
	reset_arptables();
	reset_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP);
	reset_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6);
}
#endif

#if SYZ_EXECUTOR || (SYZ_CGROUPS && (SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID))
#include <fcntl.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>

// Creates `dir` and mounts a cgroup (v1) hierarchy on it with as many of the
// `count` named `controllers` as the kernel accepts.
static void mount_cgroups(const char* dir, const char** controllers, int count)
{
	if (mkdir(dir, 0777)) {
		debug("mkdir(%s) failed: %d\n", dir, errno);
		return;
	}
	// First, probe one-by-one to understand what controllers are present.
3703 char enabled[128] = {0}; 3704 int i = 0; 3705 for (; i < count; i++) { 3706 if (mount("none", dir, "cgroup", 0, controllers[i])) { 3707 debug("mount(%s, %s) failed: %d\n", dir, controllers[i], errno); 3708 continue; 3709 } 3710 umount(dir); 3711 strcat(enabled, ","); 3712 strcat(enabled, controllers[i]); 3713 } 3714 if (enabled[0] == 0) { 3715 if (rmdir(dir) && errno != EBUSY) 3716 failmsg("rmdir failed", "dir=%s", dir); 3717 return; 3718 } 3719 // Now mount all at once. 3720 if (mount("none", dir, "cgroup", 0, enabled + 1)) { 3721 // In systemd/stretch images this is failing with EBUSY 3722 // (systemd starts messing with these mounts?), 3723 // so we don't fail, but just log the error. 3724 debug("mount(%s, %s) failed: %d\n", dir, enabled + 1, errno); 3725 if (rmdir(dir) && errno != EBUSY) 3726 failmsg("rmdir failed", "dir=%s enabled=%s", dir, enabled); 3727 } 3728 if (chmod(dir, 0777)) { 3729 debug("chmod(%s) failed: %d\n", dir, errno); 3730 } 3731 } 3732 3733 static void mount_cgroups2(const char** controllers, int count) 3734 { 3735 if (mkdir("/syzcgroup/unified", 0777)) { 3736 debug("mkdir(/syzcgroup/unified) failed: %d\n", errno); 3737 return; 3738 } 3739 if (mount("none", "/syzcgroup/unified", "cgroup2", 0, NULL)) { 3740 debug("mount(cgroup2) failed: %d\n", errno); 3741 // For all cases when we don't end up mounting cgroup/cgroup2 3742 // in /syzcgroup/{unified,net,cpu}, we need to remove the dir. 3743 // Otherwise these will end up as normal dirs and the fuzzer may 3744 // create huge files there. These files won't be cleaned up 3745 // after tests and may easily consume all disk space. 3746 // EBUSY usually means that cgroup is already mounted there 3747 // by a previous run of e.g. syz-execprog. 
3748 if (rmdir("/syzcgroup/unified") && errno != EBUSY) 3749 fail("rmdir(/syzcgroup/unified) failed"); 3750 return; 3751 } 3752 if (chmod("/syzcgroup/unified", 0777)) { 3753 debug("chmod(/syzcgroup/unified) failed: %d\n", errno); 3754 } 3755 int control = open("/syzcgroup/unified/cgroup.subtree_control", O_WRONLY); 3756 if (control == -1) 3757 return; 3758 int i; 3759 for (i = 0; i < count; i++) 3760 if (write(control, controllers[i], strlen(controllers[i])) < 0) { 3761 debug("write(cgroup.subtree_control, %s) failed: %d\n", controllers[i], errno); 3762 } 3763 close(control); 3764 } 3765 3766 static void setup_cgroups() 3767 { 3768 // We want to cover both cgroup and cgroup2. 3769 // Each resource controller can be bound to only one of them, 3770 // so to cover both we divide all controllers into 3 arbitrary groups. 3771 // One group is then bound to cgroup2/unified, and 2 other groups 3772 // are bound to 2 cgroup hierarchies. 3773 // Note: we need to enable controllers one-by-one for both cgroup and cgroup2. 3774 // If we enable all at the same time and one of them fails (b/c of older kernel 3775 // or not enabled configs), then all will fail. 3776 const char* unified_controllers[] = {"+cpu", "+io", "+pids"}; 3777 const char* net_controllers[] = {"net", "net_prio", "devices", "blkio", "freezer"}; 3778 const char* cpu_controllers[] = {"cpuset", "cpuacct", "hugetlb", "rlimit", "memory"}; 3779 if (mkdir("/syzcgroup", 0777)) { 3780 // Can happen due to e.g. read-only file system (EROFS). 
3781 debug("mkdir(/syzcgroup) failed: %d\n", errno); 3782 return; 3783 } 3784 mount_cgroups2(unified_controllers, sizeof(unified_controllers) / sizeof(unified_controllers[0])); 3785 mount_cgroups("/syzcgroup/net", net_controllers, sizeof(net_controllers) / sizeof(net_controllers[0])); 3786 mount_cgroups("/syzcgroup/cpu", cpu_controllers, sizeof(cpu_controllers) / sizeof(cpu_controllers[0])); 3787 write_file("/syzcgroup/cpu/cgroup.clone_children", "1"); 3788 write_file("/syzcgroup/cpu/cpuset.memory_pressure_enabled", "1"); 3789 } 3790 3791 #if (SYZ_EXECUTOR || SYZ_REPEAT) && SYZ_EXECUTOR_USES_FORK_SERVER 3792 static void setup_cgroups_loop() 3793 { 3794 #if SYZ_EXECUTOR 3795 if (!flag_cgroups) 3796 return; 3797 #endif 3798 int pid = getpid(); 3799 char file[128]; 3800 char cgroupdir[64]; 3801 snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid); 3802 if (mkdir(cgroupdir, 0777)) { 3803 debug("mkdir(%s) failed: %d\n", cgroupdir, errno); 3804 } 3805 // Restrict number of pids per test process to prevent fork bombs. 3806 // We have up to 16 threads + main process + loop. 3807 // 32 pids should be enough for everyone. 3808 snprintf(file, sizeof(file), "%s/pids.max", cgroupdir); 3809 write_file(file, "32"); 3810 // Setup some v1 groups to make things more interesting. 3811 snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir); 3812 write_file(file, "%d", pid); 3813 snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid); 3814 if (mkdir(cgroupdir, 0777)) { 3815 debug("mkdir(%s) failed: %d\n", cgroupdir, errno); 3816 } 3817 snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir); 3818 write_file(file, "%d", pid); 3819 // Restrict memory consumption. 3820 // We have some syscalls that inherently consume lots of memory, 3821 // e.g. mounting some filesystem images requires at least 128MB 3822 // image in memory. We restrict RLIMIT_AS to 200MB. Here we gradually 3823 // increase memory limits to make things more interesting. 
3824 // Also this takes into account KASAN quarantine size. 3825 // If the limit is lower than KASAN quarantine size, then it can happen 3826 // so that we kill the process, but all of its memory is in quarantine 3827 // and is still accounted against memcg. As the result memcg won't 3828 // allow to allocate any memory in the parent and in the new test process. 3829 // The current limit of 300MB supports up to 9.6GB RAM (quarantine is 1/32). 3830 snprintf(file, sizeof(file), "%s/memory.soft_limit_in_bytes", cgroupdir); 3831 write_file(file, "%d", 299 << 20); 3832 snprintf(file, sizeof(file), "%s/memory.limit_in_bytes", cgroupdir); 3833 write_file(file, "%d", 300 << 20); 3834 snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid); 3835 if (mkdir(cgroupdir, 0777)) { 3836 debug("mkdir(%s) failed: %d\n", cgroupdir, errno); 3837 } 3838 snprintf(file, sizeof(file), "%s/cgroup.procs", cgroupdir); 3839 write_file(file, "%d", pid); 3840 } 3841 3842 static void setup_cgroups_test() 3843 { 3844 #if SYZ_EXECUTOR 3845 if (!flag_cgroups) 3846 return; 3847 #endif 3848 char cgroupdir[64]; 3849 snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/unified/syz%llu", procid); 3850 if (symlink(cgroupdir, "./cgroup")) { 3851 debug("symlink(%s, ./cgroup) failed: %d\n", cgroupdir, errno); 3852 } 3853 snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/cpu/syz%llu", procid); 3854 if (symlink(cgroupdir, "./cgroup.cpu")) { 3855 debug("symlink(%s, ./cgroup.cpu) failed: %d\n", cgroupdir, errno); 3856 } 3857 snprintf(cgroupdir, sizeof(cgroupdir), "/syzcgroup/net/syz%llu", procid); 3858 if (symlink(cgroupdir, "./cgroup.net")) { 3859 debug("symlink(%s, ./cgroup.net) failed: %d\n", cgroupdir, errno); 3860 } 3861 } 3862 #endif 3863 3864 #if SYZ_EXECUTOR || SYZ_SANDBOX_NAMESPACE 3865 static void initialize_cgroups() 3866 { 3867 #if SYZ_EXECUTOR 3868 if (!flag_cgroups) 3869 return; 3870 #endif 3871 if (mkdir("./syz-tmp/newroot/syzcgroup", 0700)) 3872 fail("mkdir failed"); 3873 if 
(mkdir("./syz-tmp/newroot/syzcgroup/unified", 0700)) 3874 fail("mkdir failed"); 3875 if (mkdir("./syz-tmp/newroot/syzcgroup/cpu", 0700)) 3876 fail("mkdir failed"); 3877 if (mkdir("./syz-tmp/newroot/syzcgroup/net", 0700)) 3878 fail("mkdir failed"); 3879 unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE; 3880 if (mount("/syzcgroup/unified", "./syz-tmp/newroot/syzcgroup/unified", NULL, bind_mount_flags, NULL)) { 3881 debug("mount(cgroup2, MS_BIND) failed: %d\n", errno); 3882 } 3883 if (mount("/syzcgroup/cpu", "./syz-tmp/newroot/syzcgroup/cpu", NULL, bind_mount_flags, NULL)) { 3884 debug("mount(cgroup/cpu, MS_BIND) failed: %d\n", errno); 3885 } 3886 if (mount("/syzcgroup/net", "./syz-tmp/newroot/syzcgroup/net", NULL, bind_mount_flags, NULL)) { 3887 debug("mount(cgroup/net, MS_BIND) failed: %d\n", errno); 3888 } 3889 } 3890 #endif 3891 #endif 3892 3893 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID 3894 #include <errno.h> 3895 #include <sys/mount.h> 3896 #include <sys/stat.h> 3897 #include <unistd.h> 3898 3899 static void setup_common() 3900 { 3901 if (mount(0, "/sys/fs/fuse/connections", "fusectl", 0, 0)) { 3902 debug("mount(fusectl) failed: %d\n", errno); 3903 } 3904 } 3905 3906 static void setup_binderfs() 3907 { 3908 // NOTE: this function must be called after chroot. 3909 // Bind an instance of binderfs specific just to this executor - it will 3910 // only be visible in its mount namespace and will help isolate binder 3911 // devices during fuzzing. 3912 // These commands will just silently fail if binderfs is not supported. 3913 // Ideally it should have been added as a separate feature (with lots of 3914 // minor changes throughout the code base), but it seems to be an overkill 3915 // for just 2 simple lines of code. 
3916 if (mkdir("/dev/binderfs", 0777)) { 3917 debug("mkdir(/dev/binderfs) failed: %d\n", errno); 3918 } 3919 3920 if (mount("binder", "/dev/binderfs", "binder", 0, NULL)) { 3921 debug("mount of binder at /dev/binderfs failed: %d\n", errno); 3922 } 3923 #if !SYZ_EXECUTOR && !SYZ_USE_TMP_DIR 3924 // Do a local symlink right away. 3925 if (symlink("/dev/binderfs", "./binderfs")) { 3926 debug("symlink(/dev/binderfs, ./binderfs) failed: %d\n", errno); 3927 } 3928 #endif 3929 } 3930 3931 #include <sched.h> 3932 #include <sys/prctl.h> 3933 #include <sys/resource.h> 3934 #include <sys/time.h> 3935 #include <sys/wait.h> 3936 3937 static void loop(); 3938 3939 static void sandbox_common() 3940 { 3941 prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); 3942 setsid(); 3943 3944 #if SYZ_EXECUTOR || __NR_syz_init_net_socket || SYZ_DEVLINK_PCI || __NR_syz_socket_connect_nvme_tcp 3945 int netns = open("/proc/self/ns/net", O_RDONLY); 3946 if (netns == -1) 3947 fail("open(/proc/self/ns/net) failed"); 3948 if (dup2(netns, kInitNetNsFd) < 0) 3949 fail("dup2(netns, kInitNetNsFd) failed"); 3950 close(netns); 3951 #endif 3952 3953 struct rlimit rlim; 3954 #if SYZ_EXECUTOR 3955 rlim.rlim_cur = rlim.rlim_max = (200 << 20) + 3956 (kMaxThreads * kCoverSize + kExtraCoverSize) * sizeof(void*); 3957 #else 3958 rlim.rlim_cur = rlim.rlim_max = (200 << 20); 3959 #endif 3960 setrlimit(RLIMIT_AS, &rlim); 3961 rlim.rlim_cur = rlim.rlim_max = 32 << 20; 3962 setrlimit(RLIMIT_MEMLOCK, &rlim); 3963 rlim.rlim_cur = rlim.rlim_max = 136 << 20; 3964 setrlimit(RLIMIT_FSIZE, &rlim); 3965 rlim.rlim_cur = rlim.rlim_max = 1 << 20; 3966 setrlimit(RLIMIT_STACK, &rlim); 3967 // Note: core size is also restricted by RLIMIT_FSIZE. 
3968 rlim.rlim_cur = rlim.rlim_max = 128 << 20; 3969 setrlimit(RLIMIT_CORE, &rlim); 3970 rlim.rlim_cur = rlim.rlim_max = 256; // see kMaxFd 3971 setrlimit(RLIMIT_NOFILE, &rlim); 3972 3973 // CLONE_NEWNS/NEWCGROUP cause EINVAL on some systems, 3974 // so we do them separately of clone in do_sandbox_namespace. 3975 if (unshare(CLONE_NEWNS)) { 3976 debug("unshare(CLONE_NEWNS): %d\n", errno); 3977 } 3978 if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, NULL)) { 3979 debug("mount(\"/\", MS_REC | MS_PRIVATE): %d\n", errno); 3980 } 3981 if (unshare(CLONE_NEWIPC)) { 3982 debug("unshare(CLONE_NEWIPC): %d\n", errno); 3983 } 3984 if (unshare(0x02000000)) { 3985 debug("unshare(CLONE_NEWCGROUP): %d\n", errno); 3986 } 3987 if (unshare(CLONE_NEWUTS)) { 3988 debug("unshare(CLONE_NEWUTS): %d\n", errno); 3989 } 3990 if (unshare(CLONE_SYSVSEM)) { 3991 debug("unshare(CLONE_SYSVSEM): %d\n", errno); 3992 } 3993 // These sysctl's restrict ipc resource usage (by default it's possible 3994 // to eat all system memory by creating e.g. lots of large sem sets). 3995 // These sysctl's are per-namespace, so we need to set them inside 3996 // of the test ipc namespace (after CLONE_NEWIPC). 
3997 typedef struct { 3998 const char* name; 3999 const char* value; 4000 } sysctl_t; 4001 static const sysctl_t sysctls[] = { 4002 {"/proc/sys/kernel/shmmax", "16777216"}, 4003 {"/proc/sys/kernel/shmall", "536870912"}, 4004 {"/proc/sys/kernel/shmmni", "1024"}, 4005 {"/proc/sys/kernel/msgmax", "8192"}, 4006 {"/proc/sys/kernel/msgmni", "1024"}, 4007 {"/proc/sys/kernel/msgmnb", "1024"}, 4008 {"/proc/sys/kernel/sem", "1024 1048576 500 1024"}, 4009 }; 4010 unsigned i; 4011 for (i = 0; i < sizeof(sysctls) / sizeof(sysctls[0]); i++) 4012 write_file(sysctls[i].name, sysctls[i].value); 4013 } 4014 #endif 4015 4016 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE 4017 static int wait_for_loop(int pid) 4018 { 4019 if (pid < 0) 4020 fail("sandbox fork failed"); 4021 debug("spawned loop pid %d\n", pid); 4022 int status = 0; 4023 while (waitpid(-1, &status, __WALL) != pid) { 4024 } 4025 return WEXITSTATUS(status); 4026 } 4027 #endif 4028 4029 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_NAMESPACE || SYZ_SANDBOX_ANDROID 4030 #include <linux/capability.h> 4031 4032 static void drop_caps(void) 4033 { 4034 struct __user_cap_header_struct cap_hdr = {}; 4035 struct __user_cap_data_struct cap_data[2] = {}; 4036 cap_hdr.version = _LINUX_CAPABILITY_VERSION_3; 4037 cap_hdr.pid = getpid(); 4038 if (syscall(SYS_capget, &cap_hdr, &cap_data)) 4039 fail("capget failed"); 4040 // Drop CAP_SYS_PTRACE so that test processes can't attach to parent processes. 4041 // Previously it lead to hangs because the loop process stopped due to SIGSTOP. 4042 // Note that a process can always ptrace its direct children, which is enough for testing purposes. 4043 // 4044 // A process with CAP_SYS_NICE can bring kernel down by asking for too high SCHED_DEADLINE priority, 4045 // as the result rcu and other system services that use kernel threads will stop functioning. 
4046 // Some parameters for SCHED_DEADLINE should be OK, but we don't have means to enforce 4047 // values of indirect syscall arguments. Peter Zijlstra proposed sysctl_deadline_period_{min,max} 4048 // which could be used to enfore safe limits without droppping CAP_SYS_NICE, but we don't have it yet. 4049 // See the following bug for details: 4050 // https://groups.google.com/forum/#!topic/syzkaller-bugs/G6Wl_PKPIWI 4051 const int drop = (1 << CAP_SYS_PTRACE) | (1 << CAP_SYS_NICE); 4052 cap_data[0].effective &= ~drop; 4053 cap_data[0].permitted &= ~drop; 4054 cap_data[0].inheritable &= ~drop; 4055 if (syscall(SYS_capset, &cap_hdr, &cap_data)) 4056 fail("capset failed"); 4057 } 4058 #endif 4059 4060 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE 4061 #include <sched.h> 4062 #include <sys/types.h> 4063 4064 static int do_sandbox_none(void) 4065 { 4066 // CLONE_NEWPID takes effect for the first child of the current process, 4067 // so we do it before fork to make the loop "init" process of the namespace. 4068 // We ought to do fail here, but sandbox=none is used in pkg/ipc tests 4069 // and they are usually run under non-root. 4070 // Also since debug is stripped by pkg/csource, we need to do {} 4071 // even though we generally don't do {} around single statements. 4072 if (unshare(CLONE_NEWPID)) { 4073 debug("unshare(CLONE_NEWPID): %d\n", errno); 4074 } 4075 int pid = fork(); 4076 if (pid != 0) 4077 return wait_for_loop(pid); 4078 4079 setup_common(); 4080 #if SYZ_EXECUTOR || SYZ_VHCI_INJECTION 4081 initialize_vhci(); 4082 #endif 4083 sandbox_common(); 4084 drop_caps(); 4085 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 4086 initialize_netdevices_init(); 4087 #endif 4088 if (unshare(CLONE_NEWNET)) { 4089 debug("unshare(CLONE_NEWNET): %d\n", errno); 4090 } 4091 // Enable access to IPPROTO_ICMP sockets, must be done after CLONE_NEWNET. 
4092 write_file("/proc/sys/net/ipv4/ping_group_range", "0 65535"); 4093 #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI 4094 initialize_devlink_pci(); 4095 #endif 4096 #if SYZ_EXECUTOR || SYZ_NET_INJECTION 4097 initialize_tun(); 4098 #endif 4099 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 4100 initialize_netdevices(); 4101 #endif 4102 #if SYZ_EXECUTOR || SYZ_WIFI 4103 initialize_wifi_devices(); 4104 #endif 4105 setup_binderfs(); 4106 loop(); 4107 doexit(1); 4108 } 4109 #endif 4110 4111 #if SYZ_EXECUTOR || SYZ_SANDBOX_SETUID 4112 #include <grp.h> 4113 #include <sched.h> 4114 #include <sys/prctl.h> 4115 4116 #define SYZ_HAVE_SANDBOX_SETUID 1 4117 static int do_sandbox_setuid(void) 4118 { 4119 if (unshare(CLONE_NEWPID)) { 4120 debug("unshare(CLONE_NEWPID): %d\n", errno); 4121 } 4122 int pid = fork(); 4123 if (pid != 0) 4124 return wait_for_loop(pid); 4125 4126 setup_common(); 4127 #if SYZ_EXECUTOR || SYZ_VHCI_INJECTION 4128 initialize_vhci(); 4129 #endif 4130 sandbox_common(); 4131 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 4132 initialize_netdevices_init(); 4133 #endif 4134 if (unshare(CLONE_NEWNET)) { 4135 debug("unshare(CLONE_NEWNET): %d\n", errno); 4136 } 4137 #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI 4138 initialize_devlink_pci(); 4139 #endif 4140 #if SYZ_EXECUTOR || SYZ_NET_INJECTION 4141 initialize_tun(); 4142 #endif 4143 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 4144 initialize_netdevices(); 4145 #endif 4146 #if SYZ_EXECUTOR || SYZ_WIFI 4147 initialize_wifi_devices(); 4148 #endif 4149 setup_binderfs(); 4150 4151 const int nobody = 65534; 4152 if (setgroups(0, NULL)) 4153 fail("failed to setgroups"); 4154 if (syscall(SYS_setresgid, nobody, nobody, nobody)) 4155 fail("failed to setresgid"); 4156 if (syscall(SYS_setresuid, nobody, nobody, nobody)) 4157 fail("failed to setresuid"); 4158 4159 // setresuid and setresgid clear the parent-death signal. 4160 prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); 4161 // This is required to open /proc/self/ files. 
4162 // Otherwise they are owned by root and we can't open them after setuid. 4163 // See task_dump_owner function in kernel. 4164 prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); 4165 4166 loop(); 4167 doexit(1); 4168 } 4169 #endif 4170 4171 #if SYZ_EXECUTOR || SYZ_SANDBOX_NAMESPACE 4172 #include <sched.h> 4173 #include <sys/mman.h> 4174 #include <sys/mount.h> 4175 4176 static int real_uid; 4177 static int real_gid; 4178 __attribute__((aligned(64 << 10))) static char sandbox_stack[1 << 20]; 4179 4180 static int namespace_sandbox_proc(void* arg) 4181 { 4182 sandbox_common(); 4183 4184 // /proc/self/setgroups is not present on some systems, ignore error. 4185 write_file("/proc/self/setgroups", "deny"); 4186 if (!write_file("/proc/self/uid_map", "0 %d 1\n", real_uid)) 4187 fail("write of /proc/self/uid_map failed"); 4188 if (!write_file("/proc/self/gid_map", "0 %d 1\n", real_gid)) 4189 fail("write of /proc/self/gid_map failed"); 4190 4191 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 4192 initialize_netdevices_init(); 4193 #endif 4194 // CLONE_NEWNET must always happen before tun setup, 4195 // because we want the tun device in the test namespace. 4196 if (unshare(CLONE_NEWNET)) 4197 fail("unshare(CLONE_NEWNET)"); 4198 // Enable access to IPPROTO_ICMP sockets, must be done after CLONE_NEWNET. 4199 write_file("/proc/sys/net/ipv4/ping_group_range", "0 65535"); 4200 #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI 4201 initialize_devlink_pci(); 4202 #endif 4203 #if SYZ_EXECUTOR || SYZ_NET_INJECTION 4204 // We setup tun here as it needs to be in the test net namespace, 4205 // which in turn needs to be in the test user namespace. 4206 // However, IFF_NAPI_FRAGS will fail as we are not root already. 4207 // TODO: we should create tun in the init net namespace and use setns 4208 // to move it to the target namespace. 
4209 initialize_tun(); 4210 #endif 4211 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 4212 initialize_netdevices(); 4213 #endif 4214 #if SYZ_EXECUTOR || SYZ_WIFI 4215 initialize_wifi_devices(); 4216 #endif 4217 4218 if (mkdir("./syz-tmp", 0777)) 4219 fail("mkdir(syz-tmp) failed"); 4220 if (mount("", "./syz-tmp", "tmpfs", 0, NULL)) 4221 fail("mount(tmpfs) failed"); 4222 if (mkdir("./syz-tmp/newroot", 0777)) 4223 fail("mkdir failed"); 4224 if (mkdir("./syz-tmp/newroot/dev", 0700)) 4225 fail("mkdir failed"); 4226 unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE; 4227 if (mount("/dev", "./syz-tmp/newroot/dev", NULL, bind_mount_flags, NULL)) 4228 fail("mount(dev) failed"); 4229 if (mkdir("./syz-tmp/newroot/proc", 0700)) 4230 fail("mkdir failed"); 4231 if (mount(NULL, "./syz-tmp/newroot/proc", "proc", 0, NULL)) 4232 fail("mount(proc) failed"); 4233 if (mkdir("./syz-tmp/newroot/selinux", 0700)) 4234 fail("mkdir failed"); 4235 // selinux mount used to be at /selinux, but then moved to /sys/fs/selinux. 
4236 const char* selinux_path = "./syz-tmp/newroot/selinux"; 4237 if (mount("/selinux", selinux_path, NULL, bind_mount_flags, NULL)) { 4238 if (errno != ENOENT) 4239 fail("mount(/selinux) failed"); 4240 if (mount("/sys/fs/selinux", selinux_path, NULL, bind_mount_flags, NULL) && errno != ENOENT) 4241 fail("mount(/sys/fs/selinux) failed"); 4242 } 4243 if (mkdir("./syz-tmp/newroot/sys", 0700)) 4244 fail("mkdir failed"); 4245 if (mount("/sys", "./syz-tmp/newroot/sys", 0, bind_mount_flags, NULL)) 4246 fail("mount(sysfs) failed"); 4247 #if SYZ_EXECUTOR || SYZ_CGROUPS 4248 initialize_cgroups(); 4249 #endif 4250 if (mkdir("./syz-tmp/pivot", 0777)) 4251 fail("mkdir failed"); 4252 if (syscall(SYS_pivot_root, "./syz-tmp", "./syz-tmp/pivot")) { 4253 debug("pivot_root failed\n"); 4254 if (chdir("./syz-tmp")) 4255 fail("chdir failed"); 4256 } else { 4257 debug("pivot_root OK\n"); 4258 if (chdir("/")) 4259 fail("chdir failed"); 4260 if (umount2("./pivot", MNT_DETACH)) 4261 fail("umount failed"); 4262 } 4263 if (chroot("./newroot")) 4264 fail("chroot failed"); 4265 if (chdir("/")) 4266 fail("chdir failed"); 4267 setup_binderfs(); 4268 drop_caps(); 4269 4270 loop(); 4271 doexit(1); 4272 } 4273 4274 #define SYZ_HAVE_SANDBOX_NAMESPACE 1 4275 static int do_sandbox_namespace(void) 4276 { 4277 setup_common(); 4278 #if SYZ_EXECUTOR || SYZ_VHCI_INJECTION 4279 // HCIDEVUP requires CAP_ADMIN, so this needs to happen early. 
4280 initialize_vhci(); 4281 #endif 4282 real_uid = getuid(); 4283 real_gid = getgid(); 4284 mprotect(sandbox_stack, 4096, PROT_NONE); // to catch stack underflows 4285 int pid = clone(namespace_sandbox_proc, &sandbox_stack[sizeof(sandbox_stack) - 64], 4286 CLONE_NEWUSER | CLONE_NEWPID, 0); 4287 return wait_for_loop(pid); 4288 } 4289 #endif 4290 4291 #if SYZ_EXECUTOR || SYZ_SANDBOX_ANDROID 4292 // seccomp only supported for Arm, Arm64, X86, and X86_64 archs 4293 #if GOARCH_arm || GOARCH_arm64 || GOARCH_386 || GOARCH_amd64 4294 #include <assert.h> 4295 #include <errno.h> 4296 #include <linux/audit.h> 4297 #include <linux/filter.h> 4298 #include <linux/seccomp.h> 4299 #include <stddef.h> 4300 #include <stdlib.h> 4301 #include <sys/prctl.h> 4302 #include <sys/syscall.h> 4303 4304 #include "android/android_seccomp.h" 4305 4306 #if GOARCH_amd64 || GOARCH_386 4307 // Syz-executor is linked against glibc when fuzzing runs on Cuttlefish x86-x64. 4308 // However Android blocks calls into mkdir, rmdir, symlink which causes 4309 // syz-executor to crash. When fuzzing runs on Android device this issue 4310 // is not observed, because syz-executor is linked against Bionic. Under 4311 // the hood Bionic invokes mkdirat, inlinkat and symlinkat, which are 4312 // allowed by seccomp-bpf. 4313 // This issue may exist not only in Android, but also in Linux in general 4314 // where seccomp filtering is enforced. 4315 // 4316 // This trick makes linker believe it matched the correct version of mkdir, 4317 // rmdir, symlink. So now behavior is the same across ARM and non-ARM builds. 
4318 inline int mkdir(const char* path, mode_t mode) 4319 { 4320 return mkdirat(AT_FDCWD, path, mode); 4321 } 4322 4323 inline int rmdir(const char* path) 4324 { 4325 return unlinkat(AT_FDCWD, path, AT_REMOVEDIR); 4326 } 4327 4328 inline int symlink(const char* old_path, const char* new_path) 4329 { 4330 return symlinkat(old_path, AT_FDCWD, new_path); 4331 } 4332 #endif 4333 4334 #endif 4335 #include <fcntl.h> // open(2) 4336 #include <grp.h> // setgroups 4337 #include <sys/xattr.h> // setxattr, getxattr 4338 4339 #define AID_NET_BT_ADMIN 3001 4340 #define AID_NET_BT 3002 4341 #define AID_INET 3003 4342 #define AID_EVERYBODY 9997 4343 #define AID_APP 10000 4344 4345 #define UNTRUSTED_APP_UID (AID_APP + 999) 4346 #define UNTRUSTED_APP_GID (AID_APP + 999) 4347 4348 #define SYSTEM_UID 1000 4349 #define SYSTEM_GID 1000 4350 4351 const char* const SELINUX_CONTEXT_UNTRUSTED_APP = "u:r:untrusted_app:s0:c512,c768"; 4352 const char* const SELINUX_LABEL_APP_DATA_FILE = "u:object_r:app_data_file:s0:c512,c768"; 4353 const char* const SELINUX_CONTEXT_FILE = "/proc/thread-self/attr/current"; 4354 const char* const SELINUX_XATTR_NAME = "security.selinux"; 4355 4356 const gid_t UNTRUSTED_APP_GROUPS[] = {UNTRUSTED_APP_GID, AID_NET_BT_ADMIN, AID_NET_BT, AID_INET, AID_EVERYBODY}; 4357 const size_t UNTRUSTED_APP_NUM_GROUPS = sizeof(UNTRUSTED_APP_GROUPS) / sizeof(UNTRUSTED_APP_GROUPS[0]); 4358 4359 const gid_t SYSTEM_GROUPS[] = {SYSTEM_GID, AID_NET_BT_ADMIN, AID_NET_BT, AID_INET, AID_EVERYBODY}; 4360 const size_t SYSTEM_NUM_GROUPS = sizeof(SYSTEM_GROUPS) / sizeof(SYSTEM_GROUPS[0]); 4361 4362 // Similar to libselinux getcon(3), but: 4363 // - No library dependency 4364 // - No dynamic memory allocation 4365 // - Uses fail() instead of returning an error code 4366 static void getcon(char* context, size_t context_size) 4367 { 4368 int fd = open(SELINUX_CONTEXT_FILE, O_RDONLY); 4369 if (fd < 0) 4370 fail("getcon: couldn't open context file"); 4371 4372 ssize_t nread = read(fd, context, 
context_size); 4373 4374 close(fd); 4375 4376 if (nread <= 0) 4377 fail("getcon: failed to read context file"); 4378 4379 // The contents of the context file MAY end with a newline 4380 // and MAY not have a null terminator. Handle this here. 4381 if (context[nread - 1] == '\n') 4382 context[nread - 1] = '\0'; 4383 } 4384 4385 // Similar to libselinux setcon(3), but: 4386 // - No library dependency 4387 // - No dynamic memory allocation 4388 // - Uses fail() instead of returning an error code 4389 static void setcon(const char* context) 4390 { 4391 char new_context[512]; 4392 4393 // Attempt to write the new context 4394 int fd = open(SELINUX_CONTEXT_FILE, O_WRONLY); 4395 4396 if (fd < 0) 4397 fail("setcon: could not open context file"); 4398 4399 ssize_t bytes_written = write(fd, context, strlen(context)); 4400 4401 // N.B.: We cannot reuse this file descriptor, since the target SELinux context 4402 // may not be able to read from it. 4403 close(fd); 4404 4405 if (bytes_written != (ssize_t)strlen(context)) 4406 failmsg("setcon: could not write entire context", "wrote=%zi, expected=%zu", bytes_written, strlen(context)); 4407 4408 // Validate the transition by checking the context 4409 getcon(new_context, sizeof(new_context)); 4410 4411 if (strcmp(context, new_context) != 0) 4412 failmsg("setcon: failed to change", "want=%s, context=%s", context, new_context); 4413 } 4414 4415 // Similar to libselinux setfilecon(3), but: 4416 // - No library dependency 4417 // - No dynamic memory allocation 4418 // - Uses fail() instead of returning an error code 4419 static void setfilecon(const char* path, const char* context) 4420 { 4421 char new_context[512]; 4422 4423 if (setxattr(path, SELINUX_XATTR_NAME, context, strlen(context) + 1, 0) != 0) 4424 fail("setfilecon: setxattr failed"); 4425 if (getxattr(path, SELINUX_XATTR_NAME, new_context, sizeof(new_context)) < 0) 4426 fail("setfilecon: getxattr failed"); 4427 if (strcmp(context, new_context) != 0) 4428 failmsg("setfilecon: 
could not set context", "want=%s, got=%s", context, new_context); 4429 } 4430 4431 #define SYZ_HAVE_SANDBOX_ANDROID 1 4432 4433 static int do_sandbox_android(uint64 sandbox_arg) 4434 { 4435 setup_common(); 4436 #if SYZ_EXECUTOR || SYZ_VHCI_INJECTION 4437 initialize_vhci(); 4438 #endif 4439 sandbox_common(); 4440 drop_caps(); 4441 4442 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 4443 initialize_netdevices_init(); 4444 #endif 4445 // CLONE_NEWNET must always happen before tun setup, because we want the tun 4446 // device in the test namespace. If we don't do this, executor will crash with 4447 // SYZFATAL: executor NUM failed NUM times: executor NUM: EOF 4448 if (unshare(CLONE_NEWNET)) { 4449 debug("unshare(CLONE_NEWNET): %d\n", errno); 4450 } 4451 // Enable access to IPPROTO_ICMP sockets, must be done after CLONE_NEWNET. 4452 write_file("/proc/sys/net/ipv4/ping_group_range", "0 65535"); 4453 #if SYZ_EXECUTOR || SYZ_DEVLINK_PCI 4454 initialize_devlink_pci(); 4455 #endif 4456 #if SYZ_EXECUTOR || SYZ_NET_INJECTION 4457 initialize_tun(); 4458 #endif 4459 #if SYZ_EXECUTOR || SYZ_NET_DEVICES 4460 initialize_netdevices(); 4461 #endif 4462 uid_t uid = UNTRUSTED_APP_UID; 4463 size_t num_groups = UNTRUSTED_APP_NUM_GROUPS; 4464 const gid_t* groups = UNTRUSTED_APP_GROUPS; 4465 gid_t gid = UNTRUSTED_APP_GID; 4466 debug("executor received sandbox_arg=%llu\n", sandbox_arg); 4467 if (sandbox_arg == 1) { 4468 uid = SYSTEM_UID; 4469 num_groups = SYSTEM_NUM_GROUPS; 4470 groups = SYSTEM_GROUPS; 4471 gid = SYSTEM_GID; 4472 4473 debug("fuzzing under SYSTEM account\n"); 4474 } 4475 if (chown(".", uid, uid) != 0) 4476 failmsg("do_sandbox_android: chmod failed", "sandbox_arg=%llu", sandbox_arg); 4477 4478 if (setgroups(num_groups, groups) != 0) 4479 failmsg("do_sandbox_android: setgroups failed", "sandbox_arg=%llu", sandbox_arg); 4480 4481 if (setresgid(gid, gid, gid) != 0) 4482 failmsg("do_sandbox_android: setresgid failed", "sandbox_arg=%llu", sandbox_arg); 4483 4484 setup_binderfs(); 4485 4486 
#if GOARCH_arm || GOARCH_arm64 || GOARCH_386 || GOARCH_amd64 4487 // Will fail() if anything fails. 4488 // Must be called when the new process still has CAP_SYS_ADMIN, in this case, 4489 // before changing uid from 0, which clears capabilities. 4490 int account = SCFS_RestrictedApp; 4491 if (sandbox_arg == 1) 4492 account = SCFS_SystemAccount; 4493 set_app_seccomp_filter(account); 4494 #endif 4495 4496 if (setresuid(uid, uid, uid) != 0) 4497 failmsg("do_sandbox_android: setresuid failed", "sandbox_arg=%llu", sandbox_arg); 4498 4499 // setresuid and setresgid clear the parent-death signal. 4500 prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0); 4501 4502 setfilecon(".", SELINUX_LABEL_APP_DATA_FILE); 4503 if (uid == UNTRUSTED_APP_UID) 4504 setcon(SELINUX_CONTEXT_UNTRUSTED_APP); 4505 4506 loop(); 4507 doexit(1); 4508 } 4509 #endif 4510 4511 #if SYZ_EXECUTOR || SYZ_REPEAT && SYZ_USE_TMP_DIR 4512 #include <dirent.h> 4513 #include <errno.h> 4514 #include <string.h> 4515 #include <sys/ioctl.h> 4516 #include <sys/mount.h> 4517 4518 #define FS_IOC_SETFLAGS _IOW('f', 2, long) 4519 4520 // One does not simply remove a directory. 4521 // There can be mounts, so we need to try to umount. 4522 // Moreover, a mount can be mounted several times, so we need to try to umount in a loop. 4523 // Moreover, after umount a dir can become non-empty again, so we need another loop. 4524 // Moreover, a mount can be re-mounted as read-only and then we will fail to make a dir empty. 4525 static void remove_dir(const char* dir) 4526 { 4527 int iter = 0; 4528 DIR* dp = 0; 4529 retry: 4530 #if SYZ_EXECUTOR || !SYZ_SANDBOX_ANDROID 4531 // Starting from v6.9, it does no longer make sense to use MNT_DETACH, because 4532 // a loop device may only be reused in RW mode if no mounted filesystem keeps a 4533 // reference to it. So we have to umount them synchronously. 4534 // MNT_FORCE should hopefully prevent hangs for filesystems that may require a complex cleanup. 
4535 const int umount_flags = MNT_FORCE | UMOUNT_NOFOLLOW; 4536 #if SYZ_EXECUTOR 4537 if (!flag_sandbox_android) 4538 #endif 4539 while (umount2(dir, umount_flags) == 0) { 4540 debug("umount(%s)\n", dir); 4541 } 4542 #endif 4543 dp = opendir(dir); 4544 if (dp == NULL) { 4545 if (errno == EMFILE) { 4546 // This happens when the test process casts prlimit(NOFILE) on us. 4547 // Ideally we somehow prevent test processes from messing with parent processes. 4548 // But full sandboxing is expensive, so let's ignore this error for now. 4549 exitf("opendir(%s) failed due to NOFILE, exiting", dir); 4550 } 4551 exitf("opendir(%s) failed", dir); 4552 } 4553 struct dirent* ep = 0; 4554 while ((ep = readdir(dp))) { 4555 if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0) 4556 continue; 4557 char filename[FILENAME_MAX]; 4558 snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name); 4559 // If it's 9p mount with broken transport, lstat will fail. 4560 // So try to umount first. 4561 #if SYZ_EXECUTOR || !SYZ_SANDBOX_ANDROID 4562 #if SYZ_EXECUTOR 4563 if (!flag_sandbox_android) 4564 #endif 4565 while (umount2(filename, umount_flags) == 0) { 4566 debug("umount(%s)\n", filename); 4567 } 4568 #endif 4569 struct stat st; 4570 if (lstat(filename, &st)) 4571 exitf("lstat(%s) failed", filename); 4572 if (S_ISDIR(st.st_mode)) { 4573 remove_dir(filename); 4574 continue; 4575 } 4576 int i; 4577 for (i = 0;; i++) { 4578 if (unlink(filename) == 0) 4579 break; 4580 if (errno == EPERM) { 4581 // Try to reset FS_XFLAG_IMMUTABLE. 
4582 int fd = open(filename, O_RDONLY); 4583 if (fd != -1) { 4584 long flags = 0; 4585 if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) { 4586 debug("reset FS_XFLAG_IMMUTABLE\n"); 4587 } 4588 close(fd); 4589 continue; 4590 } 4591 } 4592 if (errno == EROFS) { 4593 debug("ignoring EROFS\n"); 4594 break; 4595 } 4596 if (errno != EBUSY || i > 100) 4597 exitf("unlink(%s) failed", filename); 4598 #if SYZ_EXECUTOR || !SYZ_SANDBOX_ANDROID 4599 #if SYZ_EXECUTOR 4600 if (!flag_sandbox_android) { 4601 #endif 4602 debug("umount(%s)\n", filename); 4603 if (umount2(filename, umount_flags)) 4604 exitf("umount(%s) failed", filename); 4605 #if SYZ_EXECUTOR 4606 } 4607 #endif 4608 #endif 4609 } 4610 } 4611 closedir(dp); 4612 for (int i = 0;; i++) { 4613 if (rmdir(dir) == 0) 4614 break; 4615 if (i < 100) { 4616 if (errno == EPERM) { 4617 // Try to reset FS_XFLAG_IMMUTABLE. 4618 int fd = open(dir, O_RDONLY); 4619 if (fd != -1) { 4620 long flags = 0; 4621 if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0) { 4622 debug("reset FS_XFLAG_IMMUTABLE\n"); 4623 } 4624 close(fd); 4625 continue; 4626 } 4627 } 4628 if (errno == EROFS) { 4629 debug("ignoring EROFS\n"); 4630 break; 4631 } 4632 if (errno == EBUSY) { 4633 #if SYZ_EXECUTOR || !SYZ_SANDBOX_ANDROID 4634 #if SYZ_EXECUTOR 4635 if (!flag_sandbox_android) { 4636 #endif 4637 debug("umount(%s)\n", dir); 4638 if (umount2(dir, umount_flags)) 4639 exitf("umount(%s) failed", dir); 4640 #if SYZ_EXECUTOR 4641 } 4642 #endif 4643 #endif 4644 continue; 4645 } 4646 if (errno == ENOTEMPTY) { 4647 if (iter < 100) { 4648 iter++; 4649 goto retry; 4650 } 4651 } 4652 } 4653 exitf("rmdir(%s) failed", dir); 4654 } 4655 } 4656 #endif 4657 4658 #if SYZ_EXECUTOR || SYZ_FAULT 4659 #include <fcntl.h> 4660 #include <string.h> 4661 #include <sys/stat.h> 4662 #include <sys/types.h> 4663 4664 static int inject_fault(int nth) 4665 { 4666 int fd; 4667 fd = open("/proc/thread-self/fail-nth", O_RDWR); 4668 // We treat errors here as temporal/non-critical because we see 4669 // 
// Kills the process `pid` (and the process group with the same id) and reaps it,
// storing the wait status in *status. Does not return until waitpid() reports `pid`.
static void kill_and_wait(int pid, int* status)
{
	kill(-pid, SIGKILL);
	kill(pid, SIGKILL);
	// Poll for roughly 100 ms (100 tries, 1 ms apart) before escalating.
	int tries_left = 100;
	while (tries_left-- > 0) {
		if (waitpid(-1, status, WNOHANG | __WALL) == pid)
			return;
		usleep(1000);
	}
	// The process did not go away. Stuck fuse connections are a known cause of
	// such deadlocks (see Documentation/filesystems/fuse.txt). We cannot tell
	// which connections belong to the process (it may have used
	// unshare(CLONE_NEWNS)), so every connection listed in sysfs is aborted.
	debug("kill is not working\n");
	DIR* conns = opendir("/sys/fs/fuse/connections");
	if (!conns) {
		debug("failed to open /sys/fs/fuse/connections: %d\n", errno);
	} else {
		struct dirent* entry;
		while ((entry = readdir(conns)) != NULL) {
			const char* name = entry->d_name;
			if (!strcmp(name, ".") || !strcmp(name, ".."))
				continue;
			char abort_path[300];
			snprintf(abort_path, sizeof(abort_path), "/sys/fs/fuse/connections/%s/abort", name);
			int abort_fd = open(abort_path, O_WRONLY);
			if (abort_fd == -1) {
				debug("failed to open %s: %d\n", abort_path, errno);
				continue;
			}
			debug("aborting fuse conn %s\n", name);
			// The written byte itself is arbitrary (first byte of the path).
			if (write(abort_fd, abort_path, 1) < 0) {
				debug("failed to abort: %d\n", errno);
			}
			close(abort_fd);
		}
		closedir(conns);
	}
	// Nothing else left to try: block until the process is reaped.
	for (;;) {
		if (waitpid(-1, status, __WALL) == pid)
			break;
	}
}
// Per-test initialization, run in the freshly forked test process before the
// program is executed. All steps are best-effort: failures are at most logged.
static void setup_test()
{
	// Request SIGKILL when the parent dies and move into an own process group.
	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
	setpgrp();
#if SYZ_EXECUTOR || SYZ_CGROUPS
	setup_cgroups_test();
#endif
	// It's the leaf test process we want to be always killed first.
	write_file("/proc/self/oom_score_adj", "1000");
#if SYZ_EXECUTOR || SYZ_NET_INJECTION
	// Read all remaining packets from tun to better
	// isolate consequently executing programs.
	flush_tun();
#endif
#if SYZ_EXECUTOR || SYZ_USE_TMP_DIR
	// Add a binderfs symlink to the tmp folder.
	if (symlink("/dev/binderfs", "./binderfs")) {
		// The message is newline-terminated like every other debug() call,
		// so it does not run into the next log line.
		debug("symlink(/dev/binderfs, ./binderfs) failed: %d\n", errno);
	}
#endif
}
// Checks that the kernel exposes the fault injection interfaces and writes the
// debugfs settings listed below. Reports via fail()/failmsg() when a required
// file cannot be opened or written.
static void setup_fault()
{
	// The two probes only test that the files can be opened for writing;
	// nothing is written to them here.
	int probe = open("/proc/self/make-it-fail", O_WRONLY);
	if (probe == -1)
		fail("CONFIG_FAULT_INJECTION is not enabled");
	close(probe);

	probe = open("/proc/thread-self/fail-nth", O_WRONLY);
	if (probe == -1)
		fail("kernel does not have systematic fault injection support");
	close(probe);

	static struct {
		const char* file;
		const char* val;
		bool fatal;
	} knobs[] = {
	    {"/sys/kernel/debug/failslab/ignore-gfp-wait", "N", true},
	    // The remaining entries depend on separate kernel configs
	    // (e.g. CONFIG_FAIL_FUTEX) that host.checkFaultInjection does not
	    // fully check, so write errors for them are only logged.
	    {"/sys/kernel/debug/fail_futex/ignore-private", "N", false},
	    {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem", "N", false},
	    {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-wait", "N", false},
	    {"/sys/kernel/debug/fail_page_alloc/min-order", "0", false},
	};
	const unsigned count = sizeof(knobs) / sizeof(knobs[0]);
	for (unsigned idx = 0; idx < count; idx++) {
		if (write_file(knobs[idx].file, knobs[idx].val))
			continue;
		debug("failed to write %s: %d\n", knobs[idx].file, errno);
		if (knobs[idx].fatal)
			failmsg("failed to write fault injection file", "file=%s", knobs[idx].file);
	}
}
boot leaks. 4894 if (!write_file(KMEMLEAK_FILE, "scan")) 4895 fail("failed to write(kmemleak, \"scan\")"); 4896 sleep(5); // account for MSECS_MIN_AGE 4897 if (!write_file(KMEMLEAK_FILE, "scan")) 4898 fail("failed to write(kmemleak, \"scan\")"); 4899 if (!write_file(KMEMLEAK_FILE, "clear")) 4900 fail("failed to write(kmemleak, \"clear\")"); 4901 } 4902 4903 #define SYZ_HAVE_LEAK_CHECK 1 4904 #if SYZ_EXECUTOR 4905 static void check_leaks(char** frames, int nframes) 4906 #else 4907 static void check_leaks(void) 4908 #endif 4909 { 4910 int fd = open(KMEMLEAK_FILE, O_RDWR); 4911 if (fd == -1) 4912 fail("failed to open(kmemleak)"); 4913 // KMEMLEAK has false positives. To mitigate most of them, it checksums 4914 // potentially leaked objects, and reports them only on the next scan 4915 // iff the checksum does not change. Because of that we do the following 4916 // intricate dance: 4917 // Scan, sleep, scan again. At this point we can get some leaks. 4918 // If there are leaks, we sleep and scan again, this can remove 4919 // false leaks. Then, read kmemleak again. If we get leaks now, then 4920 // hopefully these are true positives during the previous testing cycle. 4921 uint64 start = current_time_ms(); 4922 if (write(fd, "scan", 4) != 4) 4923 fail("failed to write(kmemleak, \"scan\")"); 4924 sleep(1); 4925 // Account for MSECS_MIN_AGE 4926 // (1 second less because scanning will take at least a second). 
4927 while (current_time_ms() - start < 4 * 1000) 4928 sleep(1); 4929 if (write(fd, "scan", 4) != 4) 4930 fail("failed to write(kmemleak, \"scan\")"); 4931 static char buf[128 << 10]; 4932 ssize_t n = read(fd, buf, sizeof(buf) - 1); 4933 if (n < 0) 4934 fail("failed to read(kmemleak)"); 4935 int nleaks = 0; 4936 if (n != 0) { 4937 sleep(1); 4938 if (write(fd, "scan", 4) != 4) 4939 fail("failed to write(kmemleak, \"scan\")"); 4940 if (lseek(fd, 0, SEEK_SET) < 0) 4941 fail("failed to lseek(kmemleak)"); 4942 n = read(fd, buf, sizeof(buf) - 1); 4943 if (n < 0) 4944 fail("failed to read(kmemleak)"); 4945 buf[n] = 0; 4946 char* pos = buf; 4947 char* end = buf + n; 4948 while (pos < end) { 4949 char* next = strstr(pos + 1, "unreferenced object"); 4950 if (!next) 4951 next = end; 4952 char prev = *next; 4953 *next = 0; 4954 #if SYZ_EXECUTOR 4955 int f; 4956 for (f = 0; f < nframes; f++) { 4957 if (strstr(pos, frames[f])) 4958 break; 4959 } 4960 if (f != nframes) { 4961 *next = prev; 4962 pos = next; 4963 continue; 4964 } 4965 #endif 4966 // BUG in output should be recognized by manager. 
4967 fprintf(stderr, "BUG: memory leak\n%s\n", pos); 4968 *next = prev; 4969 pos = next; 4970 nleaks++; 4971 } 4972 } 4973 if (write(fd, "clear", 5) != 5) 4974 fail("failed to write(kmemleak, \"clear\")"); 4975 close(fd); 4976 if (nleaks) 4977 doexit(1); 4978 } 4979 #endif 4980 4981 #if SYZ_EXECUTOR || SYZ_BINFMT_MISC 4982 #include <fcntl.h> 4983 #include <sys/mount.h> 4984 #include <sys/stat.h> 4985 #include <sys/types.h> 4986 4987 static void setup_binfmt_misc() 4988 { 4989 if (mount(0, "/proc/sys/fs/binfmt_misc", "binfmt_misc", 0, 0)) { 4990 debug("mount(binfmt_misc) failed: %d\n", errno); 4991 return; 4992 } 4993 if (!write_file("/proc/sys/fs/binfmt_misc/register", ":syz0:M:0:\x01::./file0:") || 4994 !write_file("/proc/sys/fs/binfmt_misc/register", ":syz1:M:1:\x02::./file0:POC")) 4995 fail("write(/proc/sys/fs/binfmt_misc/register) failed"); 4996 } 4997 #endif 4998 4999 #if SYZ_EXECUTOR || SYZ_KCSAN 5000 #define KCSAN_DEBUGFS_FILE "/sys/kernel/debug/kcsan" 5001 5002 static void setup_kcsan() 5003 { 5004 if (!write_file(KCSAN_DEBUGFS_FILE, "on")) 5005 fail("write(/sys/kernel/debug/kcsan, on) failed"); 5006 } 5007 5008 #if SYZ_EXECUTOR // currently only used by executor 5009 static void setup_kcsan_filterlist(char** frames, int nframes, bool suppress) 5010 { 5011 int fd = open(KCSAN_DEBUGFS_FILE, O_WRONLY); 5012 if (fd == -1) 5013 fail("failed to open kcsan debugfs file"); 5014 5015 printf("%s KCSAN reports in functions: ", 5016 suppress ? 
// Sets the mode of /dev/raw-gadget to 0666; reports via fail() if chmod fails.
static void setup_usb()
{
	const int rc = chmod("/dev/raw-gadget", 0666);
	if (rc != 0)
		fail("failed to chmod /dev/raw-gadget");
}
5070 {"/proc/sys/kernel/kptr_restrict", "0"}, 5071 {"/proc/sys/kernel/softlockup_all_cpu_backtrace", "1"}, 5072 // This is to restrict effects of recursive exponential mounts, for details see 5073 // "mnt: Add a per mount namespace limit on the number of mounts" commit. 5074 {"/proc/sys/fs/mount-max", "100"}, 5075 // Dumping all tasks to console can take too long. 5076 {"/proc/sys/vm/oom_dump_tasks", "0"}, 5077 // Executor hits lots of SIGSEGVs, no point in logging them. 5078 {"/proc/sys/debug/exception-trace", "0"}, 5079 {"/proc/sys/kernel/printk", "7 4 1 3"}, 5080 // Faster gc (1 second) is intended to make tests more repeatable. 5081 {"/proc/sys/kernel/keys/gc_delay", "1"}, 5082 // We always want to prefer killing the allocating test process rather than somebody else 5083 // (sshd or another random test process). 5084 {"/proc/sys/vm/oom_kill_allocating_task", "1"}, 5085 // This blocks some of the ways the fuzzer can trigger a reboot. 5086 // ctrl-alt-del=0 tells kernel to signal cad_pid instead of rebooting 5087 // and setting cad_pid to the current pid (transient "syz-executor setup") makes it a no-op. 
5088 // For context see: https://groups.google.com/g/syzkaller-bugs/c/WqOY4TiRnFg/m/6P9u8lWZAQAJ 5089 {"/proc/sys/kernel/ctrl-alt-del", "0"}, 5090 {"/proc/sys/kernel/cad_pid", mypid}, 5091 }; 5092 for (size_t i = 0; i < sizeof(files) / sizeof(files[0]); i++) { 5093 if (!write_file(files[i].name, files[i].data)) 5094 printf("write to %s failed: %s\n", files[i].name, strerror(errno)); 5095 } 5096 } 5097 #endif 5098 5099 #if SYZ_EXECUTOR || SYZ_802154 5100 #include <net/if.h> 5101 #include <string.h> 5102 #include <sys/socket.h> 5103 #include <sys/types.h> 5104 5105 #define NL802154_CMD_SET_SHORT_ADDR 11 5106 #define NL802154_ATTR_IFINDEX 3 5107 #define NL802154_ATTR_SHORT_ADDR 10 5108 5109 static void setup_802154() 5110 { 5111 int sock_route = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); 5112 if (sock_route == -1) 5113 fail("socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE) failed"); 5114 int sock_generic = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 5115 if (sock_generic < 0) 5116 fail("socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC) failed"); 5117 int nl802154_family_id = netlink_query_family_id(&nlmsg, sock_generic, "nl802154", true); 5118 for (int i = 0; i < 2; i++) { 5119 // wpan0/1 are created by CONFIG_IEEE802154_HWSIM. 5120 // sys/linux/socket_ieee802154.txt knowns about these names and consts. 
5121 char devname[] = "wpan0"; 5122 devname[strlen(devname) - 1] += i; 5123 uint64 hwaddr = 0xaaaaaaaaaaaa0002 + (i << 8); 5124 uint16 shortaddr = 0xaaa0 + i; 5125 int ifindex = if_nametoindex(devname); 5126 struct genlmsghdr genlhdr; 5127 memset(&genlhdr, 0, sizeof(genlhdr)); 5128 genlhdr.cmd = NL802154_CMD_SET_SHORT_ADDR; 5129 netlink_init(&nlmsg, nl802154_family_id, 0, &genlhdr, sizeof(genlhdr)); 5130 netlink_attr(&nlmsg, NL802154_ATTR_IFINDEX, &ifindex, sizeof(ifindex)); 5131 netlink_attr(&nlmsg, NL802154_ATTR_SHORT_ADDR, &shortaddr, sizeof(shortaddr)); 5132 int err = netlink_send(&nlmsg, sock_generic); 5133 if (err < 0) 5134 fail("NL802154_CMD_SET_SHORT_ADDR failed"); 5135 netlink_device_change(&nlmsg, sock_route, devname, true, 0, &hwaddr, sizeof(hwaddr), 0); 5136 if (i == 0) { 5137 netlink_add_device_impl(&nlmsg, "lowpan", "lowpan0", false); 5138 netlink_done(&nlmsg); 5139 netlink_attr(&nlmsg, IFLA_LINK, &ifindex, sizeof(ifindex)); 5140 int err = netlink_send(&nlmsg, sock_route); 5141 if (err < 0) 5142 fail("netlink: adding device lowpan0 type lowpan link wpan0"); 5143 } 5144 } 5145 close(sock_route); 5146 close(sock_generic); 5147 } 5148 #endif 5149 5150 #if GOARCH_s390x 5151 #include <sys/mman.h> 5152 // Ugly way to work around gcc's "error: function called through a non-compatible type". 5153 // Simply casting via (void*) inline does not work b/c gcc sees through a chain of casts. 5154 // The macro is used in generated C code. 
5155 #define CAST(f) ({void* p = (void*)f; p; }) 5156 #endif 5157 5158 #if SYZ_EXECUTOR || __NR_syz_fuse_handle_req 5159 #include <fcntl.h> 5160 #include <stddef.h> 5161 #include <stdio.h> 5162 #include <sys/stat.h> 5163 #include <sys/types.h> 5164 5165 // From linux/fuse.h 5166 #define FUSE_MIN_READ_BUFFER 8192 5167 5168 // From linux/fuse.h 5169 enum fuse_opcode { 5170 FUSE_LOOKUP = 1, 5171 FUSE_FORGET = 2, // no reply 5172 FUSE_GETATTR = 3, 5173 FUSE_SETATTR = 4, 5174 FUSE_READLINK = 5, 5175 FUSE_SYMLINK = 6, 5176 FUSE_MKNOD = 8, 5177 FUSE_MKDIR = 9, 5178 FUSE_UNLINK = 10, 5179 FUSE_RMDIR = 11, 5180 FUSE_RENAME = 12, 5181 FUSE_LINK = 13, 5182 FUSE_OPEN = 14, 5183 FUSE_READ = 15, 5184 FUSE_WRITE = 16, 5185 FUSE_STATFS = 17, 5186 FUSE_RELEASE = 18, 5187 FUSE_FSYNC = 20, 5188 FUSE_SETXATTR = 21, 5189 FUSE_GETXATTR = 22, 5190 FUSE_LISTXATTR = 23, 5191 FUSE_REMOVEXATTR = 24, 5192 FUSE_FLUSH = 25, 5193 FUSE_INIT = 26, 5194 FUSE_OPENDIR = 27, 5195 FUSE_READDIR = 28, 5196 FUSE_RELEASEDIR = 29, 5197 FUSE_FSYNCDIR = 30, 5198 FUSE_GETLK = 31, 5199 FUSE_SETLK = 32, 5200 FUSE_SETLKW = 33, 5201 FUSE_ACCESS = 34, 5202 FUSE_CREATE = 35, 5203 FUSE_INTERRUPT = 36, 5204 FUSE_BMAP = 37, 5205 FUSE_DESTROY = 38, 5206 FUSE_IOCTL = 39, 5207 FUSE_POLL = 40, 5208 FUSE_NOTIFY_REPLY = 41, 5209 FUSE_BATCH_FORGET = 42, 5210 FUSE_FALLOCATE = 43, 5211 FUSE_READDIRPLUS = 44, 5212 FUSE_RENAME2 = 45, 5213 FUSE_LSEEK = 46, 5214 FUSE_COPY_FILE_RANGE = 47, 5215 FUSE_SETUPMAPPING = 48, 5216 FUSE_REMOVEMAPPING = 49, 5217 5218 // CUSE specific operations 5219 CUSE_INIT = 4096, 5220 5221 // Reserved opcodes: helpful to detect structure endian-ness 5222 CUSE_INIT_BSWAP_RESERVED = 1048576, // CUSE_INIT << 8 5223 FUSE_INIT_BSWAP_RESERVED = 436207616, // FUSE_INIT << 24 5224 }; 5225 5226 // From linux/fuse.h 5227 struct fuse_in_header { 5228 uint32 len; 5229 uint32 opcode; 5230 uint64 unique; 5231 uint64 nodeid; 5232 uint32 uid; 5233 uint32 gid; 5234 uint32 pid; 5235 uint32 padding; 5236 }; 5237 5238 // 
From linux/fuse.h 5239 struct fuse_out_header { 5240 uint32 len; 5241 // This is actually a int32_t but *_t variants fail to compile inside 5242 // the executor (it appends an additional _t for some reason) and int32 5243 // does not exist. Since we don't touch this field, defining it as 5244 // unsigned should not cause any problems. 5245 uint32 error; 5246 uint64 unique; 5247 }; 5248 5249 // Struct shared between syz_fuse_handle_req() and the fuzzer. Used to provide 5250 // a fuzzed response for each request type. 5251 struct syz_fuse_req_out { 5252 struct fuse_out_header* init; 5253 struct fuse_out_header* lseek; 5254 struct fuse_out_header* bmap; 5255 struct fuse_out_header* poll; 5256 struct fuse_out_header* getxattr; 5257 struct fuse_out_header* lk; 5258 struct fuse_out_header* statfs; 5259 struct fuse_out_header* write; 5260 struct fuse_out_header* read; 5261 struct fuse_out_header* open; 5262 struct fuse_out_header* attr; 5263 struct fuse_out_header* entry; 5264 struct fuse_out_header* dirent; 5265 struct fuse_out_header* direntplus; 5266 struct fuse_out_header* create_open; 5267 struct fuse_out_header* ioctl; 5268 }; 5269 5270 // Link the reponse to the request and send it to /dev/fuse. 5271 static int fuse_send_response(int fd, 5272 const struct fuse_in_header* in_hdr, 5273 struct fuse_out_header* out_hdr) 5274 { 5275 if (!out_hdr) { 5276 debug("fuse_send_response: received a NULL out_hdr\n"); 5277 return -1; 5278 } 5279 5280 out_hdr->unique = in_hdr->unique; 5281 if (write(fd, out_hdr, out_hdr->len) == -1) { 5282 debug("fuse_send_response > write failed: %d\n", errno); 5283 return -1; 5284 } 5285 5286 return 0; 5287 } 5288 5289 // This function reads a request from /dev/fuse and tries to pick the correct 5290 // response from the input struct syz_fuse_req_out (a3). Responses are still 5291 // generated by the fuzzer. 5292 static volatile long syz_fuse_handle_req(volatile long a0, // /dev/fuse fd. 5293 volatile long a1, // Read buffer. 
5294 volatile long a2, // Buffer len. 5295 volatile long a3) // syz_fuse_req_out. 5296 { 5297 struct syz_fuse_req_out* req_out = (struct syz_fuse_req_out*)a3; 5298 struct fuse_out_header* out_hdr = NULL; 5299 char* buf = (char*)a1; 5300 int buf_len = (int)a2; 5301 int fd = (int)a0; 5302 5303 if (!req_out) { 5304 debug("syz_fuse_handle_req: received a NULL syz_fuse_req_out\n"); 5305 return -1; 5306 } 5307 if (buf_len < FUSE_MIN_READ_BUFFER) { 5308 debug("FUSE requires the read buffer to be at least %u\n", FUSE_MIN_READ_BUFFER); 5309 return -1; 5310 } 5311 5312 int ret = read(fd, buf, buf_len); 5313 if (ret == -1) { 5314 debug("syz_fuse_handle_req > read failed: %d\n", errno); 5315 return -1; 5316 } 5317 // Safe to do because ret > 0 (!= -1) and < FUSE_MIN_READ_BUFFER (= 8192). 5318 if ((size_t)ret < sizeof(struct fuse_in_header)) { 5319 debug("syz_fuse_handle_req: received a truncated FUSE header\n"); 5320 return -1; 5321 } 5322 5323 const struct fuse_in_header* in_hdr = (const struct fuse_in_header*)buf; 5324 debug("syz_fuse_handle_req: received opcode %d\n", in_hdr->opcode); 5325 if (in_hdr->len > (uint32)ret) { 5326 debug("syz_fuse_handle_req: received a truncated message\n"); 5327 return -1; 5328 } 5329 5330 switch (in_hdr->opcode) { 5331 case FUSE_GETATTR: 5332 case FUSE_SETATTR: 5333 out_hdr = req_out->attr; 5334 break; 5335 case FUSE_LOOKUP: 5336 case FUSE_SYMLINK: 5337 case FUSE_LINK: 5338 case FUSE_MKNOD: 5339 case FUSE_MKDIR: 5340 out_hdr = req_out->entry; 5341 break; 5342 case FUSE_OPEN: 5343 case FUSE_OPENDIR: 5344 out_hdr = req_out->open; 5345 break; 5346 case FUSE_STATFS: 5347 out_hdr = req_out->statfs; 5348 break; 5349 case FUSE_RMDIR: 5350 case FUSE_RENAME: 5351 case FUSE_RENAME2: 5352 case FUSE_FALLOCATE: 5353 case FUSE_SETXATTR: 5354 case FUSE_REMOVEXATTR: 5355 case FUSE_FSYNCDIR: 5356 case FUSE_FSYNC: 5357 case FUSE_SETLKW: 5358 case FUSE_SETLK: 5359 case FUSE_ACCESS: 5360 case FUSE_FLUSH: 5361 case FUSE_RELEASE: 5362 case FUSE_RELEASEDIR: 5363 
case FUSE_UNLINK: 5364 case FUSE_DESTROY: 5365 // These opcodes do not have any reply data. Hence, we pick 5366 // another response and only use the shared header. 5367 out_hdr = req_out->init; 5368 if (!out_hdr) { 5369 debug("syz_fuse_handle_req: received a NULL out_hdr\n"); 5370 return -1; 5371 } 5372 out_hdr->len = sizeof(struct fuse_out_header); 5373 break; 5374 case FUSE_READ: 5375 out_hdr = req_out->read; 5376 break; 5377 case FUSE_READDIR: 5378 out_hdr = req_out->dirent; 5379 break; 5380 case FUSE_READDIRPLUS: 5381 out_hdr = req_out->direntplus; 5382 break; 5383 case FUSE_INIT: 5384 out_hdr = req_out->init; 5385 break; 5386 case FUSE_LSEEK: 5387 out_hdr = req_out->lseek; 5388 break; 5389 case FUSE_GETLK: 5390 out_hdr = req_out->lk; 5391 break; 5392 case FUSE_BMAP: 5393 out_hdr = req_out->bmap; 5394 break; 5395 case FUSE_POLL: 5396 out_hdr = req_out->poll; 5397 break; 5398 case FUSE_GETXATTR: 5399 case FUSE_LISTXATTR: 5400 out_hdr = req_out->getxattr; 5401 break; 5402 case FUSE_WRITE: 5403 case FUSE_COPY_FILE_RANGE: 5404 out_hdr = req_out->write; 5405 break; 5406 case FUSE_FORGET: 5407 case FUSE_BATCH_FORGET: 5408 // FUSE_FORGET and FUSE_BATCH_FORGET expect no reply. 5409 return 0; 5410 case FUSE_CREATE: 5411 out_hdr = req_out->create_open; 5412 break; 5413 case FUSE_IOCTL: 5414 out_hdr = req_out->ioctl; 5415 break; 5416 default: 5417 debug("syz_fuse_handle_req: unknown FUSE opcode\n"); 5418 return -1; 5419 } 5420 5421 return fuse_send_response(fd, in_hdr, out_hdr); 5422 } 5423 #endif 5424 5425 #if SYZ_EXECUTOR || __NR_syz_80211_inject_frame 5426 #include <errno.h> 5427 #include <linux/genetlink.h> 5428 #include <linux/if_ether.h> 5429 #include <linux/nl80211.h> 5430 #include <net/if.h> 5431 #include <sys/ioctl.h> 5432 5433 // This pseudo syscall performs 802.11 frame injection. 5434 // 5435 // Its current implementation performs the injection by means of mac80211_hwsim. 5436 // The procedure consists of the following steps: 5437 // 1. 
Open a netlink socket 5438 // 2. Register as an application responsible for wireless medium simulation by executing 5439 // HWSIM_CMD_REGISTER. This is a preq-requisite for the following step. After HWSIM_CMD_REGISTER 5440 // is executed, mac80211_hwsim stops simulating a perfect medium. 5441 // It is also important to note that this command registers a specific socket, not a netlink port. 5442 // 3. Inject a frame to the required interface by executing HWSIM_CMD_FRAME. 5443 // 4. Close the socket. mac80211_hwsim will detect this and return to perfect medium simulation. 5444 // 5445 // Note that we cannot (should not) open a socket, register it once and then use it for frame injection 5446 // throughout the lifetime of a proc. When some socket is registered, mac80211_hwsim does not broadcast 5447 // frames to all interfaces itself. As we do not perform this activity either, a permanently registered 5448 // socket will disrupt normal network operation. 5449 5450 #define HWSIM_ATTR_RX_RATE 5 5451 #define HWSIM_ATTR_SIGNAL 6 5452 #define HWSIM_ATTR_ADDR_RECEIVER 1 5453 #define HWSIM_ATTR_FRAME 3 5454 5455 #define WIFI_MAX_INJECT_LEN 2048 5456 5457 static int hwsim_register_socket(struct nlmsg* nlmsg, int sock, int hwsim_family) 5458 { 5459 struct genlmsghdr genlhdr; 5460 memset(&genlhdr, 0, sizeof(genlhdr)); 5461 genlhdr.cmd = HWSIM_CMD_REGISTER; 5462 netlink_init(nlmsg, hwsim_family, 0, &genlhdr, sizeof(genlhdr)); 5463 int err = netlink_send_ext(nlmsg, sock, 0, NULL, false); 5464 if (err < 0) { 5465 debug("hwsim_register_device failed: %s\n", strerror(errno)); 5466 } 5467 return err; 5468 } 5469 5470 static int hwsim_inject_frame(struct nlmsg* nlmsg, int sock, int hwsim_family, uint8* mac_addr, uint8* data, int len) 5471 { 5472 struct genlmsghdr genlhdr; 5473 uint32 rx_rate = WIFI_DEFAULT_RX_RATE; 5474 uint32 signal = WIFI_DEFAULT_SIGNAL; 5475 5476 memset(&genlhdr, 0, sizeof(genlhdr)); 5477 genlhdr.cmd = HWSIM_CMD_FRAME; 5478 netlink_init(nlmsg, hwsim_family, 0, 
&genlhdr, sizeof(genlhdr)); 5479 netlink_attr(nlmsg, HWSIM_ATTR_RX_RATE, &rx_rate, sizeof(rx_rate)); 5480 netlink_attr(nlmsg, HWSIM_ATTR_SIGNAL, &signal, sizeof(signal)); 5481 netlink_attr(nlmsg, HWSIM_ATTR_ADDR_RECEIVER, mac_addr, ETH_ALEN); 5482 netlink_attr(nlmsg, HWSIM_ATTR_FRAME, data, len); 5483 int err = netlink_send_ext(nlmsg, sock, 0, NULL, false); 5484 if (err < 0) { 5485 debug("hwsim_inject_frame failed: %s\n", strerror(errno)); 5486 } 5487 return err; 5488 } 5489 5490 static long syz_80211_inject_frame(volatile long a0, volatile long a1, volatile long a2) 5491 { 5492 uint8* mac_addr = (uint8*)a0; 5493 uint8* buf = (uint8*)a1; 5494 int buf_len = (int)a2; 5495 struct nlmsg tmp_msg; 5496 5497 if (buf_len < 0 || buf_len > WIFI_MAX_INJECT_LEN) { 5498 debug("syz_80211_inject_frame: wrong buffer size %d\n", buf_len); 5499 return -1; 5500 } 5501 5502 int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 5503 if (sock < 0) { 5504 debug("syz_80211_inject_frame: socket creation failed, errno %d\n", errno); 5505 return -1; 5506 } 5507 5508 int hwsim_family_id = netlink_query_family_id(&tmp_msg, sock, "MAC80211_HWSIM", false); 5509 int ret = hwsim_register_socket(&tmp_msg, sock, hwsim_family_id); 5510 if (ret < 0) { 5511 debug("syz_80211_inject_frame: failed to register socket, ret %d\n", ret); 5512 close(sock); 5513 return -1; 5514 } 5515 5516 ret = hwsim_inject_frame(&tmp_msg, sock, hwsim_family_id, mac_addr, buf, buf_len); 5517 close(sock); 5518 if (ret < 0) { 5519 debug("syz_80211_inject_frame: failed to inject message, ret %d\n", ret); 5520 return -1; 5521 } 5522 5523 return 0; 5524 } 5525 5526 #endif 5527 5528 #if SYZ_EXECUTOR || __NR_syz_80211_join_ibss 5529 5530 #define WIFI_MAX_SSID_LEN 32 5531 5532 #define WIFI_JOIN_IBSS_NO_SCAN 0 5533 #define WIFI_JOIN_IBSS_BG_SCAN 1 5534 #define WIFI_JOIN_IBSS_BG_NO_SCAN 2 5535 5536 static long syz_80211_join_ibss(volatile long a0, volatile long a1, volatile long a2, volatile long a3) 5537 { 5538 char* interface = 
(char*)a0; 5539 uint8* ssid = (uint8*)a1; 5540 int ssid_len = (int)a2; 5541 int mode = (int)a3; // This parameter essentially determines whether it will perform a scan 5542 5543 struct nlmsg tmp_msg; 5544 uint8 bssid[ETH_ALEN] = WIFI_IBSS_BSSID; 5545 5546 if (ssid_len < 0 || ssid_len > WIFI_MAX_SSID_LEN) { 5547 debug("syz_80211_join_ibss: invalid ssid len %d\n", ssid_len); 5548 return -1; 5549 } 5550 5551 if (mode < 0 || mode > WIFI_JOIN_IBSS_BG_NO_SCAN) { 5552 debug("syz_80211_join_ibss: invalid mode %d\n", mode); 5553 return -1; 5554 } 5555 5556 int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 5557 if (sock < 0) { 5558 debug("syz_80211_join_ibss: socket creation failed, errno %d\n", errno); 5559 return -1; 5560 } 5561 5562 int nl80211_family_id = netlink_query_family_id(&tmp_msg, sock, "nl80211", false); 5563 struct join_ibss_props ibss_props = { 5564 .wiphy_freq = WIFI_DEFAULT_FREQUENCY, 5565 .wiphy_freq_fixed = (mode == WIFI_JOIN_IBSS_NO_SCAN || mode == WIFI_JOIN_IBSS_BG_NO_SCAN), 5566 .mac = bssid, 5567 .ssid = ssid, 5568 .ssid_len = ssid_len}; 5569 5570 int ret = nl80211_setup_ibss_interface(&tmp_msg, sock, nl80211_family_id, interface, &ibss_props, false); 5571 close(sock); 5572 if (ret < 0) { 5573 debug("syz_80211_join_ibss: failed set up IBSS network for %.32s\n", interface); 5574 return -1; 5575 } 5576 5577 if (mode == WIFI_JOIN_IBSS_NO_SCAN) { 5578 ret = await_ifla_operstate(&tmp_msg, interface, IF_OPER_UP, false); 5579 if (ret < 0) { 5580 debug("syz_80211_join_ibss: await_ifla_operstate failed for %.32s, ret %d\n", interface, ret); 5581 return -1; 5582 } 5583 } 5584 5585 return 0; 5586 } 5587 5588 #endif 5589 5590 #if SYZ_EXECUTOR || __NR_syz_clone || __NR_syz_clone3 5591 #if SYZ_EXECUTOR 5592 // The slowdown multiplier is already taken into account. 
5593 #define USLEEP_FORKED_CHILD (3 * syscall_timeout_ms * 1000) 5594 #else 5595 #define USLEEP_FORKED_CHILD (3 * /*{{{BASE_CALL_TIMEOUT_MS}}}*/ *1000) 5596 #endif 5597 5598 static long handle_clone_ret(long ret) 5599 { 5600 if (ret != 0) { 5601 #if SYZ_EXECUTOR || SYZ_HANDLE_SEGV 5602 __atomic_store_n(&clone_ongoing, 0, __ATOMIC_RELAXED); 5603 #endif 5604 return ret; 5605 } 5606 // Exit if we're in the child process - not all kernels provide the proper means 5607 // to prevent fork-bombs. 5608 // But first sleep for some time. This will hopefully foster IPC fuzzing. 5609 usleep(USLEEP_FORKED_CHILD); 5610 // Note that exit_group is a bad choice here because if we created just a thread, then 5611 // the whole process will be killed. A plain exit will work fine in any case. 5612 syscall(__NR_exit, 0); 5613 while (1) { 5614 } 5615 } 5616 #endif 5617 5618 #if SYZ_EXECUTOR || __NR_syz_clone 5619 #include <sched.h> 5620 5621 // syz_clone is mostly needed on kernels which do not suport clone3. 5622 static long syz_clone(volatile long flags, volatile long stack, volatile long stack_len, 5623 volatile long ptid, volatile long ctid, volatile long tls) 5624 { 5625 // ABI requires 16-byte stack alignment. 5626 long sp = (stack + stack_len) & ~15; 5627 #if SYZ_EXECUTOR || SYZ_HANDLE_SEGV 5628 __atomic_store_n(&clone_ongoing, 1, __ATOMIC_RELAXED); 5629 #endif 5630 // Clear the CLONE_VM flag. Otherwise it'll very likely corrupt syz-executor. 
5631 long ret = (long)syscall(__NR_clone, flags & ~CLONE_VM, sp, ptid, ctid, tls); 5632 return handle_clone_ret(ret); 5633 } 5634 #endif 5635 5636 #if SYZ_EXECUTOR || __NR_syz_clone3 5637 #include <linux/sched.h> 5638 #include <sched.h> 5639 5640 #define MAX_CLONE_ARGS_BYTES 256 5641 static long syz_clone3(volatile long a0, volatile long a1) 5642 { 5643 unsigned long copy_size = a1; 5644 if (copy_size < sizeof(uint64) || copy_size > MAX_CLONE_ARGS_BYTES) 5645 return -1; 5646 // The structure may have different sizes on different kernel versions, so copy it as raw bytes. 5647 char clone_args[MAX_CLONE_ARGS_BYTES]; 5648 memcpy(&clone_args, (void*)a0, copy_size); 5649 5650 // As in syz_clone, clear the CLONE_VM flag. Flags are in the first 8-byte integer field. 5651 uint64* flags = (uint64*)&clone_args; 5652 *flags &= ~CLONE_VM; 5653 #if SYZ_EXECUTOR || SYZ_HANDLE_SEGV 5654 __atomic_store_n(&clone_ongoing, 1, __ATOMIC_RELAXED); 5655 #endif 5656 return handle_clone_ret((long)syscall(__NR_clone3, &clone_args, copy_size)); 5657 } 5658 5659 #endif 5660 5661 #if SYZ_EXECUTOR || __NR_syz_pkey_set 5662 // syz_pkey_set(key pkey, val flags[pkey_flags]) 5663 static long syz_pkey_set(volatile long pkey, volatile long val) 5664 { 5665 #if GOARCH_amd64 || GOARCH_386 5666 uint32 eax = 0; 5667 uint32 ecx = 0; 5668 asm volatile("rdpkru" 5669 : "=a"(eax) 5670 : "c"(ecx) 5671 : "edx"); 5672 // PKRU register contains 2 bits per key. 5673 // Max number of keys is 16. 
5674 // Clear old bits for the key: 5675 eax &= ~(3 << ((pkey % 16) * 2)); 5676 // Set new bits for the key: 5677 eax |= (val & 3) << ((pkey % 16) * 2); 5678 uint32 edx = 0; 5679 asm volatile("wrpkru" ::"a"(eax), "c"(ecx), "d"(edx)); 5680 #endif 5681 return 0; 5682 } 5683 #endif 5684 5685 #if SYZ_EXECUTOR || SYZ_SWAP 5686 #include <fcntl.h> 5687 #include <linux/falloc.h> 5688 #include <stdio.h> 5689 #include <string.h> 5690 #include <sys/stat.h> 5691 #include <sys/swap.h> 5692 #include <sys/types.h> 5693 5694 #define SWAP_FILE "./swap-file" 5695 #define SWAP_FILE_SIZE (128 * 1000 * 1000) // 128 MB. 5696 5697 static void setup_swap() 5698 { 5699 // The call must be idempotent, so first disable swap and remove the swap file. 5700 swapoff(SWAP_FILE); 5701 unlink(SWAP_FILE); 5702 // Zero-fill the file. 5703 int fd = open(SWAP_FILE, O_CREAT | O_WRONLY | O_CLOEXEC, 0600); 5704 if (fd == -1) 5705 failmsg("swap file open failed", "file: %s", SWAP_FILE); 5706 // We cannot do ftruncate -- swapon complains about this. Do fallocate instead. 5707 fallocate(fd, FALLOC_FL_ZERO_RANGE, 0, SWAP_FILE_SIZE); 5708 close(fd); 5709 // Set up the swap file. 5710 char cmdline[64]; 5711 sprintf(cmdline, "mkswap %s", SWAP_FILE); 5712 if (runcmdline(cmdline)) 5713 fail("mkswap failed"); 5714 if (swapon(SWAP_FILE, SWAP_FLAG_PREFER) == 1) 5715 failmsg("swapon failed", "file: %s", SWAP_FILE); 5716 } 5717 5718 #endif 5719 5720 #if SYZ_EXECUTOR || __NR_syz_pidfd_open 5721 #include <sys/syscall.h> 5722 5723 // TODO: long-term we should improve our sandboxing rules since there are also 5724 // many other opportunities for a fuzzer process to access what it shouldn't. 5725 // Here we only shut down one of the recently discovered ways. 5726 static long syz_pidfd_open(volatile long pid, volatile long flags) 5727 { 5728 if (pid == 1) { 5729 // Under a PID namespace, pid=1 is the parent process. 5730 // We don't want a forked child to mangle parent syz-executor's fds. 
5731 pid = 0; 5732 } 5733 return syscall(__NR_pidfd_open, pid, flags); 5734 } 5735 5736 #endif