gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/test/syscalls/linux/tuntap.cc (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include <arpa/inet.h>
    16  #include <asm-generic/errno.h>
    17  #include <linux/capability.h>
    18  #include <linux/if_arp.h>
    19  #include <linux/if_ether.h>
    20  #include <linux/if_tun.h>
    21  #include <netinet/ip.h>
    22  #include <netinet/ip_icmp.h>
    23  #include <poll.h>
    24  #include <sys/ioctl.h>
    25  #include <sys/socket.h>
    26  #include <sys/types.h>
    27  
    28  #include <cstddef>
    29  #include <cstring>
    30  
    31  #include "gmock/gmock.h"
    32  #include "gtest/gtest.h"
    33  #include "absl/strings/ascii.h"
    34  #include "absl/strings/str_split.h"
    35  #include "test/syscalls/linux/socket_netlink_route_util.h"
    36  #include "test/util/capability_util.h"
    37  #include "test/util/file_descriptor.h"
    38  #include "test/util/fs_util.h"
    39  #include "test/util/posix_error.h"
    40  #include "test/util/socket_util.h"
    41  #include "test/util/test_util.h"
    42  
    43  namespace gvisor {
    44  namespace testing {
    45  namespace {
    46  
    47  constexpr int kIPLen = 4;
    48  
    49  constexpr const char kDevNetTun[] = "/dev/net/tun";
    50  constexpr const char kTapName[] = "tap0";
    51  constexpr const char kTunName[] = "tun0";
    52  
    53  #define kTapIPAddr htonl(0x0a000001)     /* Inet 10.0.0.1 */
    54  #define kTapPeerIPAddr htonl(0x0a000002) /* Inet 10.0.0.2 */
    55  
    56  constexpr const uint8_t kMacA[ETH_ALEN] = {0xAA, 0xAA, 0xAA, 0xAA, 0xAA, 0xAA};
    57  constexpr const uint8_t kMacB[ETH_ALEN] = {0xBB, 0xBB, 0xBB, 0xBB, 0xBB, 0xBB};
    58  
    59  PosixErrorOr<std::set<std::string>> DumpLinkNames() {
    60    ASSIGN_OR_RETURN_ERRNO(auto links, DumpLinks());
    61    std::set<std::string> names;
    62    for (const auto& link : links) {
    63      names.emplace(link.name);
    64    }
    65    return names;
    66  }
    67  
    68  PosixErrorOr<Link> GetLinkByName(const std::string& name) {
    69    ASSIGN_OR_RETURN_ERRNO(auto links, DumpLinks());
    70    for (const auto& link : links) {
    71      if (link.name == name) {
    72        return link;
    73      }
    74    }
    75    return PosixError(ENOENT, "interface not found");
    76  }
    77  
    78  struct ping_ip_pkt {
    79    iphdr ip;
    80    icmphdr icmp;
    81    char payload[64];
    82  } __attribute__((packed));
    83  
    84  ping_ip_pkt CreatePingIPPacket(const in_addr_t srcip, const in_addr_t dstip) {
    85    ping_ip_pkt pkt = {};
    86  
    87    pkt.ip.ihl = 5;
    88    pkt.ip.version = 4;
    89    pkt.ip.tos = 0;
    90    pkt.ip.tot_len = htons(sizeof(struct iphdr) + sizeof(struct icmphdr) +
    91                           sizeof(pkt.payload));
    92    pkt.ip.id = 1;
    93    pkt.ip.frag_off = 1 << 6;  // Do not fragment
    94    pkt.ip.ttl = 64;
    95    pkt.ip.protocol = IPPROTO_ICMP;
    96    pkt.ip.daddr = dstip;
    97    pkt.ip.saddr = srcip;
    98    pkt.ip.check = IPChecksum(pkt.ip);
    99  
   100    pkt.icmp.type = ICMP_ECHO;
   101    pkt.icmp.code = 0;
   102    pkt.icmp.checksum = 0;
   103    pkt.icmp.un.echo.sequence = 1;
   104    pkt.icmp.un.echo.id = 1;
   105  
   106    strncpy(pkt.payload, "abcd", sizeof(pkt.payload));
   107    pkt.icmp.checksum = ICMPChecksum(pkt.icmp, pkt.payload, sizeof(pkt.payload));
   108  
   109    return pkt;
   110  }
   111  
   112  struct pihdr {
   113    uint16_t pi_flags;
   114    uint16_t pi_protocol;
   115  } __attribute__((packed));
   116  
   117  struct ping_pkt {
   118    pihdr pi;
   119    ethhdr eth;
   120    ping_ip_pkt ip_pkt;
   121  } __attribute__((packed));
   122  
   123  ping_pkt CreatePingPacket(const uint8_t srcmac[ETH_ALEN], const in_addr_t srcip,
   124                            const uint8_t dstmac[ETH_ALEN],
   125                            const in_addr_t dstip) {
   126    ping_pkt pkt = {};
   127  
   128    pkt.pi.pi_protocol = htons(ETH_P_IP);
   129  
   130    memcpy(pkt.eth.h_dest, dstmac, sizeof(pkt.eth.h_dest));
   131    memcpy(pkt.eth.h_source, srcmac, sizeof(pkt.eth.h_source));
   132    pkt.eth.h_proto = htons(ETH_P_IP);
   133  
   134    pkt.ip_pkt = CreatePingIPPacket(srcip, dstip);
   135  
   136    return pkt;
   137  }
   138  
   139  struct arp_pkt {
   140    pihdr pi;
   141    struct ethhdr eth;
   142    struct arphdr arp;
   143    uint8_t arp_sha[ETH_ALEN];
   144    uint8_t arp_spa[kIPLen];
   145    uint8_t arp_tha[ETH_ALEN];
   146    uint8_t arp_tpa[kIPLen];
   147  } __attribute__((packed));
   148  
   149  std::string CreateArpPacket(const uint8_t srcmac[ETH_ALEN],
   150                              const in_addr_t srcip,
   151                              const uint8_t dstmac[ETH_ALEN],
   152                              const in_addr_t dstip) {
   153    std::string buffer;
   154    buffer.resize(sizeof(arp_pkt));
   155  
   156    arp_pkt* pkt = reinterpret_cast<arp_pkt*>(&buffer[0]);
   157    {
   158      pkt->pi.pi_protocol = htons(ETH_P_ARP);
   159  
   160      memcpy(pkt->eth.h_dest, kMacA, sizeof(pkt->eth.h_dest));
   161      memcpy(pkt->eth.h_source, kMacB, sizeof(pkt->eth.h_source));
   162      pkt->eth.h_proto = htons(ETH_P_ARP);
   163  
   164      pkt->arp.ar_hrd = htons(ARPHRD_ETHER);
   165      pkt->arp.ar_pro = htons(ETH_P_IP);
   166      pkt->arp.ar_hln = ETH_ALEN;
   167      pkt->arp.ar_pln = kIPLen;
   168      pkt->arp.ar_op = htons(ARPOP_REPLY);
   169  
   170      memcpy(pkt->arp_sha, srcmac, sizeof(pkt->arp_sha));
   171      memcpy(pkt->arp_spa, &srcip, sizeof(pkt->arp_spa));
   172      memcpy(pkt->arp_tha, dstmac, sizeof(pkt->arp_tha));
   173      memcpy(pkt->arp_tpa, &dstip, sizeof(pkt->arp_tpa));
   174    }
   175    return buffer;
   176  }
   177  
   178  }  // namespace
   179  
   180  TEST(TuntapStaticTest, NetTunExists) {
   181    struct stat statbuf;
   182    ASSERT_THAT(stat(kDevNetTun, &statbuf), SyscallSucceeds());
   183    // Check that it's a character device with rw-rw-rw- permissions.
   184    EXPECT_EQ(statbuf.st_mode, S_IFCHR | 0666);
   185  }
   186  
   187  class TuntapTest : public ::testing::Test {
   188   protected:
   189    void SetUp() override {
   190      const bool have_net_admin_cap =
   191          ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN));
   192  
   193      if (have_net_admin_cap && !IsRunningOnGvisor()) {
   194        // gVisor always creates enabled/up'd interfaces, while Linux does not (as
   195        // observed in b/110961832). Some of the tests require the Linux stack to
   196        // notify the socket of any link-address-resolution failures. Those
   197        // notifications do not seem to show up when the loopback interface in the
   198        // namespace is down.
   199        auto link = ASSERT_NO_ERRNO_AND_VALUE(GetLinkByName("lo"));
   200        ASSERT_NO_ERRNO(LinkChangeFlags(link.index, IFF_UP, IFF_UP));
   201      }
   202    }
   203  };
   204  
   205  TEST_F(TuntapTest, CreateInterfaceNoCap) {
   206    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
   207  
   208    AutoCapability cap(CAP_NET_ADMIN, false);
   209  
   210    FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));
   211  
   212    struct ifreq ifr = {};
   213    ifr.ifr_flags = IFF_TAP;
   214    strncpy(ifr.ifr_name, kTapName, IFNAMSIZ);
   215  
   216    EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr), SyscallFailsWithErrno(EPERM));
   217  }
   218  
   219  TEST_F(TuntapTest, CreateFixedNameInterface) {
   220    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
   221  
   222    FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));
   223  
   224    struct ifreq ifr_set;
   225    memset(&ifr_set, 0, sizeof(ifr_set));
   226    ifr_set.ifr_flags = IFF_TAP;
   227    strncpy(ifr_set.ifr_name, kTapName, IFNAMSIZ);
   228    EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr_set),
   229                SyscallSucceedsWithValue(0));
   230  
   231    struct ifreq ifr_get;
   232    memset(&ifr_get, 0, sizeof(ifr_get));
   233    EXPECT_THAT(ioctl(fd.get(), TUNGETIFF, &ifr_get),
   234                SyscallSucceedsWithValue(0));
   235  
   236    struct ifreq ifr_expect = ifr_set;
   237    // See __tun_chr_ioctl() in net/drivers/tun.c.
   238    ifr_expect.ifr_flags |= IFF_NOFILTER;
   239  
   240    EXPECT_THAT(DumpLinkNames(),
   241                IsPosixErrorOkAndHolds(::testing::Contains(kTapName)));
   242    EXPECT_THAT(memcmp(&ifr_expect, &ifr_get, sizeof(ifr_get)), ::testing::Eq(0));
   243  }
   244  
   245  TEST_F(TuntapTest, CreateInterface) {
   246    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
   247  
   248    FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));
   249  
   250    struct ifreq ifr = {};
   251    ifr.ifr_flags = IFF_TAP;
   252    // Empty ifr.ifr_name. Let kernel assign.
   253  
   254    EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr), SyscallSucceedsWithValue(0));
   255  
   256    struct ifreq ifr_get = {};
   257    EXPECT_THAT(ioctl(fd.get(), TUNGETIFF, &ifr_get),
   258                SyscallSucceedsWithValue(0));
   259  
   260    std::string ifname = ifr_get.ifr_name;
   261    EXPECT_THAT(ifname, ::testing::StartsWith("tap"));
   262    EXPECT_THAT(DumpLinkNames(),
   263                IsPosixErrorOkAndHolds(::testing::Contains(ifname)));
   264  }
   265  
   266  TEST_F(TuntapTest, InvalidReadWrite) {
   267    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
   268  
   269    FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));
   270  
   271    char buf[128] = {};
   272    EXPECT_THAT(read(fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(EBADFD));
   273    EXPECT_THAT(write(fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(EBADFD));
   274  }
   275  
   276  TEST_F(TuntapTest, ZeroWrite) {
   277    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
   278  
   279    FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));
   280    struct ifreq ifr_set = {};
   281    ifr_set.ifr_flags = IFF_TUN | IFF_NO_PI;
   282    strncpy(ifr_set.ifr_name, kTunName, IFNAMSIZ);
   283    EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr_set), SyscallSucceeds());
   284    EXPECT_THAT(write(fd.get(), nullptr, 0), SyscallFailsWithErrno(EINVAL));
   285  }
   286  
   287  TEST_F(TuntapTest, WriteToDownDevice) {
   288    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
   289  
   290    // FIXME(b/110961832): gVisor always creates enabled/up'd interfaces.
   291    SKIP_IF(IsRunningOnGvisor());
   292  
   293    FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));
   294  
   295    // Device created should be down by default.
   296    struct ifreq ifr = {};
   297    ifr.ifr_flags = IFF_TAP;
   298    EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr), SyscallSucceedsWithValue(0));
   299  
   300    char buf[128] = {};
   301    EXPECT_THAT(write(fd.get(), buf, sizeof(buf)), SyscallFailsWithErrno(EIO));
   302  }
   303  
   304  struct TunTapInterface {
   305    FileDescriptor fd;
   306    Link link;
   307  };
   308  
   309  PosixErrorOr<TunTapInterface> OpenAndAttachTunTap(const std::string& dev_name,
   310                                                    const in_addr_t dev_addr,
   311                                                    bool tap, bool no_pi) {
   312    // Interface creation.
   313    ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, Open(kDevNetTun, O_RDWR));
   314  
   315    struct ifreq ifr_set = {};
   316    if (tap) {
   317      ifr_set.ifr_flags |= IFF_TAP;
   318    } else {
   319      ifr_set.ifr_flags |= IFF_TUN;
   320    }
   321    if (no_pi) {
   322      ifr_set.ifr_flags |= IFF_NO_PI;
   323    }
   324    strncpy(ifr_set.ifr_name, dev_name.c_str(), IFNAMSIZ);
   325    if (ioctl(fd.get(), TUNSETIFF, &ifr_set) < 0) {
   326      return PosixError(errno);
   327    }
   328  
   329    ASSIGN_OR_RETURN_ERRNO(auto link, GetLinkByName(dev_name));
   330  
   331    const struct in_addr dev_ipv4_addr = {.s_addr = dev_addr};
   332    // Interface setup.
   333    EXPECT_NO_ERRNO(LinkAddLocalAddr(link.index, AF_INET, /*prefixlen=*/24,
   334                                     &dev_ipv4_addr, sizeof(dev_ipv4_addr)));
   335  
   336    if (!IsRunningOnGvisor()) {
   337      // FIXME(b/110961832): gVisor doesn't support setting MAC address on
   338      // interfaces yet.
   339      if (tap) {
   340        RETURN_IF_ERRNO(LinkSetMacAddr(link.index, kMacA, sizeof(kMacA)));
   341      }
   342  
   343      // FIXME(b/110961832): gVisor always creates enabled/up'd interfaces.
   344      RETURN_IF_ERRNO(LinkChangeFlags(link.index, IFF_UP, IFF_UP));
   345    }
   346  
   347    return TunTapInterface{.fd = std::move(fd), .link = std::move(link)};
   348  }
   349  
   350  // This test sets up a TAP device and pings kernel by sending ICMP echo request.
   351  //
   352  // It works as the following:
   353  // * Open /dev/net/tun, and create kTapName interface.
   354  // * Use rtnetlink to do initial setup of the interface:
   355  //   * Assign IP address 10.0.0.1/24 to kernel.
   356  //   * MAC address: kMacA
   357  //   * Bring up the interface.
   358  // * Send an ICMP echo reqest (ping) packet from 10.0.0.2 (kMacB) to kernel.
   359  // * Loop to receive packets from TAP device/fd:
   360  //   * If packet is an ICMP echo reply, it stops and passes the test.
   361  //   * If packet is an ARP request, it responds with canned reply and resends
   362  //   the
   363  //     ICMP request packet.
   364  TEST_F(TuntapTest, PingKernel) {
   365    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
   366  
   367    const auto& [fd, link] = ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTunTap(
   368        kTapName, kTapIPAddr, true /* tap */, false /* no_pi */));
   369  
   370    ping_pkt ping_req =
   371        CreatePingPacket(kMacB, kTapPeerIPAddr, kMacA, kTapIPAddr);
   372    std::string arp_rep =
   373        CreateArpPacket(kMacB, kTapPeerIPAddr, kMacA, kTapIPAddr);
   374  
   375    // Send ping, this would trigger an ARP request on Linux.
   376    EXPECT_THAT(write(fd.get(), &ping_req, sizeof(ping_req)),
   377                SyscallSucceedsWithValue(sizeof(ping_req)));
   378  
   379    // Receive loop to process inbound packets.
   380    struct inpkt {
   381      union {
   382        pihdr pi;
   383        ping_pkt ping;
   384        arp_pkt arp;
   385      };
   386    };
   387    while (1) {
   388      inpkt r = {};
   389      size_t n;
   390      EXPECT_THAT(n = read(fd.get(), &r, sizeof(r)), SyscallSucceeds());
   391  
   392      if (n < sizeof(pihdr)) {
   393        std::cerr << "Ignored packet, protocol: " << r.pi.pi_protocol
   394                  << " len: " << n << std::endl;
   395        continue;
   396      }
   397  
   398      // Process ARP packet.
   399      if (n >= sizeof(arp_pkt) && r.pi.pi_protocol == htons(ETH_P_ARP)) {
   400        // Respond with canned ARP reply.
   401        EXPECT_THAT(write(fd.get(), arp_rep.data(), arp_rep.size()),
   402                    SyscallSucceedsWithValue(arp_rep.size()));
   403        // First ping request might have been dropped due to mac address not in
   404        // ARP cache. Send it again.
   405        EXPECT_THAT(write(fd.get(), &ping_req, sizeof(ping_req)),
   406                    SyscallSucceedsWithValue(sizeof(ping_req)));
   407      }
   408  
   409      // Process ping response packet.
   410      if (n >= sizeof(ping_pkt) && r.pi.pi_protocol == ping_req.pi.pi_protocol &&
   411          r.ping.ip_pkt.ip.protocol == ping_req.ip_pkt.ip.protocol &&
   412          !memcmp(&r.ping.ip_pkt.ip.saddr, &ping_req.ip_pkt.ip.daddr, kIPLen) &&
   413          !memcmp(&r.ping.ip_pkt.ip.daddr, &ping_req.ip_pkt.ip.saddr, kIPLen) &&
   414          r.ping.ip_pkt.icmp.type == 0 && r.ping.ip_pkt.icmp.code == 0) {
   415        // Ends and passes the test.
   416        break;
   417      }
   418    }
   419  }
   420  
   421  TEST_F(TuntapTest, LargeWritesFailWithEMSGSIZE) {
   422    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
   423  
   424    const auto& [fd, link] = ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTunTap(
   425        kTapName, kTapIPAddr, true /* tap */, false /* no_pi */));
   426  
   427    ping_pkt ping_req =
   428        CreatePingPacket(kMacB, kTapPeerIPAddr, kMacA, kTapIPAddr);
   429    std::string arp_rep =
   430        CreateArpPacket(kMacB, kTapPeerIPAddr, kMacA, kTapIPAddr);
   431  
   432    constexpr int kBufSize = 4096;
   433    std::vector<char> buf(kBufSize);
   434    struct iovec iov[2] = {
   435        {
   436            .iov_base = &ping_req,
   437            .iov_len = sizeof(ping_req),
   438        },
   439        {
   440            .iov_base = buf.data(),
   441            .iov_len = kBufSize,
   442        },
   443    };
   444  
   445    // A packet is large than MTU which is 1500 by default..
   446    EXPECT_THAT(writev(fd.get(), iov, 2), SyscallFailsWithErrno(EMSGSIZE));
   447  }
   448  
   449  TEST_F(TuntapTest, SendUdpTriggersArpResolution) {
   450    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
   451  
   452    const auto& [fd, link] = ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTunTap(
   453        kTapName, kTapIPAddr, true /* tap */, false /* no_pi */));
   454  
   455    // Send a UDP packet to remote.
   456    int sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
   457    ASSERT_THAT(sock, SyscallSucceeds());
   458  
   459    struct sockaddr_in remote = {
   460        .sin_family = AF_INET,
   461        .sin_port = htons(42),
   462        .sin_addr = {.s_addr = kTapPeerIPAddr},
   463    };
   464    ASSERT_THAT(sendto(sock, "hello", 5, 0, AsSockAddr(&remote), sizeof(remote)),
   465                SyscallSucceeds());
   466  
   467    struct inpkt {
   468      union {
   469        pihdr pi;
   470        arp_pkt arp;
   471      };
   472    };
   473    while (1) {
   474      inpkt r = {};
   475      size_t n;
   476      EXPECT_THAT(n = read(fd.get(), &r, sizeof(r)), SyscallSucceeds());
   477  
   478      if (n < sizeof(pihdr)) {
   479        std::cerr << "Ignored packet, protocol: " << r.pi.pi_protocol
   480                  << " len: " << n << std::endl;
   481        continue;
   482      }
   483  
   484      if (n >= sizeof(arp_pkt) && r.pi.pi_protocol == htons(ETH_P_ARP)) {
   485        break;
   486      }
   487    }
   488  }
   489  
   490  TEST_F(TuntapTest, TUNNoPacketInfo) {
   491    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
   492  
   493    // Interface creation.
   494    FileDescriptor fd = ASSERT_NO_ERRNO_AND_VALUE(Open(kDevNetTun, O_RDWR));
   495  
   496    struct ifreq ifr_set = {};
   497    ifr_set.ifr_flags = IFF_TUN | IFF_NO_PI;
   498    strncpy(ifr_set.ifr_name, kTunName, IFNAMSIZ);
   499    EXPECT_THAT(ioctl(fd.get(), TUNSETIFF, &ifr_set), SyscallSucceeds());
   500  
   501    // Interface setup.
   502    auto link = ASSERT_NO_ERRNO_AND_VALUE(GetLinkByName(kTunName));
   503    const struct in_addr dev_ipv4_addr = {.s_addr = kTapIPAddr};
   504    EXPECT_NO_ERRNO(LinkAddLocalAddr(link.index, AF_INET, 24, &dev_ipv4_addr,
   505                                     sizeof(dev_ipv4_addr)));
   506  
   507    ping_ip_pkt ping_req = CreatePingIPPacket(kTapPeerIPAddr, kTapIPAddr);
   508  
   509    // Send ICMP query
   510    EXPECT_THAT(write(fd.get(), &ping_req, sizeof(ping_req)),
   511                SyscallSucceedsWithValue(sizeof(ping_req)));
   512  
   513    // Receive loop to process inbound packets.
   514    while (1) {
   515      ping_ip_pkt ping_resp = {};
   516      EXPECT_THAT(read(fd.get(), &ping_resp, sizeof(ping_req)),
   517                  SyscallSucceedsWithValue(sizeof(ping_req)));
   518  
   519      // Process ping response packet.
   520      if (!memcmp(&ping_resp.ip.saddr, &ping_req.ip.daddr, kIPLen) &&
   521          !memcmp(&ping_resp.ip.daddr, &ping_req.ip.saddr, kIPLen) &&
   522          ping_resp.icmp.type == 0 && ping_resp.icmp.code == 0) {
   523        // Ends and passes the test.
   524        break;
   525      }
   526    }
   527  }
   528  
   529  // TCPBlockingConnectFailsArpResolution tests for TCP connect to fail on link
   530  // address resolution failure to a routable, but non existent peer.
   531  TEST_F(TuntapTest, TCPBlockingConnectFailsArpResolution) {
   532    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
   533  
   534    FileDescriptor sender =
   535        ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_STREAM, IPPROTO_TCP));
   536  
   537    const auto tuntap = ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTunTap(
   538        kTapName, kTapIPAddr, true /* tap */, false /* no_pi */));
   539  
   540    sockaddr_in connect_addr = {
   541        .sin_family = AF_INET,
   542        .sin_addr = {.s_addr = kTapPeerIPAddr},
   543    };
   544    ASSERT_THAT(connect(sender.get(),
   545                        reinterpret_cast<const struct sockaddr*>(&connect_addr),
   546                        sizeof(connect_addr)),
   547                SyscallFailsWithErrno(EHOSTUNREACH));
   548  }
   549  
   550  // TCPNonBlockingConnectFailsArpResolution tests for TCP non-blocking connect to
   551  // to trigger an error event to be notified to poll on link address resolution
   552  // failure to a routable, but non existent peer.
   553  TEST_F(TuntapTest, TCPNonBlockingConnectFailsArpResolution) {
   554    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
   555  
   556    FileDescriptor sender = ASSERT_NO_ERRNO_AND_VALUE(
   557        Socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
   558  
   559    const auto tuntap = ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTunTap(
   560        kTapName, kTapIPAddr, true /* tap */, false /* no_pi */));
   561  
   562    sockaddr_in connect_addr = {
   563        .sin_family = AF_INET,
   564        .sin_addr = {.s_addr = kTapPeerIPAddr},
   565    };
   566    ASSERT_THAT(connect(sender.get(),
   567                        reinterpret_cast<const struct sockaddr*>(&connect_addr),
   568                        sizeof(connect_addr)),
   569                SyscallFailsWithErrno(EINPROGRESS));
   570  
   571    constexpr int kTimeout = 10000;
   572    struct pollfd pfd = {
   573        .fd = sender.get(),
   574        .events = POLLIN | POLLOUT,
   575    };
   576    ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1));
   577    ASSERT_EQ(pfd.revents, POLLIN | POLLOUT | POLLHUP | POLLERR);
   578  
   579    ASSERT_THAT(connect(sender.get(),
   580                        reinterpret_cast<const struct sockaddr*>(&connect_addr),
   581                        sizeof(connect_addr)),
   582                SyscallFailsWithErrno(EHOSTUNREACH));
   583  }
   584  
   585  // Write hang bug found by syskaller: b/155928773
   586  // https://syzkaller.appspot.com/bug?id=065b893bd8d1d04a4e0a1d53c578537cde1efe99
   587  TEST_F(TuntapTest, WriteHangBug155928773) {
   588    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
   589  
   590    const auto tuntap = ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTunTap(
   591        kTapName, kTapIPAddr, true /* tap */, false /* no_pi */));
   592  
   593    int sock = socket(AF_INET, SOCK_DGRAM, 0);
   594    ASSERT_THAT(sock, SyscallSucceeds());
   595  
   596    struct sockaddr_in remote = {
   597        .sin_family = AF_INET,
   598        .sin_port = htons(42),
   599        .sin_addr = {.s_addr = kTapIPAddr},
   600    };
   601    // Return values do not matter in this test.
   602    connect(sock, AsSockAddr(&remote), sizeof(remote));
   603    write(sock, "hello", 5);
   604  }
   605  
   606  // Test that raw packet sockets do not need/include link headers when
   607  // sending/receiving packets to/from pure L3 (e.g. TUN) interfaces.
   608  TEST_F(TuntapTest, RawPacketSocket) {
   609    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_ADMIN)));
   610    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   611  
   612    auto [tun, link] = ASSERT_NO_ERRNO_AND_VALUE(OpenAndAttachTunTap(
   613        kTunName, kTapIPAddr, false /* tap */, true /* no_pi */));
   614    FileDescriptor packet_sock =
   615        ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_PACKET, SOCK_RAW, htons(ETH_P_IP)));
   616  
   617    constexpr int kInfiniteTimeout = -1;
   618  
   619    uint8_t hardware_address_length = 0;
   620    if (IsRunningOnGvisor()) {
   621      // TODO(https://gvisor.dev/issue/6530): Do not assume all interfaces have
   622      // an ethernet address.
   623      hardware_address_length = ETH_ALEN;
   624    }
   625  
   626    {
   627      const ping_ip_pkt ping_req = CreatePingIPPacket(kTapPeerIPAddr, kTapIPAddr);
   628      ASSERT_THAT(write(tun.get(), &ping_req, sizeof(ping_req)),
   629                  SyscallSucceedsWithValue(sizeof(ping_req)));
   630      // Wait for the packet socket to become readable.
   631      pollfd pfd = {
   632          .fd = packet_sock.get(),
   633          .events = POLLIN,
   634      };
   635      ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, kInfiniteTimeout),
   636                  SyscallSucceedsWithValue(1));
   637  
   638      char read_buf[sizeof(ping_req) + 1];
   639      struct sockaddr_ll src;
   640      socklen_t src_len = sizeof(src);
   641      ASSERT_THAT(recvfrom(packet_sock.get(), read_buf, sizeof(read_buf), 0,
   642                           reinterpret_cast<struct sockaddr*>(&src), &src_len),
   643                  SyscallSucceedsWithValue(sizeof(ping_req)));
   644      EXPECT_EQ(memcmp(read_buf, &ping_req, sizeof(ping_req)), 0);
   645      ASSERT_EQ(src_len, sizeof(src));
   646      EXPECT_EQ(src.sll_family, AF_PACKET);
   647      EXPECT_EQ(ntohs(src.sll_protocol), ETH_P_IP);
   648      EXPECT_EQ(src.sll_ifindex, link.index);
   649      EXPECT_EQ(src.sll_pkttype, PACKET_HOST);
   650      EXPECT_EQ(src.sll_halen, hardware_address_length);
   651      if (IsRunningOnGvisor()) {
   652        // TODO(https://gvisor.dev/issue/6531): Check this field for the right
   653        // hardware type.
   654        EXPECT_EQ(src.sll_hatype, 0);
   655      } else {
   656        EXPECT_EQ(src.sll_hatype, ARPHRD_NONE);
   657      }
   658    }
   659  
   660    {
   661      const struct sockaddr_ll dest = {
   662          .sll_family = AF_PACKET,
   663          .sll_protocol = htons(ETH_P_IP),
   664          .sll_ifindex = link.index,
   665          .sll_halen = hardware_address_length,
   666      };
   667  
   668      const ping_ip_pkt ping_req = CreatePingIPPacket(kTapIPAddr, kTapPeerIPAddr);
   669      ASSERT_THAT(
   670          sendto(packet_sock.get(), &ping_req, sizeof(ping_req), 0,
   671                 reinterpret_cast<const struct sockaddr*>(&dest), sizeof(dest)),
   672          SyscallSucceedsWithValue(sizeof(ping_req)));
   673  
   674      // Loop until we receive the packet we expect - the kernel may send packets
   675      // we do not care about.
   676      while (true) {
   677        // Wait for the TUN interface to become readable.
   678        pollfd pfd = {
   679            .fd = tun.get(),
   680            .events = POLLIN,
   681        };
   682        ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, kInfiniteTimeout),
   683                    SyscallSucceedsWithValue(1));
   684  
   685        char read_buf[sizeof(ping_req) + 1];
   686        int n = read(tun.get(), &read_buf, sizeof(read_buf));
   687        ASSERT_THAT(n, SyscallSucceeds());
   688        if (n == sizeof(ping_req) &&
   689            memcmp(read_buf, &ping_req, sizeof(ping_req)) == 0) {
   690          break;
   691        }
   692      }
   693    }
   694  }
   695  
   696  }  // namespace testing
   697  }  // namespace gvisor