github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/test/syscalls/linux/packet_socket_raw.cc (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include <arpa/inet.h>
    16  #include <linux/capability.h>
    17  #include <linux/filter.h>
    18  #include <linux/if_arp.h>
    19  #include <linux/if_packet.h>
    20  #include <net/ethernet.h>
    21  #include <netinet/in.h>
    22  #include <netinet/ip.h>
    23  #include <netinet/udp.h>
    24  #include <poll.h>
    25  #include <sys/ioctl.h>
    26  #include <sys/socket.h>
    27  #include <sys/types.h>
    28  #include <unistd.h>
    29  
    30  #include "gmock/gmock.h"
    31  #include "gtest/gtest.h"
    32  #include "absl/base/internal/endian.h"
    33  #include "test/syscalls/linux/socket_test_util.h"
    34  #include "test/syscalls/linux/unix_domain_socket_test_util.h"
    35  #include "test/util/capability_util.h"
    36  #include "test/util/file_descriptor.h"
    37  #include "test/util/test_util.h"
    38  
    39  // Some of these tests involve sending packets via AF_PACKET sockets and the
    40  // loopback interface. Because AF_PACKET circumvents so much of the networking
    41  // stack, Linux sees these packets as "martian", i.e. they claim to be to/from
    42  // localhost but don't have the usual associated data. Thus Linux drops them by
    43  // default. You can see where this happens by following the code at:
    44  //
    45  // - net/ipv4/ip_input.c:ip_rcv_finish, which calls
    46  // - net/ipv4/route.c:ip_route_input_noref, which calls
    47  // - net/ipv4/route.c:ip_route_input_slow, which finds and drops martian
    48  //   packets.
    49  //
    50  // To tell Linux not to drop these packets, you need to tell it to accept our
    51  // funny packets (which are completely valid and correct, but lack associated
    52  // in-kernel data because we use AF_PACKET):
    53  //
    54  // echo 1 >> /proc/sys/net/ipv4/conf/lo/accept_local
    55  // echo 1 >> /proc/sys/net/ipv4/conf/lo/route_localnet
    56  //
    57  // These tests require CAP_NET_RAW to run.
    58  
    59  namespace gvisor {
    60  namespace testing {
    61  
    62  namespace {
    63  
    64  using ::testing::AnyOf;
    65  using ::testing::Eq;
    66  
// Payload of every UDP datagram these tests send. The tests send and compare
// sizeof(kMessage) bytes, i.e. the text plus its trailing NUL.
constexpr char kMessage[] = "soweoneul malhaebwa";
// UDP port used by the tests. It is assigned directly into network-byte-order
// fields (sin_port, udphdr.source/dest) without a further htons() call; the
// literal equals htons(40000) on a little-endian host.
constexpr in_port_t kPort = 0x409c;  // htons(40000)
    69  
    70  // Send kMessage via sock to loopback
    71  void SendUDPMessage(int sock) {
    72    struct sockaddr_in dest = {};
    73    dest.sin_port = kPort;
    74    dest.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    75    dest.sin_family = AF_INET;
    76    EXPECT_THAT(sendto(sock, kMessage, sizeof(kMessage), 0,
    77                       reinterpret_cast<struct sockaddr*>(&dest), sizeof(dest)),
    78                SyscallSucceedsWithValue(sizeof(kMessage)));
    79  }
    80  
    81  //
    82  // Raw tests. Packets sent with raw AF_PACKET sockets always include link layer
    83  // headers.
    84  //
    85  
    86  // Tests for "raw" (SOCK_RAW) packet(7) sockets.
    87  class RawPacketTest : public ::testing::TestWithParam<int> {
    88   protected:
    89    // Creates a socket to be used in tests.
    90    void SetUp() override;
    91  
    92    // Closes the socket created by SetUp().
    93    void TearDown() override;
    94  
    95    // Gets the device index of the loopback device.
    96    int GetLoopbackIndex();
    97  
    98    // The socket used for both reading and writing.
    99    int s_;
   100  };
   101  
   102  void RawPacketTest::SetUp() {
   103    if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
   104      ASSERT_THAT(socket(AF_PACKET, SOCK_RAW, htons(GetParam())),
   105                  SyscallFailsWithErrno(EPERM));
   106      GTEST_SKIP();
   107    }
   108  
   109    if (!IsRunningOnGvisor()) {
   110      // Ensure that looped back packets aren't rejected by the kernel.
   111      FileDescriptor acceptLocal = ASSERT_NO_ERRNO_AND_VALUE(
   112          Open("/proc/sys/net/ipv4/conf/lo/accept_local", O_RDWR));
   113      FileDescriptor routeLocalnet = ASSERT_NO_ERRNO_AND_VALUE(
   114          Open("/proc/sys/net/ipv4/conf/lo/route_localnet", O_RDWR));
   115      char enabled;
   116      ASSERT_THAT(read(acceptLocal.get(), &enabled, 1),
   117                  SyscallSucceedsWithValue(1));
   118      if (enabled != '1') {
   119        enabled = '1';
   120        ASSERT_THAT(lseek(acceptLocal.get(), 0, SEEK_SET),
   121                    SyscallSucceedsWithValue(0));
   122        ASSERT_THAT(write(acceptLocal.get(), &enabled, 1),
   123                    SyscallSucceedsWithValue(1));
   124        ASSERT_THAT(lseek(acceptLocal.get(), 0, SEEK_SET),
   125                    SyscallSucceedsWithValue(0));
   126        ASSERT_THAT(read(acceptLocal.get(), &enabled, 1),
   127                    SyscallSucceedsWithValue(1));
   128        ASSERT_EQ(enabled, '1');
   129      }
   130  
   131      ASSERT_THAT(read(routeLocalnet.get(), &enabled, 1),
   132                  SyscallSucceedsWithValue(1));
   133      if (enabled != '1') {
   134        enabled = '1';
   135        ASSERT_THAT(lseek(routeLocalnet.get(), 0, SEEK_SET),
   136                    SyscallSucceedsWithValue(0));
   137        ASSERT_THAT(write(routeLocalnet.get(), &enabled, 1),
   138                    SyscallSucceedsWithValue(1));
   139        ASSERT_THAT(lseek(routeLocalnet.get(), 0, SEEK_SET),
   140                    SyscallSucceedsWithValue(0));
   141        ASSERT_THAT(read(routeLocalnet.get(), &enabled, 1),
   142                    SyscallSucceedsWithValue(1));
   143        ASSERT_EQ(enabled, '1');
   144      }
   145    }
   146  
   147    ASSERT_THAT(s_ = socket(AF_PACKET, SOCK_RAW, htons(GetParam())),
   148                SyscallSucceeds());
   149  }
   150  
   151  void RawPacketTest::TearDown() {
   152    // TearDown will be run even if we skip the test.
   153    if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
   154      EXPECT_THAT(close(s_), SyscallSucceeds());
   155    }
   156  }
   157  
   158  int RawPacketTest::GetLoopbackIndex() {
   159    struct ifreq ifr;
   160    snprintf(ifr.ifr_name, IFNAMSIZ, "lo");
   161    EXPECT_THAT(ioctl(s_, SIOCGIFINDEX, &ifr), SyscallSucceeds());
   162    EXPECT_NE(ifr.ifr_ifindex, 0);
   163    return ifr.ifr_ifindex;
   164  }
   165  
   166  // Receive via a packet socket.
   167  TEST_P(RawPacketTest, Receive) {
   168    // Let's use a simple IP payload: a UDP datagram.
   169    FileDescriptor udp_sock =
   170        ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
   171    SendUDPMessage(udp_sock.get());
   172  
   173    // Wait for the socket to become readable.
   174    struct pollfd pfd = {};
   175    pfd.fd = s_;
   176    pfd.events = POLLIN;
   177    EXPECT_THAT(RetryEINTR(poll)(&pfd, 1, 2000), SyscallSucceedsWithValue(1));
   178  
   179    // Read and verify the data.
   180    constexpr size_t packet_size = sizeof(struct ethhdr) + sizeof(struct iphdr) +
   181                                   sizeof(struct udphdr) + sizeof(kMessage);
   182    char buf[64];
   183    struct sockaddr_ll src = {};
   184    socklen_t src_len = sizeof(src);
   185    ASSERT_THAT(recvfrom(s_, buf, sizeof(buf), 0,
   186                         reinterpret_cast<struct sockaddr*>(&src), &src_len),
   187                SyscallSucceedsWithValue(packet_size));
   188    // sockaddr_ll ends with an 8 byte physical address field, but ethernet
   189    // addresses only use 6 bytes.  Linux used to return sizeof(sockaddr_ll)-2
   190    // here, but since commit b2cf86e1563e33a14a1c69b3e508d15dc12f804c returns
   191    // sizeof(sockaddr_ll).
   192    ASSERT_THAT(src_len, AnyOf(Eq(sizeof(src)), Eq(sizeof(src) - 2)));
   193  
   194    // Verify the source address.
   195    EXPECT_EQ(src.sll_family, AF_PACKET);
   196    EXPECT_EQ(src.sll_ifindex, GetLoopbackIndex());
   197    EXPECT_EQ(src.sll_halen, ETH_ALEN);
   198    EXPECT_EQ(ntohs(src.sll_protocol), ETH_P_IP);
   199    // This came from the loopback device, so the address is all 0s.
   200    for (int i = 0; i < src.sll_halen; i++) {
   201      EXPECT_EQ(src.sll_addr[i], 0);
   202    }
   203  
   204    // Verify the ethernet header. We memcpy to deal with pointer alignment.
   205    struct ethhdr eth = {};
   206    memcpy(&eth, buf, sizeof(eth));
   207    // The destination and source address should be 0, for loopback.
   208    for (int i = 0; i < ETH_ALEN; i++) {
   209      EXPECT_EQ(eth.h_dest[i], 0);
   210      EXPECT_EQ(eth.h_source[i], 0);
   211    }
   212    EXPECT_EQ(eth.h_proto, htons(ETH_P_IP));
   213  
   214    // Verify the IP header. We memcpy to deal with pointer aligment.
   215    struct iphdr ip = {};
   216    memcpy(&ip, buf + sizeof(ethhdr), sizeof(ip));
   217    EXPECT_EQ(ip.ihl, 5);
   218    EXPECT_EQ(ip.version, 4);
   219    EXPECT_EQ(ip.tot_len, htons(packet_size - sizeof(eth)));
   220    EXPECT_EQ(ip.protocol, IPPROTO_UDP);
   221    EXPECT_EQ(ip.daddr, htonl(INADDR_LOOPBACK));
   222    EXPECT_EQ(ip.saddr, htonl(INADDR_LOOPBACK));
   223  
   224    // Verify the UDP header. We memcpy to deal with pointer aligment.
   225    struct udphdr udp = {};
   226    memcpy(&udp, buf + sizeof(eth) + sizeof(iphdr), sizeof(udp));
   227    EXPECT_EQ(udp.dest, kPort);
   228    EXPECT_EQ(udp.len, htons(sizeof(udphdr) + sizeof(kMessage)));
   229  
   230    // Verify the payload.
   231    char* payload = reinterpret_cast<char*>(buf + sizeof(eth) + sizeof(iphdr) +
   232                                            sizeof(udphdr));
   233    EXPECT_EQ(strncmp(payload, kMessage, sizeof(kMessage)), 0);
   234  }
   235  
   236  // Send via a packet socket.
   237  TEST_P(RawPacketTest, Send) {
   238    // Let's send a UDP packet and receive it using a regular UDP socket.
   239    FileDescriptor udp_sock =
   240        ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
   241    struct sockaddr_in bind_addr = {};
   242    bind_addr.sin_family = AF_INET;
   243    bind_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
   244    bind_addr.sin_port = kPort;
   245    ASSERT_THAT(
   246        bind(udp_sock.get(), reinterpret_cast<struct sockaddr*>(&bind_addr),
   247             sizeof(bind_addr)),
   248        SyscallSucceeds());
   249  
   250    // Set up the destination physical address.
   251    struct sockaddr_ll dest = {};
   252    dest.sll_family = AF_PACKET;
   253    dest.sll_halen = ETH_ALEN;
   254    dest.sll_ifindex = GetLoopbackIndex();
   255    dest.sll_protocol = htons(ETH_P_IP);
   256    // We're sending to the loopback device, so the address is all 0s.
   257    memset(dest.sll_addr, 0x00, ETH_ALEN);
   258  
   259    // Set up the ethernet header. The kernel takes care of the footer.
   260    // We're sending to and from hardware address 0 (loopback).
   261    struct ethhdr eth = {};
   262    eth.h_proto = htons(ETH_P_IP);
   263  
   264    // Set up the IP header.
   265    struct iphdr iphdr = {};
   266    iphdr.ihl = 5;
   267    iphdr.version = 4;
   268    iphdr.tos = 0;
   269    iphdr.tot_len =
   270        htons(sizeof(struct iphdr) + sizeof(struct udphdr) + sizeof(kMessage));
   271    // Get a pseudo-random ID. If we clash with an in-use ID the test will fail,
   272    // but we have no way of getting an ID we know to be good.
   273    srand(*reinterpret_cast<unsigned int*>(&iphdr));
   274    iphdr.id = rand();
   275    // Linux sets this bit ("do not fragment") for small packets.
   276    iphdr.frag_off = 1 << 6;
   277    iphdr.ttl = 64;
   278    iphdr.protocol = IPPROTO_UDP;
   279    iphdr.daddr = htonl(INADDR_LOOPBACK);
   280    iphdr.saddr = htonl(INADDR_LOOPBACK);
   281    iphdr.check = IPChecksum(iphdr);
   282  
   283    // Set up the UDP header.
   284    struct udphdr udphdr = {};
   285    udphdr.source = kPort;
   286    udphdr.dest = kPort;
   287    udphdr.len = htons(sizeof(udphdr) + sizeof(kMessage));
   288    udphdr.check = UDPChecksum(iphdr, udphdr, kMessage, sizeof(kMessage));
   289  
   290    // Copy both headers and the payload into our packet buffer.
   291    char
   292        send_buf[sizeof(eth) + sizeof(iphdr) + sizeof(udphdr) + sizeof(kMessage)];
   293    memcpy(send_buf, &eth, sizeof(eth));
   294    memcpy(send_buf + sizeof(ethhdr), &iphdr, sizeof(iphdr));
   295    memcpy(send_buf + sizeof(ethhdr) + sizeof(iphdr), &udphdr, sizeof(udphdr));
   296    memcpy(send_buf + sizeof(ethhdr) + sizeof(iphdr) + sizeof(udphdr), kMessage,
   297           sizeof(kMessage));
   298  
   299    // We don't implement writing to packet sockets on gVisor.
   300    if (IsRunningOnGvisor()) {
   301      ASSERT_THAT(sendto(s_, send_buf, sizeof(send_buf), 0,
   302                         reinterpret_cast<struct sockaddr*>(&dest), sizeof(dest)),
   303                  SyscallFailsWithErrno(EINVAL));
   304      GTEST_SKIP();
   305    }
   306  
   307    // Send it.
   308    ASSERT_THAT(sendto(s_, send_buf, sizeof(send_buf), 0,
   309                       reinterpret_cast<struct sockaddr*>(&dest), sizeof(dest)),
   310                SyscallSucceedsWithValue(sizeof(send_buf)));
   311  
   312    // Wait for the packet to become available on both sockets.
   313    struct pollfd pfd = {};
   314    pfd.fd = udp_sock.get();
   315    pfd.events = POLLIN;
   316    ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 5000), SyscallSucceedsWithValue(1));
   317    pfd.fd = s_;
   318    pfd.events = POLLIN;
   319    ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 5000), SyscallSucceedsWithValue(1));
   320  
   321    // Receive on the packet socket.
   322    char recv_buf[sizeof(send_buf)];
   323    ASSERT_THAT(recv(s_, recv_buf, sizeof(recv_buf), 0),
   324                SyscallSucceedsWithValue(sizeof(recv_buf)));
   325    ASSERT_EQ(memcmp(recv_buf, send_buf, sizeof(send_buf)), 0);
   326  
   327    // Receive on the UDP socket.
   328    struct sockaddr_in src;
   329    socklen_t src_len = sizeof(src);
   330    ASSERT_THAT(recvfrom(udp_sock.get(), recv_buf, sizeof(recv_buf), MSG_DONTWAIT,
   331                         reinterpret_cast<struct sockaddr*>(&src), &src_len),
   332                SyscallSucceedsWithValue(sizeof(kMessage)));
   333    // Check src and payload.
   334    EXPECT_EQ(strncmp(recv_buf, kMessage, sizeof(kMessage)), 0);
   335    EXPECT_EQ(src.sin_family, AF_INET);
   336    EXPECT_EQ(src.sin_port, kPort);
   337    EXPECT_EQ(src.sin_addr.s_addr, htonl(INADDR_LOOPBACK));
   338  }
   339  
   340  // Check that setting SO_RCVBUF below min is clamped to the minimum
   341  // receive buffer size.
   342  TEST_P(RawPacketTest, SetSocketRecvBufBelowMin) {
   343    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   344  
   345    // Discover minimum receive buf size by trying to set it to zero.
   346    // See:
   347    // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820
   348    constexpr int kRcvBufSz = 0;
   349    ASSERT_THAT(
   350        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
   351        SyscallSucceeds());
   352  
   353    int min = 0;
   354    socklen_t min_len = sizeof(min);
   355    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
   356                SyscallSucceeds());
   357  
   358    // Linux doubles the value so let's use a value that when doubled will still
   359    // be smaller than min.
   360    int below_min = min / 2 - 1;
   361    ASSERT_THAT(
   362        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &below_min, sizeof(below_min)),
   363        SyscallSucceeds());
   364  
   365    int val = 0;
   366    socklen_t val_len = sizeof(val);
   367    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
   368                SyscallSucceeds());
   369  
   370    ASSERT_EQ(min, val);
   371  }
   372  
   373  // Check that setting SO_RCVBUF above max is clamped to the maximum
   374  // receive buffer size.
   375  TEST_P(RawPacketTest, SetSocketRecvBufAboveMax) {
   376    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   377  
   378    // Discover max buf size by trying to set the largest possible buffer size.
   379    constexpr int kRcvBufSz = 0xffffffff;
   380    ASSERT_THAT(
   381        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
   382        SyscallSucceeds());
   383  
   384    int max = 0;
   385    socklen_t max_len = sizeof(max);
   386    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len),
   387                SyscallSucceeds());
   388  
   389    int above_max = max + 1;
   390    ASSERT_THAT(
   391        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &above_max, sizeof(above_max)),
   392        SyscallSucceeds());
   393  
   394    int val = 0;
   395    socklen_t val_len = sizeof(val);
   396    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
   397                SyscallSucceeds());
   398    ASSERT_EQ(max, val);
   399  }
   400  
   401  // Check that setting SO_RCVBUF min <= kRcvBufSz <= max is honored.
   402  TEST_P(RawPacketTest, SetSocketRecvBuf) {
   403    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   404  
   405    int max = 0;
   406    int min = 0;
   407    {
   408      // Discover max buf size by trying to set a really large buffer size.
   409      constexpr int kRcvBufSz = 0xffffffff;
   410      ASSERT_THAT(
   411          setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
   412          SyscallSucceeds());
   413  
   414      max = 0;
   415      socklen_t max_len = sizeof(max);
   416      ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len),
   417                  SyscallSucceeds());
   418    }
   419  
   420    {
   421      // Discover minimum buffer size by trying to set a zero size receive buffer
   422      // size.
   423      // See:
   424      // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820
   425      constexpr int kRcvBufSz = 0;
   426      ASSERT_THAT(
   427          setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
   428          SyscallSucceeds());
   429  
   430      socklen_t min_len = sizeof(min);
   431      ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
   432                  SyscallSucceeds());
   433    }
   434  
   435    int quarter_sz = min + (max - min) / 4;
   436    ASSERT_THAT(
   437        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &quarter_sz, sizeof(quarter_sz)),
   438        SyscallSucceeds());
   439  
   440    int val = 0;
   441    socklen_t val_len = sizeof(val);
   442    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
   443                SyscallSucceeds());
   444  
   445    quarter_sz *= 2;
   446    ASSERT_EQ(quarter_sz, val);
   447  }
   448  
   449  // Check that setting SO_SNDBUF below min is clamped to the minimum
   450  // receive buffer size.
   451  TEST_P(RawPacketTest, SetSocketSendBufBelowMin) {
   452    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   453  
   454    // Discover minimum buffer size by trying to set it to zero.
   455    constexpr int kSndBufSz = 0;
   456    ASSERT_THAT(
   457        setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
   458        SyscallSucceeds());
   459  
   460    int min = 0;
   461    socklen_t min_len = sizeof(min);
   462    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len),
   463                SyscallSucceeds());
   464  
   465    // Linux doubles the value so let's use a value that when doubled will still
   466    // be smaller than min.
   467    int below_min = min / 2 - 1;
   468    ASSERT_THAT(
   469        setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &below_min, sizeof(below_min)),
   470        SyscallSucceeds());
   471  
   472    int val = 0;
   473    socklen_t val_len = sizeof(val);
   474    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
   475                SyscallSucceeds());
   476  
   477    ASSERT_EQ(min, val);
   478  }
   479  
   480  // Check that setting SO_SNDBUF above max is clamped to the maximum
   481  // send buffer size.
   482  TEST_P(RawPacketTest, SetSocketSendBufAboveMax) {
   483    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   484  
   485    // Discover maximum buffer size by trying to set it to a large value.
   486    constexpr int kSndBufSz = 0xffffffff;
   487    ASSERT_THAT(
   488        setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
   489        SyscallSucceeds());
   490  
   491    int max = 0;
   492    socklen_t max_len = sizeof(max);
   493    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len),
   494                SyscallSucceeds());
   495  
   496    int above_max = max + 1;
   497    ASSERT_THAT(
   498        setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &above_max, sizeof(above_max)),
   499        SyscallSucceeds());
   500  
   501    int val = 0;
   502    socklen_t val_len = sizeof(val);
   503    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
   504                SyscallSucceeds());
   505    ASSERT_EQ(max, val);
   506  }
   507  
   508  // Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored.
   509  TEST_P(RawPacketTest, SetSocketSendBuf) {
   510    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   511  
   512    int max = 0;
   513    int min = 0;
   514    {
   515      // Discover maximum buffer size by trying to set it to a large value.
   516      constexpr int kSndBufSz = 0xffffffff;
   517      ASSERT_THAT(
   518          setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
   519          SyscallSucceeds());
   520  
   521      max = 0;
   522      socklen_t max_len = sizeof(max);
   523      ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len),
   524                  SyscallSucceeds());
   525    }
   526  
   527    {
   528      // Discover minimum buffer size by trying to set it to zero.
   529      constexpr int kSndBufSz = 0;
   530      ASSERT_THAT(
   531          setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
   532          SyscallSucceeds());
   533  
   534      socklen_t min_len = sizeof(min);
   535      ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len),
   536                  SyscallSucceeds());
   537    }
   538  
   539    int quarter_sz = min + (max - min) / 4;
   540    ASSERT_THAT(
   541        setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &quarter_sz, sizeof(quarter_sz)),
   542        SyscallSucceeds());
   543  
   544    int val = 0;
   545    socklen_t val_len = sizeof(val);
   546    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
   547                SyscallSucceeds());
   548  
   549    quarter_sz *= 2;
   550    ASSERT_EQ(quarter_sz, val);
   551  }
   552  
   553  TEST_P(RawPacketTest, GetSocketError) {
   554    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   555  
   556    int val = 0;
   557    socklen_t val_len = sizeof(val);
   558    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_ERROR, &val, &val_len),
   559                SyscallSucceeds());
   560    ASSERT_EQ(val, 0);
   561  }
   562  
   563  TEST_P(RawPacketTest, GetSocketErrorBind) {
   564    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   565  
   566    {
   567      // Bind to the loopback device.
   568      struct sockaddr_ll bind_addr = {};
   569      bind_addr.sll_family = AF_PACKET;
   570      bind_addr.sll_protocol = htons(GetParam());
   571      bind_addr.sll_ifindex = GetLoopbackIndex();
   572  
   573      ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&bind_addr),
   574                       sizeof(bind_addr)),
   575                  SyscallSucceeds());
   576  
   577      // SO_ERROR should return no errors.
   578      int val = 0;
   579      socklen_t val_len = sizeof(val);
   580      ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_ERROR, &val, &val_len),
   581                  SyscallSucceeds());
   582      ASSERT_EQ(val, 0);
   583    }
   584  
   585    {
   586      // Now try binding to an invalid interface.
   587      struct sockaddr_ll bind_addr = {};
   588      bind_addr.sll_family = AF_PACKET;
   589      bind_addr.sll_protocol = htons(GetParam());
   590      bind_addr.sll_ifindex = 0xffff;  // Just pick a really large number.
   591  
   592      // Binding should fail with EINVAL
   593      ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&bind_addr),
   594                       sizeof(bind_addr)),
   595                  SyscallFailsWithErrno(ENODEV));
   596  
   597      // SO_ERROR does not return error when the device is invalid.
   598      // On Linux there is just one odd ball condition where this can return
   599      // an error where the device was valid and then removed or disabled
   600      // between the first check for index and the actual registration of
   601      // the packet endpoint. On Netstack this is not possible as the stack
   602      // global mutex is held during registration and check.
   603      int val = 0;
   604      socklen_t val_len = sizeof(val);
   605      ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_ERROR, &val, &val_len),
   606                  SyscallSucceeds());
   607      ASSERT_EQ(val, 0);
   608    }
   609  }
   610  
   611  TEST_P(RawPacketTest, SetSocketDetachFilterNoInstalledFilter) {
   612    // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER.
   613    //
   614    // gVisor returns no error on SO_DETACH_FILTER even if there is no filter
   615    // attached unlike linux which does return ENOENT in such cases. This is
   616    // because gVisor doesn't support SO_ATTACH_FILTER and just silently returns
   617    // success.
   618    if (IsRunningOnGvisor()) {
   619      constexpr int val = 0;
   620      ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)),
   621                  SyscallSucceeds());
   622      return;
   623    }
   624    constexpr int val = 0;
   625    ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)),
   626                SyscallFailsWithErrno(ENOENT));
   627  }
   628  
   629  TEST_P(RawPacketTest, GetSocketDetachFilter) {
   630    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   631  
   632    int val = 0;
   633    socklen_t val_len = sizeof(val);
   634    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, &val_len),
   635                SyscallFailsWithErrno(ENOPROTOOPT));
   636  }
   637  
   638  TEST_P(RawPacketTest, SetAndGetSocketLinger) {
   639    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   640  
   641    int level = SOL_SOCKET;
   642    int type = SO_LINGER;
   643  
   644    struct linger sl;
   645    sl.l_onoff = 1;
   646    sl.l_linger = 5;
   647    ASSERT_THAT(setsockopt(s_, level, type, &sl, sizeof(sl)),
   648                SyscallSucceedsWithValue(0));
   649  
   650    struct linger got_linger = {};
   651    socklen_t length = sizeof(sl);
   652    ASSERT_THAT(getsockopt(s_, level, type, &got_linger, &length),
   653                SyscallSucceedsWithValue(0));
   654  
   655    ASSERT_EQ(length, sizeof(got_linger));
   656    EXPECT_EQ(0, memcmp(&sl, &got_linger, length));
   657  }
   658  
   659  TEST_P(RawPacketTest, GetSocketAcceptConn) {
   660    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   661  
   662    int got = -1;
   663    socklen_t length = sizeof(got);
   664    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_ACCEPTCONN, &got, &length),
   665                SyscallSucceedsWithValue(0));
   666  
   667    ASSERT_EQ(length, sizeof(got));
   668    EXPECT_EQ(got, 0);
   669  }
// Runs every RawPacketTest twice, once per protocol value passed to socket():
// ETH_P_IP and ETH_P_ALL.
INSTANTIATE_TEST_SUITE_P(AllInetTests, RawPacketTest,
                         ::testing::Values(ETH_P_IP, ETH_P_ALL));
   672  
// Fixture for the oversized-message tests. The test parameter is the
// TestAddress that the socket under test connects to.
class RawPacketMsgSizeTest : public ::testing::TestWithParam<TestAddress> {};
   674  
   675  TEST_P(RawPacketMsgSizeTest, SendTooLong) {
   676    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   677  
   678    TestAddress addr = GetParam().WithPort(kPort);
   679  
   680    FileDescriptor udp_sock =
   681        ASSERT_NO_ERRNO_AND_VALUE(Socket(addr.family(), SOCK_RAW, IPPROTO_UDP));
   682  
   683    ASSERT_THAT(
   684        connect(udp_sock.get(), reinterpret_cast<struct sockaddr*>(&addr.addr),
   685                addr.addr_len),
   686        SyscallSucceeds());
   687  
   688    const char buf[65536] = {};
   689    ASSERT_THAT(send(udp_sock.get(), buf, sizeof(buf), 0),
   690                SyscallFailsWithErrno(EMSGSIZE));
   691  }
   692  
   693  TEST_P(RawPacketMsgSizeTest, SpliceTooLong) {
   694    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   695  
   696    const char buf[65536] = {};
   697    int fds[2];
   698    ASSERT_THAT(pipe(fds), SyscallSucceeds());
   699    ASSERT_THAT(write(fds[1], buf, sizeof(buf)),
   700                SyscallSucceedsWithValue(sizeof(buf)));
   701  
   702    TestAddress addr = GetParam().WithPort(kPort);
   703  
   704    FileDescriptor udp_sock =
   705        ASSERT_NO_ERRNO_AND_VALUE(Socket(addr.family(), SOCK_RAW, IPPROTO_UDP));
   706  
   707    ASSERT_THAT(
   708        connect(udp_sock.get(), reinterpret_cast<struct sockaddr*>(&addr.addr),
   709                addr.addr_len),
   710        SyscallSucceeds());
   711  
   712    ssize_t n = splice(fds[0], nullptr, udp_sock.get(), nullptr, sizeof(buf), 0);
   713    if (IsRunningOnGvisor()) {
   714      EXPECT_THAT(n, SyscallFailsWithErrno(EMSGSIZE));
   715    } else {
   716      // TODO(gvisor.dev/issue/138): Linux sends out multiple UDP datagrams, each
   717      // of the size of a page.
   718      EXPECT_THAT(n, SyscallSucceedsWithValue(sizeof(buf)));
   719    }
   720  }
   721  
// Runs every RawPacketMsgSizeTest twice, once with the address returned by
// V4Loopback() and once with the one returned by V6Loopback().
INSTANTIATE_TEST_SUITE_P(AllRawPacketMsgSizeTest, RawPacketMsgSizeTest,
                         ::testing::Values(V4Loopback(), V6Loopback()));
   724  
   725  }  // namespace
   726  
   727  }  // namespace testing
   728  }  // namespace gvisor