gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/test/syscalls/linux/raw_socket.cc (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include <arpa/inet.h>
    16  #include <net/if.h>
    17  #include <netinet/in.h>
    18  #include <netinet/ip.h>
    19  #include <netinet/ip6.h>
    20  #include <netinet/ip_icmp.h>
    21  #include <poll.h>
    22  #include <sys/socket.h>
    23  #include <sys/types.h>
    24  #include <unistd.h>
    25  
    26  #include "gmock/gmock.h"
    27  #include "gtest/gtest.h"
    28  #include "test/syscalls/linux/ip_socket_test_util.h"
    29  #include "test/syscalls/linux/unix_domain_socket_test_util.h"
    30  #include "test/util/capability_util.h"
    31  #include "test/util/file_descriptor.h"
    32  #include "test/util/socket_util.h"
    33  #include "test/util/test_util.h"
    34  
    35  // Note: in order to run these tests, /proc/sys/net/ipv4/ping_group_range will
    36  // need to be configured to let the superuser create ping sockets (see icmp(7)).
    37  
    38  namespace gvisor {
    39  namespace testing {
    40  
    41  namespace {
    42  
// Bit mask of the RST flag inside the TCP header's flags byte; OR-ed into
// randomized TCP payloads by randomizePacket().
#define TCPHDR_RST 0x4
// Byte offset, from the start of the buffer handed to randomizePacket(), of
// the TCP flags byte.
#define TCPHDR_FLAGS_OFF 13
    45  
    46  using ::testing::AnyOf;
    47  
    48  // Fixture for tests parameterized by protocol.
    49  class RawSocketTest : public ::testing::TestWithParam<std::tuple<int, int>> {
    50   protected:
    51    // Creates a socket to be used in tests.
    52    void SetUp() override;
    53  
    54    // Closes the socket created by SetUp().
    55    void TearDown() override;
    56  
    57    // Sends buf via s_.
    58    void SendBuf(const char* buf, int buf_len);
    59  
    60    // Reads from s_ into recv_buf.
    61    void ReceiveBuf(char* recv_buf, size_t recv_buf_len);
    62  
    63    void ReceiveBufFrom(int sock, char* recv_buf, size_t recv_buf_len);
    64  
    65    int Protocol() { return std::get<0>(GetParam()); }
    66  
    67    int Family() { return std::get<1>(GetParam()); }
    68  
    69    socklen_t AddrLen() {
    70      if (Family() == AF_INET) {
    71        return sizeof(sockaddr_in);
    72      }
    73      return sizeof(sockaddr_in6);
    74    }
    75  
    76    int HdrLen() {
    77      if (Family() == AF_INET) {
    78        return sizeof(struct iphdr);
    79      }
    80      // IPv6 raw sockets don't include the header.
    81      return 0;
    82    }
    83  
    84    uint16_t Port(struct sockaddr* s) {
    85      if (Family() == AF_INET) {
    86        return ntohs(reinterpret_cast<struct sockaddr_in*>(s)->sin_port);
    87      }
    88      return ntohs(reinterpret_cast<struct sockaddr_in6*>(s)->sin6_port);
    89    }
    90  
    91    void* Addr(struct sockaddr* s) {
    92      if (Family() == AF_INET) {
    93        return &(reinterpret_cast<struct sockaddr_in*>(s)->sin_addr);
    94      }
    95      return &(reinterpret_cast<struct sockaddr_in6*>(s)->sin6_addr);
    96    }
    97  
    98    // The socket used for both reading and writing.
    99    int s_;
   100  
   101    // The loopback address.
   102    struct sockaddr_storage addr_;
   103  };
   104  
   105  void RawSocketTest::SetUp() {
   106    if (!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())) {
   107      ASSERT_THAT(socket(Family(), SOCK_RAW, Protocol()),
   108                  SyscallFailsWithErrno(EPERM));
   109      GTEST_SKIP();
   110    }
   111  
   112    ASSERT_THAT(s_ = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds());
   113  
   114    addr_ = {};
   115  
   116    // We don't set ports because raw sockets don't have a notion of ports.
   117    if (Family() == AF_INET) {
   118      struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr_);
   119      sin->sin_family = AF_INET;
   120      sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
   121    } else {
   122      struct sockaddr_in6* sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr_);
   123      sin6->sin6_family = AF_INET6;
   124      sin6->sin6_addr = in6addr_loopback;
   125    }
   126  }
   127  
   128  void RawSocketTest::TearDown() {
   129    // TearDown will be run even if we skip the test.
   130    if (ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())) {
   131      EXPECT_THAT(close(s_), SyscallSucceeds());
   132    }
   133  }
   134  
   135  // We should be able to create multiple raw sockets for the same protocol.
   136  // BasicRawSocket::Setup creates the first one, so we only have to create one
   137  // more here.
   138  TEST_P(RawSocketTest, MultipleCreation) {
   139    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   140  
   141    int s2;
   142    ASSERT_THAT(s2 = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds());
   143  
   144    ASSERT_THAT(close(s2), SyscallSucceeds());
   145  }
   146  
   147  // Test that shutting down an unconnected socket fails.
   148  TEST_P(RawSocketTest, FailShutdownWithoutConnect) {
   149    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   150  
   151    ASSERT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN));
   152    ASSERT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN));
   153  }
   154  
   155  // Shutdown is a no-op for raw sockets (and datagram sockets in general).
   156  TEST_P(RawSocketTest, ShutdownWriteNoop) {
   157    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   158  
   159    ASSERT_THAT(
   160        connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   161        SyscallSucceeds());
   162    ASSERT_THAT(shutdown(s_, SHUT_WR), SyscallSucceeds());
   163  
   164    // Arbitrary.
   165    constexpr char kBuf[] = "noop";
   166    ASSERT_THAT(RetryEINTR(write)(s_, kBuf, sizeof(kBuf)),
   167                SyscallSucceedsWithValue(sizeof(kBuf)));
   168  }
   169  
   170  // Shutdown is a no-op for raw sockets (and datagram sockets in general).
   171  TEST_P(RawSocketTest, ShutdownReadNoop) {
   172    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   173  
   174    ASSERT_THAT(
   175        connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   176        SyscallSucceeds());
   177    ASSERT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds());
   178  
   179    // Arbitrary.
   180    constexpr char kBuf[] = "gdg";
   181    ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf)));
   182  
   183    std::vector<char> c(sizeof(kBuf) + HdrLen());
   184    ASSERT_THAT(read(s_, c.data(), c.size()), SyscallSucceedsWithValue(c.size()));
   185  }
   186  
   187  // Test that listen() fails.
   188  TEST_P(RawSocketTest, FailListen) {
   189    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   190  
   191    ASSERT_THAT(listen(s_, 1), SyscallFailsWithErrno(ENOTSUP));
   192  }
   193  
   194  // Test that accept() fails.
   195  TEST_P(RawSocketTest, FailAccept) {
   196    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   197  
   198    struct sockaddr saddr;
   199    socklen_t addrlen;
   200    ASSERT_THAT(accept(s_, &saddr, &addrlen), SyscallFailsWithErrno(ENOTSUP));
   201  }
   202  
   203  TEST_P(RawSocketTest, BindThenGetSockName) {
   204    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   205  
   206    struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_);
   207    ASSERT_THAT(bind(s_, addr, AddrLen()), SyscallSucceeds());
   208    struct sockaddr_storage saddr_storage;
   209    struct sockaddr* saddr = reinterpret_cast<struct sockaddr*>(&saddr_storage);
   210    socklen_t saddrlen = AddrLen();
   211    ASSERT_THAT(getsockname(s_, saddr, &saddrlen), SyscallSucceeds());
   212    ASSERT_EQ(saddrlen, AddrLen());
   213  
   214    // The port is expected to hold the protocol number.
   215    EXPECT_EQ(Port(saddr), Protocol());
   216  
   217    char addrbuf[INET6_ADDRSTRLEN], saddrbuf[INET6_ADDRSTRLEN];
   218    const char* addrstr =
   219        inet_ntop(addr->sa_family, Addr(addr), addrbuf, sizeof(addrbuf));
   220    ASSERT_NE(addrstr, nullptr);
   221    const char* saddrstr =
   222        inet_ntop(saddr->sa_family, Addr(saddr), saddrbuf, sizeof(saddrbuf));
   223    ASSERT_NE(saddrstr, nullptr);
   224    EXPECT_STREQ(saddrstr, addrstr);
   225  }
   226  
   227  TEST_P(RawSocketTest, ConnectThenGetSockName) {
   228    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   229  
   230    struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_);
   231    ASSERT_THAT(connect(s_, addr, AddrLen()), SyscallSucceeds());
   232    struct sockaddr_storage saddr_storage;
   233    struct sockaddr* saddr = reinterpret_cast<struct sockaddr*>(&saddr_storage);
   234    socklen_t saddrlen = AddrLen();
   235    ASSERT_THAT(getsockname(s_, saddr, &saddrlen), SyscallSucceeds());
   236    ASSERT_EQ(saddrlen, AddrLen());
   237  
   238    // The port is expected to hold the protocol number.
   239    EXPECT_EQ(Port(saddr), Protocol());
   240  
   241    char addrbuf[INET6_ADDRSTRLEN], saddrbuf[INET6_ADDRSTRLEN];
   242    const char* addrstr =
   243        inet_ntop(addr->sa_family, Addr(addr), addrbuf, sizeof(addrbuf));
   244    ASSERT_NE(addrstr, nullptr);
   245    const char* saddrstr =
   246        inet_ntop(saddr->sa_family, Addr(saddr), saddrbuf, sizeof(saddrbuf));
   247    ASSERT_NE(saddrstr, nullptr);
   248    EXPECT_STREQ(saddrstr, addrstr);
   249  }
   250  
   251  // Test that getpeername() returns nothing before connect().
   252  TEST_P(RawSocketTest, FailGetPeerNameBeforeConnect) {
   253    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   254  
   255    struct sockaddr saddr;
   256    socklen_t addrlen = sizeof(saddr);
   257    ASSERT_THAT(getpeername(s_, &saddr, &addrlen),
   258                SyscallFailsWithErrno(ENOTCONN));
   259  }
   260  
   261  // Test that getpeername() returns something after connect().
   262  TEST_P(RawSocketTest, GetPeerName) {
   263    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   264  
   265    ASSERT_THAT(
   266        connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   267        SyscallSucceeds());
   268    struct sockaddr saddr;
   269    socklen_t addrlen = sizeof(saddr);
   270    ASSERT_THAT(getpeername(s_, &saddr, &addrlen),
   271                SyscallFailsWithErrno(ENOTCONN));
   272    ASSERT_GT(addrlen, 0);
   273  }
   274  
   275  // Test that the socket is writable immediately.
   276  TEST_P(RawSocketTest, PollWritableImmediately) {
   277    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   278  
   279    struct pollfd pfd = {};
   280    pfd.fd = s_;
   281    pfd.events = POLLOUT;
   282    ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 10000), SyscallSucceedsWithValue(1));
   283  }
   284  
   285  // Test that the socket isn't readable before receiving anything.
   286  TEST_P(RawSocketTest, PollNotReadableInitially) {
   287    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   288  
   289    // Try to receive data with MSG_DONTWAIT, which returns immediately if there's
   290    // nothing to be read.
   291    char buf[117];
   292    ASSERT_THAT(RetryEINTR(recv)(s_, buf, sizeof(buf), MSG_DONTWAIT),
   293                SyscallFailsWithErrno(EAGAIN));
   294  }
   295  
   296  // Test that the socket becomes readable once something is written to it.
   297  TEST_P(RawSocketTest, PollTriggeredOnWrite) {
   298    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   299  
   300    // Write something so that there's data to be read.
   301    // Arbitrary.
   302    constexpr char kBuf[] = "JP5";
   303    ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf)));
   304  
   305    struct pollfd pfd = {};
   306    pfd.fd = s_;
   307    pfd.events = POLLIN;
   308    ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 10000), SyscallSucceedsWithValue(1));
   309  }
   310  
   311  // Test that we can connect() to a valid IP (loopback).
   312  TEST_P(RawSocketTest, ConnectToLoopback) {
   313    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   314  
   315    ASSERT_THAT(
   316        connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   317        SyscallSucceeds());
   318  }
   319  
   320  // Test that calling send() without connect() fails.
   321  TEST_P(RawSocketTest, SendWithoutConnectFails) {
   322    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   323  
   324    // Arbitrary.
   325    constexpr char kBuf[] = "Endgame was good";
   326    ASSERT_THAT(send(s_, kBuf, sizeof(kBuf), 0),
   327                SyscallFailsWithErrno(EDESTADDRREQ));
   328  }
   329  
   330  // Wildcard Bind.
   331  TEST_P(RawSocketTest, BindToWildcard) {
   332    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   333    struct sockaddr_storage addr;
   334    addr = {};
   335  
   336    // We don't set ports because raw sockets don't have a notion of ports.
   337    if (Family() == AF_INET) {
   338      struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr);
   339      sin->sin_family = AF_INET;
   340      sin->sin_addr.s_addr = htonl(INADDR_ANY);
   341    } else {
   342      struct sockaddr_in6* sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr);
   343      sin6->sin6_family = AF_INET6;
   344      sin6->sin6_addr = in6addr_any;
   345    }
   346  
   347    ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   348                SyscallSucceeds());
   349  }
   350  
   351  // Bind to localhost.
   352  TEST_P(RawSocketTest, BindToLocalhost) {
   353    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   354  
   355    ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   356                SyscallSucceeds());
   357  }
   358  
   359  // Bind to a different address.
   360  TEST_P(RawSocketTest, BindToInvalid) {
   361    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   362  
   363    struct sockaddr_storage bind_addr = addr_;
   364    if (Family() == AF_INET) {
   365      struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&bind_addr);
   366      sin->sin_addr = {1};  // 1.0.0.0 - An address that we can't bind to.
   367    } else {
   368      struct sockaddr_in6* sin6 =
   369          reinterpret_cast<struct sockaddr_in6*>(&bind_addr);
   370      memset(&sin6->sin6_addr.s6_addr, 0, sizeof(sin6->sin6_addr.s6_addr));
   371      sin6->sin6_addr.s6_addr[0] = 1;  // 1: - An address that we can't bind to.
   372    }
   373    ASSERT_THAT(
   374        bind(s_, reinterpret_cast<struct sockaddr*>(&bind_addr), AddrLen()),
   375        SyscallFailsWithErrno(EADDRNOTAVAIL));
   376  }
   377  
   378  // Send and receive an packet.
   379  TEST_P(RawSocketTest, SendAndReceive) {
   380    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   381  
   382    // Arbitrary.
   383    constexpr char kBuf[] = "TB12";
   384    ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf)));
   385  
   386    // Receive the packet and make sure it's identical.
   387    std::vector<char> recv_buf(sizeof(kBuf) + HdrLen());
   388    ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
   389    EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0);
   390  }
   391  
   392  // We should be able to create multiple raw sockets for the same protocol and
   393  // receive the same packet on both.
   394  TEST_P(RawSocketTest, MultipleSocketReceive) {
   395    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   396  
   397    int s2;
   398    ASSERT_THAT(s2 = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds());
   399  
   400    // Arbitrary.
   401    constexpr char kBuf[] = "TB10";
   402    ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf)));
   403  
   404    // Receive it on socket 1.
   405    std::vector<char> recv_buf1(sizeof(kBuf) + HdrLen());
   406    ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf1.data(), recv_buf1.size()));
   407  
   408    // Receive it on socket 2.
   409    std::vector<char> recv_buf2(sizeof(kBuf) + HdrLen());
   410    ASSERT_NO_FATAL_FAILURE(
   411        ReceiveBufFrom(s2, recv_buf2.data(), recv_buf2.size()));
   412  
   413    EXPECT_EQ(memcmp(recv_buf1.data() + HdrLen(), recv_buf2.data() + HdrLen(),
   414                     sizeof(kBuf)),
   415              0);
   416  
   417    ASSERT_THAT(close(s2), SyscallSucceeds());
   418  }
   419  
   420  // Test that connect sends packets to the right place.
   421  TEST_P(RawSocketTest, SendAndReceiveViaConnect) {
   422    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   423  
   424    ASSERT_THAT(
   425        connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   426        SyscallSucceeds());
   427  
   428    // Arbitrary.
   429    constexpr char kBuf[] = "JH4";
   430    ASSERT_THAT(send(s_, kBuf, sizeof(kBuf), 0),
   431                SyscallSucceedsWithValue(sizeof(kBuf)));
   432  
   433    // Receive the packet and make sure it's identical.
   434    std::vector<char> recv_buf(sizeof(kBuf) + HdrLen());
   435    ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
   436    EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0);
   437  }
   438  
   439  // Bind to localhost, then send and receive packets.
   440  TEST_P(RawSocketTest, BindSendAndReceive) {
   441    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   442  
   443    ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   444                SyscallSucceeds());
   445  
   446    // Arbitrary.
   447    constexpr char kBuf[] = "DR16";
   448    ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf)));
   449  
   450    // Receive the packet and make sure it's identical.
   451    std::vector<char> recv_buf(sizeof(kBuf) + HdrLen());
   452    ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
   453    EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0);
   454  }
   455  
   456  // Bind and connect to localhost and send/receive packets.
   457  TEST_P(RawSocketTest, BindConnectSendAndReceive) {
   458    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   459  
   460    ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   461                SyscallSucceeds());
   462    ASSERT_THAT(
   463        connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   464        SyscallSucceeds());
   465  
   466    // Arbitrary.
   467    constexpr char kBuf[] = "DG88";
   468    ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf)));
   469  
   470    // Receive the packet and make sure it's identical.
   471    std::vector<char> recv_buf(sizeof(kBuf) + HdrLen());
   472    ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
   473    EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0);
   474  }
   475  
   476  // Check that setting SO_RCVBUF below min is clamped to the minimum
   477  // receive buffer size.
   478  TEST_P(RawSocketTest, SetSocketRecvBufBelowMin) {
   479    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   480  
   481    // Discover minimum receive buf size by trying to set it to zero.
   482    // See:
   483    // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820
   484    constexpr int kRcvBufSz = 0;
   485    ASSERT_THAT(
   486        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
   487        SyscallSucceeds());
   488  
   489    int min = 0;
   490    socklen_t min_len = sizeof(min);
   491    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
   492                SyscallSucceeds());
   493  
   494    // Linux doubles the value so let's use a value that when doubled will still
   495    // be smaller than min.
   496    int below_min = min / 2 - 1;
   497    ASSERT_THAT(
   498        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &below_min, sizeof(below_min)),
   499        SyscallSucceeds());
   500  
   501    int val = 0;
   502    socklen_t val_len = sizeof(val);
   503    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
   504                SyscallSucceeds());
   505  
   506    ASSERT_EQ(min, val);
   507  }
   508  
   509  // Check that setting SO_RCVBUF above max is clamped to the maximum
   510  // receive buffer size.
   511  TEST_P(RawSocketTest, SetSocketRecvBufAboveMax) {
   512    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   513  
   514    // Discover max buf size by trying to set the largest possible buffer size.
   515    constexpr int kRcvBufSz = 0xffffffff;
   516    ASSERT_THAT(
   517        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
   518        SyscallSucceeds());
   519  
   520    int max = 0;
   521    socklen_t max_len = sizeof(max);
   522    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len),
   523                SyscallSucceeds());
   524  
   525    int above_max = max + 1;
   526    ASSERT_THAT(
   527        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &above_max, sizeof(above_max)),
   528        SyscallSucceeds());
   529  
   530    int val = 0;
   531    socklen_t val_len = sizeof(val);
   532    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
   533                SyscallSucceeds());
   534    ASSERT_EQ(max, val);
   535  }
   536  
   537  // Check that setting SO_RCVBUF min <= kRcvBufSz <= max is honored.
   538  TEST_P(RawSocketTest, SetSocketRecvBuf) {
   539    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   540  
   541    int max = 0;
   542    int min = 0;
   543    {
   544      // Discover max buf size by trying to set a really large buffer size.
   545      constexpr int kRcvBufSz = 0xffffffff;
   546      ASSERT_THAT(
   547          setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
   548          SyscallSucceeds());
   549  
   550      max = 0;
   551      socklen_t max_len = sizeof(max);
   552      ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len),
   553                  SyscallSucceeds());
   554    }
   555  
   556    {
   557      // Discover minimum buffer size by trying to set a zero size receive buffer
   558      // size.
   559      // See:
   560      // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820
   561      constexpr int kRcvBufSz = 0;
   562      ASSERT_THAT(
   563          setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
   564          SyscallSucceeds());
   565  
   566      socklen_t min_len = sizeof(min);
   567      ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
   568                  SyscallSucceeds());
   569    }
   570  
   571    int quarter_sz = min + (max - min) / 4;
   572    ASSERT_THAT(
   573        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &quarter_sz, sizeof(quarter_sz)),
   574        SyscallSucceeds());
   575  
   576    int val = 0;
   577    socklen_t val_len = sizeof(val);
   578    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
   579                SyscallSucceeds());
   580  
   581    // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF.
   582    quarter_sz *= 2;
   583    ASSERT_EQ(quarter_sz, val);
   584  }
   585  
   586  // Check that setting SO_SNDBUF below min is clamped to the minimum
   587  // receive buffer size.
   588  TEST_P(RawSocketTest, SetSocketSendBufBelowMin) {
   589    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   590  
   591    // Discover minimum buffer size by trying to set it to zero.
   592    constexpr int kSndBufSz = 0;
   593    ASSERT_THAT(
   594        setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
   595        SyscallSucceeds());
   596  
   597    int min = 0;
   598    socklen_t min_len = sizeof(min);
   599    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len),
   600                SyscallSucceeds());
   601  
   602    // Linux doubles the value so let's use a value that when doubled will still
   603    // be smaller than min.
   604    int below_min = min / 2 - 1;
   605    ASSERT_THAT(
   606        setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &below_min, sizeof(below_min)),
   607        SyscallSucceeds());
   608  
   609    int val = 0;
   610    socklen_t val_len = sizeof(val);
   611    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
   612                SyscallSucceeds());
   613  
   614    ASSERT_EQ(min, val);
   615  }
   616  
   617  // Check that setting SO_SNDBUF above max is clamped to the maximum
   618  // send buffer size.
   619  TEST_P(RawSocketTest, SetSocketSendBufAboveMax) {
   620    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   621  
   622    // Discover maximum buffer size by trying to set it to a large value.
   623    constexpr int kSndBufSz = 0xffffffff;
   624    ASSERT_THAT(
   625        setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
   626        SyscallSucceeds());
   627  
   628    int max = 0;
   629    socklen_t max_len = sizeof(max);
   630    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len),
   631                SyscallSucceeds());
   632  
   633    int above_max = max + 1;
   634    ASSERT_THAT(
   635        setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &above_max, sizeof(above_max)),
   636        SyscallSucceeds());
   637  
   638    int val = 0;
   639    socklen_t val_len = sizeof(val);
   640    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
   641                SyscallSucceeds());
   642    ASSERT_EQ(max, val);
   643  }
   644  
   645  // Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored.
   646  TEST_P(RawSocketTest, SetSocketSendBuf) {
   647    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   648  
   649    int max = 0;
   650    int min = 0;
   651    {
   652      // Discover maximum buffer size by trying to set it to a large value.
   653      constexpr int kSndBufSz = 0xffffffff;
   654      ASSERT_THAT(
   655          setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
   656          SyscallSucceeds());
   657  
   658      max = 0;
   659      socklen_t max_len = sizeof(max);
   660      ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len),
   661                  SyscallSucceeds());
   662    }
   663  
   664    {
   665      // Discover minimum buffer size by trying to set it to zero.
   666      constexpr int kSndBufSz = 0;
   667      ASSERT_THAT(
   668          setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
   669          SyscallSucceeds());
   670  
   671      socklen_t min_len = sizeof(min);
   672      ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len),
   673                  SyscallSucceeds());
   674    }
   675  
   676    int quarter_sz = min + (max - min) / 4;
   677    ASSERT_THAT(
   678        setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &quarter_sz, sizeof(quarter_sz)),
   679        SyscallSucceeds());
   680  
   681    int val = 0;
   682    socklen_t val_len = sizeof(val);
   683    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
   684                SyscallSucceeds());
   685  
   686    quarter_sz *= 2;
   687    ASSERT_EQ(quarter_sz, val);
   688  }
   689  
   690  void randomizePacket(char* buf, size_t len, int proto) {
   691    RandomizeBuffer(buf, len);
   692    // When testing with TCP sockets, ensure the RST flag is set. This is to
   693    // prevent the TCP stack from generating RSTs packets for unknown endpoints.
   694    if (proto == IPPROTO_TCP && len > TCPHDR_FLAGS_OFF)
   695      buf[TCPHDR_FLAGS_OFF] |= TCPHDR_RST;
   696  }
   697  
   698  // Test that receive buffer limits are not enforced when the recv buffer is
   699  // empty.
   700  TEST_P(RawSocketTest, RecvBufLimitsEmptyRecvBuffer) {
   701    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   702  
   703    ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   704                SyscallSucceeds());
   705    ASSERT_THAT(
   706        connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   707        SyscallSucceeds());
   708  
   709    int min = 0;
   710    {
   711      // Discover minimum buffer size by trying to set it to zero.
   712      constexpr int kRcvBufSz = 0;
   713      ASSERT_THAT(
   714          setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
   715          SyscallSucceeds());
   716  
   717      socklen_t min_len = sizeof(min);
   718      ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
   719                  SyscallSucceeds());
   720    }
   721  
   722    {
   723      // Send data of size min and verify that it's received.
   724      std::vector<char> buf(min);
   725      randomizePacket(buf.data(), buf.size(), Protocol());
   726      ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
   727  
   728      // Receive the packet and make sure it's identical.
   729      std::vector<char> recv_buf(buf.size() + HdrLen());
   730      ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
   731      EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), 0);
   732    }
   733  
   734    {
   735      // Send data of size min + 1 and verify that its received. Both linux and
   736      // Netstack accept a dgram that exceeds rcvBuf limits if the receive buffer
   737      // is currently empty.
   738      std::vector<char> buf(min + 1);
   739      randomizePacket(buf.data(), buf.size(), Protocol());
   740      ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
   741      // Receive the packet and make sure it's identical.
   742      std::vector<char> recv_buf(buf.size() + HdrLen());
   743      ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
   744      EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), 0);
   745    }
   746  }
   747  
   748  TEST_P(RawSocketTest, RecvBufLimits) {
   749    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   750  
   751    ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   752                SyscallSucceeds());
   753    ASSERT_THAT(
   754        connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   755        SyscallSucceeds());
   756  
   757    int min = 0;
   758    {
   759      // Discover minimum buffer size by trying to set it to zero.
   760      constexpr int kRcvBufSz = 0;
   761      ASSERT_THAT(
   762          setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
   763          SyscallSucceeds());
   764  
   765      socklen_t min_len = sizeof(min);
   766      ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
   767                  SyscallSucceeds());
   768    }
   769  
   770    // Now set the limit to min * 2.
   771    int new_rcv_buf_sz = min * 2;
   772    ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz,
   773                           sizeof(new_rcv_buf_sz)),
   774                SyscallSucceeds());
   775    int rcv_buf_sz = 0;
   776    {
   777      socklen_t rcv_buf_len = sizeof(rcv_buf_sz);
   778      ASSERT_THAT(
   779          getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, &rcv_buf_len),
   780          SyscallSucceeds());
   781    }
   782  
   783    // Set a receive timeout so that we don't block forever on reads if the test
   784    // fails.
   785    struct timeval tv {
   786      .tv_sec = 1, .tv_usec = 0,
   787    };
   788    ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
   789                SyscallSucceeds());
   790  
   791    {
   792      std::vector<char> buf(min);
   793      randomizePacket(buf.data(), buf.size(), Protocol());
   794  
   795      ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
   796      ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
   797      ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
   798      ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
   799      int sent = 4;
   800      if (IsRunningOnGvisor() && !IsRunningWithHostinet()) {
   801        // Linux seems to drop the 4th packet even though technically it should
   802        // fit in the receive buffer.
   803        ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
   804        sent++;
   805      }
   806  
   807      // Verify that the expected number of packets are available to be read.
   808      for (int i = 0; i < sent - 1; i++) {
   809        // Receive the packet and make sure it's identical.
   810        std::vector<char> recv_buf(buf.size() + HdrLen());
   811        ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
   812        EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), 0);
   813      }
   814  
   815      // Assert that the last packet is dropped because the receive buffer should
   816      // be full after the first four packets.
   817      std::vector<char> recv_buf(buf.size() + HdrLen());
   818      struct iovec iov = {};
   819      iov.iov_base = static_cast<void*>(const_cast<char*>(recv_buf.data()));
   820      iov.iov_len = buf.size();
   821      struct msghdr msg = {};
   822      msg.msg_iov = &iov;
   823      msg.msg_iovlen = 1;
   824      msg.msg_control = NULL;
   825      msg.msg_controllen = 0;
   826      msg.msg_flags = 0;
   827      ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, MSG_DONTWAIT),
   828                  SyscallFailsWithErrno(EAGAIN));
   829    }
   830  }
   831  
   832  void RawSocketTest::SendBuf(const char* buf, int buf_len) {
   833    // It's safe to use const_cast here because sendmsg won't modify the iovec or
   834    // address.
   835    struct iovec iov = {};
   836    iov.iov_base = static_cast<void*>(const_cast<char*>(buf));
   837    iov.iov_len = static_cast<size_t>(buf_len);
   838    struct msghdr msg = {};
   839    msg.msg_name = static_cast<void*>(&addr_);
   840    msg.msg_namelen = AddrLen();
   841    msg.msg_iov = &iov;
   842    msg.msg_iovlen = 1;
   843    msg.msg_control = NULL;
   844    msg.msg_controllen = 0;
   845    msg.msg_flags = 0;
   846    ASSERT_THAT(sendmsg(s_, &msg, 0), SyscallSucceedsWithValue(buf_len));
   847  }
   848  
// Receives into recv_buf (capacity recv_buf_len) from the fixture socket s_ by
// delegating to ReceiveBufFrom. Any fatal failure raised there is propagated
// to the caller.
void RawSocketTest::ReceiveBuf(char* recv_buf, size_t recv_buf_len) {
  ASSERT_NO_FATAL_FAILURE(ReceiveBufFrom(s_, recv_buf, recv_buf_len));
}
   852  
// Receives into recv_buf (capacity recv_buf_len) from the given socket by
// delegating to the RecvNoCmsg helper.
// NOTE(review): presumably RecvNoCmsg asserts that the read fills the buffer
// and carries no control messages -- confirm against its definition.
void RawSocketTest::ReceiveBufFrom(int sock, char* recv_buf,
                                   size_t recv_buf_len) {
  ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sock, recv_buf, recv_buf_len));
}
   857  
   858  TEST_P(RawSocketTest, SetSocketDetachFilterNoInstalledFilter) {
   859    // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER.
   860    if (IsRunningOnGvisor()) {
   861      constexpr int val = 0;
   862      ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)),
   863                  SyscallSucceeds());
   864      return;
   865    }
   866  
   867    constexpr int val = 0;
   868    ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)),
   869                SyscallFailsWithErrno(ENOENT));
   870  }
   871  
   872  TEST_P(RawSocketTest, GetSocketDetachFilter) {
   873    int val = 0;
   874    socklen_t val_len = sizeof(val);
   875    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, &val_len),
   876                SyscallFailsWithErrno(ENOPROTOOPT));
   877  }
   878  
   879  TEST_P(RawSocketTest, BindToDevice) {
   880    constexpr char kLoopbackDeviceName[] = "lo";
   881    ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_BINDTODEVICE, &kLoopbackDeviceName,
   882                           sizeof(kLoopbackDeviceName)),
   883                SyscallSucceeds());
   884  
   885    char got[IFNAMSIZ];
   886    socklen_t got_len = sizeof(got);
   887    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_BINDTODEVICE, &got, &got_len),
   888                SyscallSucceeds());
   889    ASSERT_EQ(got_len, sizeof(kLoopbackDeviceName));
   890    EXPECT_EQ(strcmp(kLoopbackDeviceName, got), 0);
   891  }
   892  
   893  // AF_INET6+SOCK_RAW+IPPROTO_RAW sockets can be created, but not written to.
   894  TEST(RawSocketTest, IPv6ProtoRaw) {
   895    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   896  
   897    int sock;
   898    ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW),
   899                SyscallSucceeds());
   900  
   901    // Verify that writing yields EINVAL.
   902    char buf[] = "This is such a weird little edge case";
   903    struct sockaddr_in6 sin6 = {};
   904    sin6.sin6_family = AF_INET6;
   905    sin6.sin6_addr = in6addr_loopback;
   906    ASSERT_THAT(sendto(sock, buf, sizeof(buf), 0 /* flags */,
   907                       reinterpret_cast<struct sockaddr*>(&sin6), sizeof(sin6)),
   908                SyscallFailsWithErrno(EINVAL));
   909  }
   910  
   911  TEST(RawSocketTest, IPv6SendMsg) {
   912    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   913  
   914    int sock;
   915    ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_TCP),
   916                SyscallSucceeds());
   917  
   918    char kBuf[] = "hello";
   919    struct iovec iov = {};
   920    iov.iov_base = static_cast<void*>(const_cast<char*>(kBuf));
   921    iov.iov_len = static_cast<size_t>(sizeof(kBuf));
   922  
   923    struct sockaddr_storage addr = {};
   924    struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr);
   925    sin->sin_family = AF_INET;
   926    sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
   927  
   928    struct msghdr msg = {};
   929    msg.msg_name = static_cast<void*>(&addr);
   930    msg.msg_namelen = sizeof(sockaddr_in);
   931    msg.msg_iov = &iov;
   932    msg.msg_iovlen = 1;
   933    msg.msg_control = NULL;
   934    msg.msg_controllen = 0;
   935    msg.msg_flags = 0;
   936    ASSERT_THAT(sendmsg(sock, &msg, 0), SyscallFailsWithErrno(EINVAL));
   937  }
   938  
   939  TEST_P(RawSocketTest, ConnectOnIPv6Socket) {
   940    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   941  
   942    int sock;
   943    ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_TCP),
   944                SyscallSucceeds());
   945  
   946    struct sockaddr_storage addr = {};
   947    struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr);
   948    sin->sin_family = AF_INET;
   949    sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
   950  
   951    ASSERT_THAT(connect(sock, reinterpret_cast<struct sockaddr*>(&addr),
   952                        sizeof(sockaddr_in6)),
   953                SyscallFailsWithErrno(EAFNOSUPPORT));
   954  }
   955  
// Runs every RawSocketTest case once per combination of the listed IP
// protocols (first tuple element) and address families (second tuple element).
INSTANTIATE_TEST_SUITE_P(
    AllInetTests, RawSocketTest,
    ::testing::Combine(::testing::Values(IPPROTO_TCP, IPPROTO_UDP),
                       ::testing::Values(AF_INET, AF_INET6)));
   960  
   961  void TestRawSocketMaybeBindReceive(bool do_bind) {
   962    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
   963  
   964    constexpr char payload[] = "abcdefgh";
   965  
   966    const sockaddr_in addr = {
   967        .sin_family = AF_INET,
   968        .sin_addr = {.s_addr = htonl(INADDR_LOOPBACK)},
   969    };
   970  
   971    FileDescriptor udp_sock =
   972        ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0));
   973    sockaddr_in udp_sock_bind_addr = addr;
   974    socklen_t udp_sock_bind_addr_len = sizeof(udp_sock_bind_addr);
   975    ASSERT_THAT(bind(udp_sock.get(),
   976                     reinterpret_cast<const sockaddr*>(&udp_sock_bind_addr),
   977                     sizeof(udp_sock_bind_addr)),
   978                SyscallSucceeds());
   979    ASSERT_THAT(getsockname(udp_sock.get(),
   980                            reinterpret_cast<sockaddr*>(&udp_sock_bind_addr),
   981                            &udp_sock_bind_addr_len),
   982                SyscallSucceeds());
   983    ASSERT_EQ(udp_sock_bind_addr_len, sizeof(udp_sock_bind_addr));
   984  
   985    FileDescriptor raw_sock =
   986        ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP));
   987  
   988    auto test_recv = [&](const char* scope, uint32_t expected_destination) {
   989      SCOPED_TRACE(scope);
   990  
   991      constexpr int kInfinitePollTimeout = -1;
   992      pollfd pfd = {
   993          .fd = raw_sock.get(),
   994          .events = POLLIN,
   995      };
   996      ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, kInfinitePollTimeout),
   997                  SyscallSucceedsWithValue(1));
   998  
   999      struct ipv4_udp_packet {
  1000        iphdr ip;
  1001        udphdr udp;
  1002        char data[sizeof(payload)];
  1003  
  1004        // Used to make sure only the required space is used.
  1005        char unused_space;
  1006      } ABSL_ATTRIBUTE_PACKED;
  1007      constexpr size_t kExpectedIPPacketSize =
  1008          offsetof(ipv4_udp_packet, unused_space);
  1009  
  1010      // Receive the whole IPv4 packet on the raw socket.
  1011      ipv4_udp_packet read_raw_packet;
  1012      sockaddr_in peer;
  1013      socklen_t peerlen = sizeof(peer);
  1014      ASSERT_EQ(
  1015          recvfrom(raw_sock.get(), reinterpret_cast<char*>(&read_raw_packet),
  1016                   sizeof(read_raw_packet), 0 /* flags */,
  1017                   reinterpret_cast<sockaddr*>(&peer), &peerlen),
  1018          static_cast<ssize_t>(kExpectedIPPacketSize))
  1019          << strerror(errno);
  1020      ASSERT_EQ(peerlen, sizeof(peer));
  1021      EXPECT_EQ(read_raw_packet.ip.version, static_cast<unsigned int>(IPVERSION));
  1022      // IHL holds the number of header bytes in 4 byte units.
  1023      EXPECT_EQ(read_raw_packet.ip.ihl, sizeof(read_raw_packet.ip) / 4);
  1024      EXPECT_EQ(ntohs(read_raw_packet.ip.tot_len), kExpectedIPPacketSize);
  1025      EXPECT_EQ(ntohs(read_raw_packet.ip.frag_off) & IP_OFFMASK, 0);
  1026      EXPECT_EQ(read_raw_packet.ip.protocol, SOL_UDP);
  1027      EXPECT_EQ(ntohl(read_raw_packet.ip.saddr), INADDR_LOOPBACK);
  1028      EXPECT_EQ(ntohl(read_raw_packet.ip.daddr), expected_destination);
  1029      EXPECT_EQ(read_raw_packet.udp.source, udp_sock_bind_addr.sin_port);
  1030      EXPECT_EQ(read_raw_packet.udp.dest, udp_sock_bind_addr.sin_port);
  1031      EXPECT_EQ(ntohs(read_raw_packet.udp.len),
  1032                kExpectedIPPacketSize - sizeof(read_raw_packet.ip));
  1033      for (size_t i = 0; i < sizeof(payload); i++) {
  1034        EXPECT_EQ(read_raw_packet.data[i], payload[i])
  1035            << "byte mismatch @ idx=" << i;
  1036      }
  1037      EXPECT_EQ(peer.sin_family, AF_INET);
  1038      EXPECT_EQ(peer.sin_port, 0);
  1039      EXPECT_EQ(ntohl(peer.sin_addr.s_addr), INADDR_LOOPBACK);
  1040    };
  1041  
  1042    if (do_bind) {
  1043      ASSERT_THAT(bind(raw_sock.get(), reinterpret_cast<const sockaddr*>(&addr),
  1044                       sizeof(addr)),
  1045                  SyscallSucceeds());
  1046    }
  1047  
  1048    constexpr int kSendToFlags = 0;
  1049    sockaddr_in different_addr = udp_sock_bind_addr;
  1050    different_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK + 1);
  1051    ASSERT_THAT(sendto(udp_sock.get(), payload, sizeof(payload), kSendToFlags,
  1052                       reinterpret_cast<const sockaddr*>(&different_addr),
  1053                       sizeof(different_addr)),
  1054                SyscallSucceedsWithValue(sizeof(payload)));
  1055    if (!do_bind) {
  1056      ASSERT_NO_FATAL_FAILURE(
  1057          test_recv("different_addr", ntohl(different_addr.sin_addr.s_addr)));
  1058    }
  1059    ASSERT_THAT(sendto(udp_sock.get(), payload, sizeof(payload), kSendToFlags,
  1060                       reinterpret_cast<const sockaddr*>(&udp_sock_bind_addr),
  1061                       sizeof(udp_sock_bind_addr)),
  1062                SyscallSucceedsWithValue(sizeof(payload)));
  1063    ASSERT_NO_FATAL_FAILURE(
  1064        test_recv("addr", ntohl(udp_sock_bind_addr.sin_addr.s_addr)));
  1065  }
  1066  
  1067  TEST(RawSocketTest, UnboundReceive) {
  1068    // Test that a raw socket receives packets destined to any address if it is
  1069    // not bound to an address.
  1070    ASSERT_NO_FATAL_FAILURE(TestRawSocketMaybeBindReceive(false /* do_bind */));
  1071  }
  1072  
  1073  TEST(RawSocketTest, BindReceive) {
  1074    // Test that a raw socket only receives packets destined to the address it is
  1075    // bound to.
  1076    ASSERT_NO_FATAL_FAILURE(TestRawSocketMaybeBindReceive(true /* do_bind */));
  1077  }
  1078  
  1079  TEST(RawSocketTest, ReceiveIPPacketInfo) {
  1080    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
  1081  
  1082    FileDescriptor raw =
  1083        ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP));
  1084  
  1085    const sockaddr_in addr_ = {
  1086        .sin_family = AF_INET,
  1087        .sin_addr = {.s_addr = htonl(INADDR_LOOPBACK)},
  1088    };
  1089    ASSERT_THAT(
  1090        bind(raw.get(), reinterpret_cast<const sockaddr*>(&addr_), sizeof(addr_)),
  1091        SyscallSucceeds());
  1092  
  1093    // Register to receive IP packet info.
  1094    ASSERT_THAT(setsockopt(raw.get(), IPPROTO_IP, IP_PKTINFO, &kSockOptOn,
  1095                           sizeof(kSockOptOn)),
  1096                SyscallSucceeds());
  1097  
  1098    constexpr char send_buf[] = "malformed UDP";
  1099    ASSERT_THAT(sendto(raw.get(), send_buf, sizeof(send_buf), /*flags=*/0,
  1100                       reinterpret_cast<const sockaddr*>(&addr_), sizeof(addr_)),
  1101                SyscallSucceedsWithValue(sizeof(send_buf)));
  1102  
  1103    struct {
  1104      iphdr ip;
  1105      char data[sizeof(send_buf)];
  1106  
  1107      // Extra space in the receive buffer should be unused.
  1108      char unused_space;
  1109    } ABSL_ATTRIBUTE_PACKED recv_buf;
  1110  
  1111    size_t recv_buf_len = sizeof(recv_buf);
  1112    in_pktinfo received_pktinfo;
  1113    ASSERT_NO_FATAL_FAILURE(RecvPktInfo(raw.get(),
  1114                                        reinterpret_cast<char*>(&recv_buf),
  1115                                        &recv_buf_len, &received_pktinfo));
  1116  
  1117    EXPECT_EQ(recv_buf_len, sizeof(iphdr) + sizeof(send_buf));
  1118    EXPECT_EQ(memcmp(send_buf, &recv_buf.data, sizeof(send_buf)), 0);
  1119    EXPECT_EQ(recv_buf.ip.version, static_cast<unsigned int>(IPVERSION));
  1120    // IHL holds the number of header bytes in 4 byte units.
  1121    EXPECT_EQ(recv_buf.ip.ihl, sizeof(iphdr) / 4);
  1122    EXPECT_EQ(ntohs(recv_buf.ip.tot_len), sizeof(iphdr) + sizeof(send_buf));
  1123    EXPECT_EQ(recv_buf.ip.protocol, IPPROTO_UDP);
  1124    EXPECT_EQ(ntohl(recv_buf.ip.saddr), INADDR_LOOPBACK);
  1125    EXPECT_EQ(ntohl(recv_buf.ip.daddr), INADDR_LOOPBACK);
  1126  
  1127    EXPECT_EQ(received_pktinfo.ipi_ifindex,
  1128              ASSERT_NO_ERRNO_AND_VALUE(GetLoopbackIndex()));
  1129    EXPECT_EQ(ntohl(received_pktinfo.ipi_spec_dst.s_addr), INADDR_LOOPBACK);
  1130    EXPECT_EQ(ntohl(received_pktinfo.ipi_addr.s_addr), INADDR_LOOPBACK);
  1131  }
  1132  
  1133  TEST(RawSocketTest, ReceiveIPv6PacketInfo) {
  1134    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
  1135  
  1136    FileDescriptor raw =
  1137        ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_RAW, IPPROTO_UDP));
  1138  
  1139    const sockaddr_in6 addr_ = {
  1140        .sin6_family = AF_INET6,
  1141        .sin6_addr = in6addr_loopback,
  1142    };
  1143    ASSERT_THAT(
  1144        bind(raw.get(), reinterpret_cast<const sockaddr*>(&addr_), sizeof(addr_)),
  1145        SyscallSucceeds());
  1146  
  1147    // Register to receive IPv6 packet info.
  1148    ASSERT_THAT(setsockopt(raw.get(), IPPROTO_IPV6, IPV6_RECVPKTINFO, &kSockOptOn,
  1149                           sizeof(kSockOptOn)),
  1150                SyscallSucceeds());
  1151  
  1152    constexpr char send_buf[] = "malformed UDP";
  1153    ASSERT_THAT(sendto(raw.get(), send_buf, sizeof(send_buf), /*flags=*/0,
  1154                       reinterpret_cast<const sockaddr*>(&addr_), sizeof(addr_)),
  1155                SyscallSucceedsWithValue(sizeof(send_buf)));
  1156  
  1157    char recv_buf[sizeof(send_buf) + 1];
  1158    size_t recv_buf_len = sizeof(recv_buf);
  1159    in6_pktinfo received_pktinfo;
  1160    ASSERT_NO_FATAL_FAILURE(RecvIPv6PktInfo(raw.get(),
  1161                                            reinterpret_cast<char*>(&recv_buf),
  1162                                            &recv_buf_len, &received_pktinfo));
  1163    EXPECT_EQ(recv_buf_len, sizeof(send_buf));
  1164    EXPECT_EQ(memcmp(send_buf, recv_buf, sizeof(send_buf)), 0);
  1165    EXPECT_EQ(received_pktinfo.ipi6_ifindex,
  1166              ASSERT_NO_ERRNO_AND_VALUE(GetLoopbackIndex()));
  1167    ASSERT_EQ(memcmp(&received_pktinfo.ipi6_addr, &in6addr_loopback,
  1168                     sizeof(in6addr_loopback)),
  1169              0);
  1170  }
  1171  
  1172  TEST(RawSocketTest, ReceiveTOS) {
  1173    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
  1174  
  1175    FileDescriptor raw =
  1176        ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP));
  1177  
  1178    const sockaddr_in kAddr = {
  1179        .sin_family = AF_INET,
  1180        .sin_addr = {.s_addr = htonl(INADDR_LOOPBACK)},
  1181    };
  1182    ASSERT_THAT(
  1183        bind(raw.get(), reinterpret_cast<const sockaddr*>(&kAddr), sizeof(kAddr)),
  1184        SyscallSucceeds());
  1185  
  1186    constexpr int kArbitraryTOS = 42;
  1187    ASSERT_THAT(setsockopt(raw.get(), IPPROTO_IP, IP_TOS, &kArbitraryTOS,
  1188                           sizeof(kArbitraryTOS)),
  1189                SyscallSucceeds());
  1190  
  1191    constexpr char kSendBuf[] = "malformed UDP";
  1192    ASSERT_THAT(sendto(raw.get(), kSendBuf, sizeof(kSendBuf), 0 /* flags */,
  1193                       reinterpret_cast<const sockaddr*>(&kAddr), sizeof(kAddr)),
  1194                SyscallSucceedsWithValue(sizeof(kSendBuf)));
  1195  
  1196    // Register to receive TOS.
  1197    constexpr int kOne = 1;
  1198    ASSERT_THAT(
  1199        setsockopt(raw.get(), IPPROTO_IP, IP_RECVTOS, &kOne, sizeof(kOne)),
  1200        SyscallSucceeds());
  1201  
  1202    struct {
  1203      iphdr ip;
  1204      char data[sizeof(kSendBuf)];
  1205  
  1206      // Extra space in the receive buffer should be unused.
  1207      char unused_space;
  1208    } ABSL_ATTRIBUTE_PACKED recv_buf;
  1209    uint8_t recv_tos;
  1210    size_t recv_buf_len = sizeof(recv_buf);
  1211    ASSERT_NO_FATAL_FAILURE(RecvTOS(raw.get(), reinterpret_cast<char*>(&recv_buf),
  1212                                    &recv_buf_len, &recv_tos));
  1213    ASSERT_EQ(recv_buf_len, sizeof(iphdr) + sizeof(kSendBuf));
  1214  
  1215    EXPECT_EQ(recv_buf.ip.version, static_cast<unsigned int>(IPVERSION));
  1216    // IHL holds the number of header bytes in 4 byte units.
  1217    EXPECT_EQ(recv_buf.ip.ihl, sizeof(iphdr) / 4);
  1218    EXPECT_EQ(ntohs(recv_buf.ip.tot_len), sizeof(iphdr) + sizeof(kSendBuf));
  1219    EXPECT_EQ(recv_buf.ip.protocol, IPPROTO_UDP);
  1220    EXPECT_EQ(ntohl(recv_buf.ip.saddr), INADDR_LOOPBACK);
  1221    EXPECT_EQ(ntohl(recv_buf.ip.daddr), INADDR_LOOPBACK);
  1222  
  1223    EXPECT_EQ(memcmp(kSendBuf, &recv_buf.data, sizeof(kSendBuf)), 0);
  1224  
  1225    if (const char* val = getenv("TOS_TCLASS_EXPECT_DEFAULT");
  1226        val != nullptr && strcmp(val, "1") == 0) {
  1227      // TODO(b/217448626): At least one Linux environment does not allow setting
  1228      // a custom TOS. In this case, we additionally accept the default.
  1229      EXPECT_THAT(recv_buf.ip.tos, AnyOf(kArbitraryTOS, 0u));
  1230      EXPECT_THAT(recv_tos, AnyOf(kArbitraryTOS, 0u));
  1231    } else {
  1232      EXPECT_EQ(recv_buf.ip.tos, static_cast<uint8_t>(kArbitraryTOS));
  1233      EXPECT_EQ(recv_tos, kArbitraryTOS);
  1234    }
  1235  }
  1236  
  1237  TEST(RawSocketTest, ReceiveTClass) {
  1238    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
  1239  
  1240    FileDescriptor raw =
  1241        ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_RAW, IPPROTO_UDP));
  1242  
  1243    const sockaddr_in6 kAddr = {
  1244        .sin6_family = AF_INET6,
  1245        .sin6_addr = in6addr_loopback,
  1246    };
  1247    ASSERT_THAT(
  1248        bind(raw.get(), reinterpret_cast<const sockaddr*>(&kAddr), sizeof(kAddr)),
  1249        SyscallSucceeds());
  1250  
  1251    constexpr int kArbitraryTClass = 42;
  1252    ASSERT_THAT(setsockopt(raw.get(), IPPROTO_IPV6, IPV6_TCLASS,
  1253                           &kArbitraryTClass, sizeof(kArbitraryTClass)),
  1254                SyscallSucceeds());
  1255  
  1256    constexpr char send_buf[] = "malformed UDP";
  1257    ASSERT_THAT(sendto(raw.get(), send_buf, sizeof(send_buf), 0 /* flags */,
  1258                       reinterpret_cast<const sockaddr*>(&kAddr), sizeof(kAddr)),
  1259                SyscallSucceedsWithValue(sizeof(send_buf)));
  1260  
  1261    // Register to receive TClass.
  1262    constexpr int kOne = 1;
  1263    ASSERT_THAT(
  1264        setsockopt(raw.get(), IPPROTO_IPV6, IPV6_RECVTCLASS, &kOne, sizeof(kOne)),
  1265        SyscallSucceeds());
  1266  
  1267    char recv_buf[sizeof(send_buf) + 1];
  1268    size_t recv_buf_len = sizeof(recv_buf);
  1269    int recv_tclass;
  1270    ASSERT_NO_FATAL_FAILURE(
  1271        RecvTClass(raw.get(), recv_buf, &recv_buf_len, &recv_tclass));
  1272    ASSERT_EQ(recv_buf_len, sizeof(send_buf));
  1273  
  1274    EXPECT_EQ(memcmp(send_buf, recv_buf, sizeof(send_buf)), 0);
  1275  
  1276    if (const char* val = getenv("TOS_TCLASS_EXPECT_DEFAULT");
  1277        val != nullptr && strcmp(val, "1") == 0) {
  1278      // TODO(b/217448626): At least one Linux environment does not allow setting
  1279      // a custom TCLASS. In this case, we additionally accept the default.
  1280      EXPECT_THAT(recv_tclass, AnyOf(kArbitraryTClass, 0));
  1281    } else {
  1282      EXPECT_EQ(recv_tclass, kArbitraryTClass);
  1283    }
  1284  }
  1285  
  1286  TEST(RawSocketTest, ReceiveTTL) {
  1287    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
  1288  
  1289    FileDescriptor raw =
  1290        ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP));
  1291  
  1292    const sockaddr_in kAddr = {
  1293        .sin_family = AF_INET,
  1294        .sin_addr = {.s_addr = htonl(INADDR_LOOPBACK)},
  1295    };
  1296    ASSERT_THAT(
  1297        bind(raw.get(), reinterpret_cast<const sockaddr*>(&kAddr), sizeof(kAddr)),
  1298        SyscallSucceeds());
  1299    ASSERT_THAT(connect(raw.get(), reinterpret_cast<const sockaddr*>(&kAddr),
  1300                        sizeof(kAddr)),
  1301                SyscallSucceeds());
  1302  
  1303    constexpr int kArbitraryTTL = 42;
  1304    ASSERT_THAT(setsockopt(raw.get(), IPPROTO_IP, IP_TTL, &kArbitraryTTL,
  1305                           sizeof(kArbitraryTTL)),
  1306                SyscallSucceeds());
  1307  
  1308    char send_buf[] = "malformed UDP";
  1309    auto test_recv_ttl = [&](int expected_ttl) {
  1310      // Register to receive TTL.
  1311      constexpr int kOne = 1;
  1312      ASSERT_THAT(
  1313          setsockopt(raw.get(), IPPROTO_IP, IP_RECVTTL, &kOne, sizeof(kOne)),
  1314          SyscallSucceeds());
  1315  
  1316      struct {
  1317        iphdr ip;
  1318        char data[sizeof(send_buf)];
  1319      } ABSL_ATTRIBUTE_PACKED recv_buf;
  1320  
  1321      int recv_ttl;
  1322      size_t recv_buf_len = sizeof(recv_buf);
  1323      ASSERT_NO_FATAL_FAILURE(RecvTTL(raw.get(),
  1324                                      reinterpret_cast<char*>(&recv_buf),
  1325                                      &recv_buf_len, &recv_ttl));
  1326      ASSERT_EQ(recv_buf_len, sizeof(iphdr) + sizeof(send_buf));
  1327  
  1328      EXPECT_EQ(recv_buf.ip.version, static_cast<unsigned int>(IPVERSION));
  1329      // IHL holds the number of header bytes in 4 byte units.
  1330      EXPECT_EQ(recv_buf.ip.ihl, sizeof(iphdr) / 4);
  1331      EXPECT_EQ(ntohs(recv_buf.ip.tot_len), sizeof(iphdr) + sizeof(send_buf));
  1332      EXPECT_EQ(recv_buf.ip.protocol, IPPROTO_UDP);
  1333      EXPECT_EQ(ntohl(recv_buf.ip.saddr), INADDR_LOOPBACK);
  1334      EXPECT_EQ(ntohl(recv_buf.ip.daddr), INADDR_LOOPBACK);
  1335      EXPECT_EQ(recv_buf.ip.ttl, static_cast<uint8_t>(expected_ttl));
  1336  
  1337      EXPECT_EQ(memcmp(send_buf, &recv_buf.data, sizeof(send_buf)), 0);
  1338  
  1339      EXPECT_EQ(recv_ttl, expected_ttl);
  1340    };
  1341  
  1342    ASSERT_THAT(send(raw.get(), send_buf, sizeof(send_buf), /*flags=*/0),
  1343                SyscallSucceedsWithValue(sizeof(send_buf)));
  1344    {
  1345      SCOPED_TRACE("receive ttl set by option");
  1346      ASSERT_NO_FATAL_FAILURE(test_recv_ttl(kArbitraryTTL));
  1347    }
  1348  
  1349    constexpr int kArbitrarySendmsgTTL = kArbitraryTTL + 1;
  1350    ASSERT_NO_FATAL_FAILURE(SendTTL(raw.get(), send_buf, size_t(sizeof(send_buf)),
  1351                                    kArbitrarySendmsgTTL));
  1352    {
  1353      SCOPED_TRACE("receive ttl set by cmsg");
  1354      ASSERT_NO_FATAL_FAILURE(test_recv_ttl(kArbitrarySendmsgTTL));
  1355    }
  1356  }
  1357  
  1358  TEST(RawSocketTest, ReceiveHopLimit) {
  1359    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
  1360  
  1361    FileDescriptor raw =
  1362        ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_RAW, IPPROTO_UDP));
  1363  
  1364    const sockaddr_in6 kAddr = {
  1365        .sin6_family = AF_INET6,
  1366        .sin6_addr = in6addr_loopback,
  1367    };
  1368    ASSERT_THAT(
  1369        bind(raw.get(), reinterpret_cast<const sockaddr*>(&kAddr), sizeof(kAddr)),
  1370        SyscallSucceeds());
  1371    ASSERT_THAT(connect(raw.get(), reinterpret_cast<const sockaddr*>(&kAddr),
  1372                        sizeof(kAddr)),
  1373                SyscallSucceeds());
  1374  
  1375    constexpr int kArbitraryHopLimit = 42;
  1376    ASSERT_THAT(setsockopt(raw.get(), IPPROTO_IPV6, IPV6_UNICAST_HOPS,
  1377                           &kArbitraryHopLimit, sizeof(kArbitraryHopLimit)),
  1378                SyscallSucceeds());
  1379  
  1380    // Register to receive HOPLIMIT.
  1381    constexpr int kOne = 1;
  1382    ASSERT_THAT(setsockopt(raw.get(), IPPROTO_IPV6, IPV6_RECVHOPLIMIT, &kOne,
  1383                           sizeof(kOne)),
  1384                SyscallSucceeds());
  1385  
  1386    char send_buf[] = "malformed UDP";
  1387    auto test_recv_hoplimit = [&](int expected_hoplimit) {
  1388      char recv_buf[sizeof(send_buf)];
  1389      size_t recv_buf_len = sizeof(recv_buf);
  1390      int recv_hoplimit;
  1391      ASSERT_NO_FATAL_FAILURE(
  1392          RecvHopLimit(raw.get(), recv_buf, &recv_buf_len, &recv_hoplimit));
  1393      ASSERT_EQ(recv_buf_len, sizeof(send_buf));
  1394  
  1395      EXPECT_EQ(memcmp(send_buf, recv_buf, sizeof(send_buf)), 0);
  1396      EXPECT_EQ(recv_hoplimit, expected_hoplimit);
  1397    };
  1398  
  1399    ASSERT_THAT(send(raw.get(), send_buf, sizeof(send_buf), /*flags=*/0),
  1400                SyscallSucceedsWithValue(sizeof(send_buf)));
  1401    {
  1402      SCOPED_TRACE("receive hoplimit set by option");
  1403      ASSERT_NO_FATAL_FAILURE(test_recv_hoplimit(kArbitraryHopLimit));
  1404    }
  1405  
  1406    constexpr int kArbitrarySendmsgHopLimit = kArbitraryHopLimit + 1;
  1407    ASSERT_NO_FATAL_FAILURE(SendHopLimit(raw.get(), send_buf,
  1408                                         size_t(sizeof(send_buf)),
  1409                                         kArbitrarySendmsgHopLimit));
  1410    {
  1411      SCOPED_TRACE("receive hoplimit set by cmsg");
  1412      ASSERT_NO_FATAL_FAILURE(test_recv_hoplimit(kArbitrarySendmsgHopLimit));
  1413    }
  1414  }
  1415  
  1416  TEST(RawSocketTest, SetIPv6ChecksumError_MultipleOf2) {
  1417    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
  1418  
  1419    FileDescriptor fd =
  1420        ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_RAW, IPPROTO_UDP));
  1421  
  1422    int intV = 3;
  1423    ASSERT_THAT(
  1424        setsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &intV, sizeof(intV)),
  1425        SyscallFailsWithErrno(EINVAL));
  1426  
  1427    intV = 5;
  1428    ASSERT_THAT(
  1429        setsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &intV, sizeof(intV)),
  1430        SyscallFailsWithErrno(EINVAL));
  1431  
  1432    intV = 2;
  1433    ASSERT_THAT(
  1434        setsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &intV, sizeof(intV)),
  1435        SyscallSucceeds());
  1436  
  1437    intV = 4;
  1438    ASSERT_THAT(
  1439        setsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &intV, sizeof(intV)),
  1440        SyscallSucceeds());
  1441  }
  1442  
  1443  TEST(RawSocketTest, SetIPv6ChecksumError_ReadShort) {
          // Exercises setsockopt(IPV6_CHECKSUM) on a raw IPv6/UDP socket when the
          // supplied option length is smaller than sizeof(int), and then when the
          // option value pointer is null. The test is skipped unless
          // HaveRawIPSocketCapability() reports true.
  1444    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
  1445  
  1446    FileDescriptor fd =
  1447        ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_RAW, IPPROTO_UDP));
  1448  
  1449    int intV = 2;
  1450    if (IsRunningOnGvisor()) {
  1451      // TODO(https://gvisor.dev/issue/6982): This is a deviation from Linux. We
  1452      // should determine if we want to match the behaviour or handle the error
  1453      // more gracefully.
            //
            // On gVisor the short-length call is expected to fail with EINVAL, and
            // the early return below skips every remaining check in this test.
  1454      ASSERT_THAT(
  1455          setsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &intV, sizeof(intV) - 1),
  1456          SyscallFailsWithErrno(EINVAL));
  1457      return;
  1458    }
  1459  
          // Use the largest even int so that every byte of the value matters when
          // it is read back below. The value is forced to be even presumably
          // because odd checksum offsets are rejected -- TODO confirm.
  1460    intV = std::numeric_limits<int>::max();
  1461    if (intV % 2) {
  1462      intV--;
  1463    }
  1464  
  1465    if (const char* val = getenv("IPV6_CHECKSUM_SETSOCKOPT_SHORT_EXCEPTION");
  1466        val != nullptr && strcmp(val, "1") == 0) {
  1467      // TODO(https://issuetracker.google.com/issues/212585236): As of writing, it
  1468      // seems like at least one Linux environment considers optlen unlike a local
  1469      // Linux environment. In this case we call setsockopt with the full int so
  1470      // that the rest of the test passes. Once the root cause for this difference
  1471      // is found, we can update this check.
  1472      ASSERT_THAT(
  1473          setsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &intV, sizeof(intV)),
  1474          SyscallSucceeds());
  1475    } else {
            // Default path: pass a length one byte short of sizeof(int) and expect
            // the call to succeed anyway.
  1476      ASSERT_THAT(
  1477          setsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &intV, sizeof(intV) - 1),
  1478          SyscallSucceeds());
  1479    }
  1480  
  1481    {
  1482      int got;
  1483      socklen_t got_len = sizeof(got);
  1484      ASSERT_THAT(getsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &got, &got_len),
  1485                  SyscallSucceeds());
  1486      ASSERT_EQ(got_len, sizeof(got));
  1487      // Even though we called setsockopt with a length smaller than an int, Linux
  1488      // seems to read the full int.
  1489      EXPECT_EQ(got, intV);
  1490    }
  1491  
  1492    // If we pass a pointer that points to memory less than the size of an
  1493    // int, we get a bad address error.
          //
          // NOTE(review): u8V is default-constructed and never allocated, so
          // u8V.get() is a null pointer here; only the length, sizeof(*u8V), comes
          // from the uint8_t pointee type. Confirm that a null optval (rather than
          // a pointer to a real one-byte buffer) is the intended input.
  1494    std::unique_ptr<uint8_t> u8V;
  1495    // Linux seems to assume a full int but doesn't check the passed length.
  1496    //
  1497    // https://github.com/torvalds/linux/blob/a52a8e9eaf4a12dd58953fc622bb2bc08fd1d32c/net/ipv6/raw.c#L1023
  1498    // shows that Linux copies optVal to an int without first checking optLen.
  1499    ASSERT_THAT(
  1500        setsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, u8V.get(), sizeof(*u8V)),
  1501        SyscallFailsWithErrno(EFAULT));
  1502  }
  1503  
  1504  TEST(RawSocketTest, IPv6Checksum_ValidateAndCalculate) {
          // Creates two raw IPv6/UDP sockets bound to the loopback address. One
          // (checksum_set) has IPV6_CHECKSUM set to the offset of the UDP checksum
          // field; the other (checksum_not_set) has it set to -1. Packets are then
          // sent through each socket, and the test checks which socket receives
          // them and whether the received UDP checksum field is non-zero.
  1505    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability()));
  1506  
  1507    FileDescriptor checksum_set =
  1508        ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_RAW, IPPROTO_UDP));
  1509  
  1510    FileDescriptor checksum_not_set =
  1511        ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_RAW, IPPROTO_UDP));
  1512  
          // Both sockets bind to, and send to, this same loopback address.
  1513    const sockaddr_in6 addr = {
  1514        .sin6_family = AF_INET6,
  1515        .sin6_addr = IN6ADDR_LOOPBACK_INIT,
  1516    };
  1517  
          // Binds fd to addr, verifies that IPV6_CHECKSUM initially reads back as
          // -1, sets it to v, and verifies that v is read back.
  1518    auto bind_and_set_checksum = [&](const FileDescriptor& fd, int v) {
  1519      ASSERT_THAT(
  1520          bind(fd.get(), reinterpret_cast<const sockaddr*>(&addr), sizeof(addr)),
  1521          SyscallSucceeds());
  1522  
  1523      int got;
  1524      socklen_t got_len = sizeof(got);
  1525      ASSERT_THAT(getsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &got, &got_len),
  1526                  SyscallSucceeds());
  1527      ASSERT_EQ(got_len, sizeof(got));
  1528      EXPECT_EQ(got, -1);
  1529  
  1530      ASSERT_THAT(setsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &v, sizeof(v)),
  1531                  SyscallSucceeds());
  1532      ASSERT_THAT(getsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &got, &got_len),
  1533                  SyscallSucceeds());
  1534      ASSERT_EQ(got_len, sizeof(got));
  1535      EXPECT_EQ(got, v);
  1536    };
  1537  
          // Wire layout used by this test: a UDP header followed by a 32-bit
          // payload value, declared packed.
  1538    struct udp_packet {
  1539      udphdr udp;
  1540      uint32_t value;
  1541    } ABSL_ATTRIBUTE_PACKED;
  1542  
          // checksum_set gets the byte offset of uh_sum within udp_packet;
          // checksum_not_set is explicitly set to -1, the same value the option
          // is verified to hold before it is set.
  1543    ASSERT_NO_FATAL_FAILURE(bind_and_set_checksum(
  1544        checksum_set, offsetof(udp_packet, udp) + offsetof(udphdr, uh_sum)));
  1545    ASSERT_NO_FATAL_FAILURE(bind_and_set_checksum(checksum_not_set, -1));
  1546  
          // Sends one udp_packet carrying payload v through fd to addr. Only
          // .value is initialized explicitly, so the UDP header (including uh_sum)
          // is zero when handed to sendto.
  1547    auto send = [&](const FileDescriptor& fd, uint32_t v) {
  1548      const udp_packet packet = {
  1549          .value = v,
  1550      };
  1551  
  1552      ASSERT_THAT(sendto(fd.get(), &packet, sizeof(packet), /*flags=*/0,
  1553                         reinterpret_cast<const sockaddr*>(&addr), sizeof(addr)),
  1554                  SyscallSucceedsWithValue(sizeof(packet)));
  1555    };
  1556  
          // Receives one packet on fd and checks that the sender equals addr and
          // the payload equals v. When should_check_xsum is true the received
          // uh_sum must be non-zero; otherwise it must be zero.
  1557    auto expect_receive = [&](const FileDescriptor& fd, uint32_t v,
  1558                              bool should_check_xsum) {
  1559      udp_packet packet;
  1560      sockaddr_in6 sender;
  1561      socklen_t sender_len = sizeof(sender);
  1562      ASSERT_THAT(
  1563          RetryEINTR(recvfrom)(fd.get(), &packet, sizeof(packet), /*flags=*/0,
  1564                               reinterpret_cast<sockaddr*>(&sender), &sender_len),
  1565          SyscallSucceedsWithValue(sizeof(packet)));
  1566      ASSERT_EQ(sender_len, sizeof(sender));
  1567      EXPECT_EQ(memcmp(&sender, &addr, sizeof(addr)), 0);
  1568      EXPECT_EQ(packet.value, v);
  1569      if (should_check_xsum) {
  1570        EXPECT_NE(packet.udp.uh_sum, 0);
  1571      } else {
  1572        EXPECT_EQ(packet.udp.uh_sum, 0);
  1573      }
  1574    };
  1575  
  1576    uint32_t counter = 1;
  1577    // Packets sent through checksum_not_set will not have a valid checksum set so
  1578    // checksum_set should not accept those packets.
          //
          // checksum_set is not read at this point. The rejection is verified
          // implicitly: the first receive on checksum_set further down expects the
          // next counter value, which would not match if this packet had been
          // queued on that socket.
  1579    ASSERT_NO_FATAL_FAILURE(send(checksum_not_set, counter));
  1580    ASSERT_NO_FATAL_FAILURE(expect_receive(checksum_not_set, counter, false));
  1581  
  1582    // Packets sent through checksum_set will have a valid checksum so both
  1583    // sockets should accept them.
  1584    ASSERT_NO_FATAL_FAILURE(send(checksum_set, ++counter));
  1585    ASSERT_NO_FATAL_FAILURE(expect_receive(checksum_set, counter, true));
  1586    ASSERT_NO_FATAL_FAILURE(expect_receive(checksum_not_set, counter, true));
  1587  }
  1588  
  1589  }  // namespace
  1590  
  1591  }  // namespace testing
  1592  }  // namespace gvisor