github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/test/syscalls/linux/raw_socket.cc (about)

     1  // Copyright 2019 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include <linux/capability.h>
    16  #include <linux/filter.h>
    17  #include <netinet/in.h>
    18  #include <netinet/ip.h>
    19  #include <netinet/ip6.h>
    20  #include <netinet/ip_icmp.h>
    21  #include <poll.h>
    22  #include <sys/socket.h>
    23  #include <sys/types.h>
    24  #include <unistd.h>
    25  
    26  #include <algorithm>
    27  
    28  #include "gtest/gtest.h"
    29  #include "test/syscalls/linux/socket_test_util.h"
    30  #include "test/syscalls/linux/unix_domain_socket_test_util.h"
    31  #include "test/util/capability_util.h"
    32  #include "test/util/file_descriptor.h"
    33  #include "test/util/test_util.h"
    34  
    35  // Note: in order to run these tests, /proc/sys/net/ipv4/ping_group_range will
    36  // need to be configured to let the superuser create ping sockets (see icmp(7)).
    37  
    38  namespace gvisor {
    39  namespace testing {
    40  
    41  namespace {
    42  
    43  // Fixture for tests parameterized by protocol.
    44  class RawSocketTest : public ::testing::TestWithParam<std::tuple<int, int>> {
    45   protected:
    46    // Creates a socket to be used in tests.
    47    void SetUp() override;
    48  
    49    // Closes the socket created by SetUp().
    50    void TearDown() override;
    51  
    52    // Sends buf via s_.
    53    void SendBuf(const char* buf, int buf_len);
    54  
    55    // Reads from s_ into recv_buf.
    56    void ReceiveBuf(char* recv_buf, size_t recv_buf_len);
    57  
    58    void ReceiveBufFrom(int sock, char* recv_buf, size_t recv_buf_len);
    59  
    60    int Protocol() { return std::get<0>(GetParam()); }
    61  
    62    int Family() { return std::get<1>(GetParam()); }
    63  
    64    socklen_t AddrLen() {
    65      if (Family() == AF_INET) {
    66        return sizeof(sockaddr_in);
    67      }
    68      return sizeof(sockaddr_in6);
    69    }
    70  
    71    int HdrLen() {
    72      if (Family() == AF_INET) {
    73        return sizeof(struct iphdr);
    74      }
    75      // IPv6 raw sockets don't include the header.
    76      return 0;
    77    }
    78  
    79    // The socket used for both reading and writing.
    80    int s_;
    81  
    82    // The loopback address.
    83    struct sockaddr_storage addr_;
    84  };
    85  
    86  void RawSocketTest::SetUp() {
    87    if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
    88      ASSERT_THAT(socket(Family(), SOCK_RAW, Protocol()),
    89                  SyscallFailsWithErrno(EPERM));
    90      GTEST_SKIP();
    91    }
    92  
    93    ASSERT_THAT(s_ = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds());
    94  
    95    addr_ = {};
    96  
    97    // We don't set ports because raw sockets don't have a notion of ports.
    98    if (Family() == AF_INET) {
    99      struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr_);
   100      sin->sin_family = AF_INET;
   101      sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
   102    } else {
   103      struct sockaddr_in6* sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr_);
   104      sin6->sin6_family = AF_INET6;
   105      sin6->sin6_addr = in6addr_loopback;
   106    }
   107  }
   108  
   109  void RawSocketTest::TearDown() {
   110    // TearDown will be run even if we skip the test.
   111    if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
   112      EXPECT_THAT(close(s_), SyscallSucceeds());
   113    }
   114  }
   115  
   116  // We should be able to create multiple raw sockets for the same protocol.
   117  // BasicRawSocket::Setup creates the first one, so we only have to create one
   118  // more here.
   119  TEST_P(RawSocketTest, MultipleCreation) {
   120    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   121  
   122    int s2;
   123    ASSERT_THAT(s2 = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds());
   124  
   125    ASSERT_THAT(close(s2), SyscallSucceeds());
   126  }
   127  
   128  // Test that shutting down an unconnected socket fails.
   129  TEST_P(RawSocketTest, FailShutdownWithoutConnect) {
   130    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   131  
   132    ASSERT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN));
   133    ASSERT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN));
   134  }
   135  
   136  // Shutdown is a no-op for raw sockets (and datagram sockets in general).
   137  TEST_P(RawSocketTest, ShutdownWriteNoop) {
   138    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   139  
   140    ASSERT_THAT(
   141        connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   142        SyscallSucceeds());
   143    ASSERT_THAT(shutdown(s_, SHUT_WR), SyscallSucceeds());
   144  
   145    // Arbitrary.
   146    constexpr char kBuf[] = "noop";
   147    ASSERT_THAT(RetryEINTR(write)(s_, kBuf, sizeof(kBuf)),
   148                SyscallSucceedsWithValue(sizeof(kBuf)));
   149  }
   150  
   151  // Shutdown is a no-op for raw sockets (and datagram sockets in general).
   152  TEST_P(RawSocketTest, ShutdownReadNoop) {
   153    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   154  
   155    ASSERT_THAT(
   156        connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   157        SyscallSucceeds());
   158    ASSERT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds());
   159  
   160    // Arbitrary.
   161    constexpr char kBuf[] = "gdg";
   162    ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf)));
   163  
   164    std::vector<char> c(sizeof(kBuf) + HdrLen());
   165    ASSERT_THAT(read(s_, c.data(), c.size()), SyscallSucceedsWithValue(c.size()));
   166  }
   167  
   168  // Test that listen() fails.
   169  TEST_P(RawSocketTest, FailListen) {
   170    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   171  
   172    ASSERT_THAT(listen(s_, 1), SyscallFailsWithErrno(ENOTSUP));
   173  }
   174  
   175  // Test that accept() fails.
   176  TEST_P(RawSocketTest, FailAccept) {
   177    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   178  
   179    struct sockaddr saddr;
   180    socklen_t addrlen;
   181    ASSERT_THAT(accept(s_, &saddr, &addrlen), SyscallFailsWithErrno(ENOTSUP));
   182  }
   183  
   184  // Test that getpeername() returns nothing before connect().
   185  TEST_P(RawSocketTest, FailGetPeerNameBeforeConnect) {
   186    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   187  
   188    struct sockaddr saddr;
   189    socklen_t addrlen = sizeof(saddr);
   190    ASSERT_THAT(getpeername(s_, &saddr, &addrlen),
   191                SyscallFailsWithErrno(ENOTCONN));
   192  }
   193  
   194  // Test that getpeername() returns something after connect().
   195  TEST_P(RawSocketTest, GetPeerName) {
   196    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   197  
   198    ASSERT_THAT(
   199        connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   200        SyscallSucceeds());
   201    struct sockaddr saddr;
   202    socklen_t addrlen = sizeof(saddr);
   203    ASSERT_THAT(getpeername(s_, &saddr, &addrlen),
   204                SyscallFailsWithErrno(ENOTCONN));
   205    ASSERT_GT(addrlen, 0);
   206  }
   207  
   208  // Test that the socket is writable immediately.
   209  TEST_P(RawSocketTest, PollWritableImmediately) {
   210    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   211  
   212    struct pollfd pfd = {};
   213    pfd.fd = s_;
   214    pfd.events = POLLOUT;
   215    ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 10000), SyscallSucceedsWithValue(1));
   216  }
   217  
   218  // Test that the socket isn't readable before receiving anything.
   219  TEST_P(RawSocketTest, PollNotReadableInitially) {
   220    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   221  
   222    // Try to receive data with MSG_DONTWAIT, which returns immediately if there's
   223    // nothing to be read.
   224    char buf[117];
   225    ASSERT_THAT(RetryEINTR(recv)(s_, buf, sizeof(buf), MSG_DONTWAIT),
   226                SyscallFailsWithErrno(EAGAIN));
   227  }
   228  
   229  // Test that the socket becomes readable once something is written to it.
   230  TEST_P(RawSocketTest, PollTriggeredOnWrite) {
   231    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   232  
   233    // Write something so that there's data to be read.
   234    // Arbitrary.
   235    constexpr char kBuf[] = "JP5";
   236    ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf)));
   237  
   238    struct pollfd pfd = {};
   239    pfd.fd = s_;
   240    pfd.events = POLLIN;
   241    ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 10000), SyscallSucceedsWithValue(1));
   242  }
   243  
   244  // Test that we can connect() to a valid IP (loopback).
   245  TEST_P(RawSocketTest, ConnectToLoopback) {
   246    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   247  
   248    ASSERT_THAT(
   249        connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   250        SyscallSucceeds());
   251  }
   252  
   253  // Test that calling send() without connect() fails.
   254  TEST_P(RawSocketTest, SendWithoutConnectFails) {
   255    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   256  
   257    // Arbitrary.
   258    constexpr char kBuf[] = "Endgame was good";
   259    ASSERT_THAT(send(s_, kBuf, sizeof(kBuf), 0),
   260                SyscallFailsWithErrno(EDESTADDRREQ));
   261  }
   262  
   263  // Wildcard Bind.
   264  TEST_P(RawSocketTest, BindToWildcard) {
   265    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   266    struct sockaddr_storage addr;
   267    addr = {};
   268  
   269    // We don't set ports because raw sockets don't have a notion of ports.
   270    if (Family() == AF_INET) {
   271      struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr);
   272      sin->sin_family = AF_INET;
   273      sin->sin_addr.s_addr = htonl(INADDR_ANY);
   274    } else {
   275      struct sockaddr_in6* sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr);
   276      sin6->sin6_family = AF_INET6;
   277      sin6->sin6_addr = in6addr_any;
   278    }
   279  
   280    ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   281                SyscallSucceeds());
   282  }
   283  
   284  // Bind to localhost.
   285  TEST_P(RawSocketTest, BindToLocalhost) {
   286    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   287  
   288    ASSERT_THAT(
   289        bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   290        SyscallSucceeds());
   291  }
   292  
   293  // Bind to a different address.
   294  TEST_P(RawSocketTest, BindToInvalid) {
   295    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   296  
   297    struct sockaddr_storage bind_addr = addr_;
   298    if (Family() == AF_INET) {
   299      struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&bind_addr);
   300      sin->sin_addr = {1};  // 1.0.0.0 - An address that we can't bind to.
   301    } else {
   302      struct sockaddr_in6* sin6 =
   303          reinterpret_cast<struct sockaddr_in6*>(&bind_addr);
   304      memset(&sin6->sin6_addr.s6_addr, 0, sizeof(sin6->sin6_addr.s6_addr));
   305      sin6->sin6_addr.s6_addr[0] = 1;  // 1: - An address that we can't bind to.
   306    }
   307    ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&bind_addr),
   308                     AddrLen()), SyscallFailsWithErrno(EADDRNOTAVAIL));
   309  }
   310  
   311  // Send and receive an packet.
   312  TEST_P(RawSocketTest, SendAndReceive) {
   313    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   314  
   315    // Arbitrary.
   316    constexpr char kBuf[] = "TB12";
   317    ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf)));
   318  
   319    // Receive the packet and make sure it's identical.
   320    std::vector<char> recv_buf(sizeof(kBuf) + HdrLen());
   321    ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
   322    EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0);
   323  }
   324  
   325  // We should be able to create multiple raw sockets for the same protocol and
   326  // receive the same packet on both.
   327  TEST_P(RawSocketTest, MultipleSocketReceive) {
   328    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   329  
   330    int s2;
   331    ASSERT_THAT(s2 = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds());
   332  
   333    // Arbitrary.
   334    constexpr char kBuf[] = "TB10";
   335    ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf)));
   336  
   337    // Receive it on socket 1.
   338    std::vector<char> recv_buf1(sizeof(kBuf) + HdrLen());
   339    ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf1.data(), recv_buf1.size()));
   340  
   341    // Receive it on socket 2.
   342    std::vector<char> recv_buf2(sizeof(kBuf) + HdrLen());
   343    ASSERT_NO_FATAL_FAILURE(ReceiveBufFrom(s2, recv_buf2.data(),
   344                                           recv_buf2.size()));
   345  
   346    EXPECT_EQ(memcmp(recv_buf1.data() + HdrLen(),
   347                     recv_buf2.data() + HdrLen(), sizeof(kBuf)),
   348              0);
   349  
   350    ASSERT_THAT(close(s2), SyscallSucceeds());
   351  }
   352  
   353  // Test that connect sends packets to the right place.
   354  TEST_P(RawSocketTest, SendAndReceiveViaConnect) {
   355    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   356  
   357    ASSERT_THAT(
   358        connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   359        SyscallSucceeds());
   360  
   361    // Arbitrary.
   362    constexpr char kBuf[] = "JH4";
   363    ASSERT_THAT(send(s_, kBuf, sizeof(kBuf), 0),
   364                SyscallSucceedsWithValue(sizeof(kBuf)));
   365  
   366    // Receive the packet and make sure it's identical.
   367    std::vector<char> recv_buf(sizeof(kBuf) + HdrLen());
   368    ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
   369    EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0);
   370  }
   371  
   372  // Bind to localhost, then send and receive packets.
   373  TEST_P(RawSocketTest, BindSendAndReceive) {
   374    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   375  
   376    ASSERT_THAT(
   377        bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   378        SyscallSucceeds());
   379  
   380    // Arbitrary.
   381    constexpr char kBuf[] = "DR16";
   382    ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf)));
   383  
   384    // Receive the packet and make sure it's identical.
   385    std::vector<char> recv_buf(sizeof(kBuf) + HdrLen());
   386    ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
   387    EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0);
   388  }
   389  
   390  // Bind and connect to localhost and send/receive packets.
   391  TEST_P(RawSocketTest, BindConnectSendAndReceive) {
   392    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   393  
   394    ASSERT_THAT(
   395        bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   396        SyscallSucceeds());
   397    ASSERT_THAT(
   398        connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   399        SyscallSucceeds());
   400  
   401    // Arbitrary.
   402    constexpr char kBuf[] = "DG88";
   403    ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf)));
   404  
   405    // Receive the packet and make sure it's identical.
   406    std::vector<char> recv_buf(sizeof(kBuf) + HdrLen());
   407    ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
   408    EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0);
   409  }
   410  
   411  // Check that setting SO_RCVBUF below min is clamped to the minimum
   412  // receive buffer size.
   413  TEST_P(RawSocketTest, SetSocketRecvBufBelowMin) {
   414    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   415  
   416    // Discover minimum receive buf size by trying to set it to zero.
   417    // See:
   418    // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820
   419    constexpr int kRcvBufSz = 0;
   420    ASSERT_THAT(
   421        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
   422        SyscallSucceeds());
   423  
   424    int min = 0;
   425    socklen_t min_len = sizeof(min);
   426    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
   427                SyscallSucceeds());
   428  
   429    // Linux doubles the value so let's use a value that when doubled will still
   430    // be smaller than min.
   431    int below_min = min / 2 - 1;
   432    ASSERT_THAT(
   433        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &below_min, sizeof(below_min)),
   434        SyscallSucceeds());
   435  
   436    int val = 0;
   437    socklen_t val_len = sizeof(val);
   438    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
   439                SyscallSucceeds());
   440  
   441    ASSERT_EQ(min, val);
   442  }
   443  
   444  // Check that setting SO_RCVBUF above max is clamped to the maximum
   445  // receive buffer size.
   446  TEST_P(RawSocketTest, SetSocketRecvBufAboveMax) {
   447    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   448  
   449    // Discover max buf size by trying to set the largest possible buffer size.
   450    constexpr int kRcvBufSz = 0xffffffff;
   451    ASSERT_THAT(
   452        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
   453        SyscallSucceeds());
   454  
   455    int max = 0;
   456    socklen_t max_len = sizeof(max);
   457    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len),
   458                SyscallSucceeds());
   459  
   460    int above_max = max + 1;
   461    ASSERT_THAT(
   462        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &above_max, sizeof(above_max)),
   463        SyscallSucceeds());
   464  
   465    int val = 0;
   466    socklen_t val_len = sizeof(val);
   467    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
   468                SyscallSucceeds());
   469    ASSERT_EQ(max, val);
   470  }
   471  
   472  // Check that setting SO_RCVBUF min <= kRcvBufSz <= max is honored.
   473  TEST_P(RawSocketTest, SetSocketRecvBuf) {
   474    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   475  
   476    int max = 0;
   477    int min = 0;
   478    {
   479      // Discover max buf size by trying to set a really large buffer size.
   480      constexpr int kRcvBufSz = 0xffffffff;
   481      ASSERT_THAT(
   482          setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
   483          SyscallSucceeds());
   484  
   485      max = 0;
   486      socklen_t max_len = sizeof(max);
   487      ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len),
   488                  SyscallSucceeds());
   489    }
   490  
   491    {
   492      // Discover minimum buffer size by trying to set a zero size receive buffer
   493      // size.
   494      // See:
   495      // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820
   496      constexpr int kRcvBufSz = 0;
   497      ASSERT_THAT(
   498          setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
   499          SyscallSucceeds());
   500  
   501      socklen_t min_len = sizeof(min);
   502      ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
   503                  SyscallSucceeds());
   504    }
   505  
   506    int quarter_sz = min + (max - min) / 4;
   507    ASSERT_THAT(
   508        setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &quarter_sz, sizeof(quarter_sz)),
   509        SyscallSucceeds());
   510  
   511    int val = 0;
   512    socklen_t val_len = sizeof(val);
   513    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len),
   514                SyscallSucceeds());
   515  
   516    // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF.
   517    quarter_sz *= 2;
   518    ASSERT_EQ(quarter_sz, val);
   519  }
   520  
   521  // Check that setting SO_SNDBUF below min is clamped to the minimum
   522  // receive buffer size.
   523  TEST_P(RawSocketTest, SetSocketSendBufBelowMin) {
   524    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   525  
   526    // Discover minimum buffer size by trying to set it to zero.
   527    constexpr int kSndBufSz = 0;
   528    ASSERT_THAT(
   529        setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
   530        SyscallSucceeds());
   531  
   532    int min = 0;
   533    socklen_t min_len = sizeof(min);
   534    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len),
   535                SyscallSucceeds());
   536  
   537    // Linux doubles the value so let's use a value that when doubled will still
   538    // be smaller than min.
   539    int below_min = min / 2 - 1;
   540    ASSERT_THAT(
   541        setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &below_min, sizeof(below_min)),
   542        SyscallSucceeds());
   543  
   544    int val = 0;
   545    socklen_t val_len = sizeof(val);
   546    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
   547                SyscallSucceeds());
   548  
   549    ASSERT_EQ(min, val);
   550  }
   551  
   552  // Check that setting SO_SNDBUF above max is clamped to the maximum
   553  // send buffer size.
   554  TEST_P(RawSocketTest, SetSocketSendBufAboveMax) {
   555    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   556  
   557    // Discover maximum buffer size by trying to set it to a large value.
   558    constexpr int kSndBufSz = 0xffffffff;
   559    ASSERT_THAT(
   560        setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
   561        SyscallSucceeds());
   562  
   563    int max = 0;
   564    socklen_t max_len = sizeof(max);
   565    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len),
   566                SyscallSucceeds());
   567  
   568    int above_max = max + 1;
   569    ASSERT_THAT(
   570        setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &above_max, sizeof(above_max)),
   571        SyscallSucceeds());
   572  
   573    int val = 0;
   574    socklen_t val_len = sizeof(val);
   575    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
   576                SyscallSucceeds());
   577    ASSERT_EQ(max, val);
   578  }
   579  
   580  // Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored.
   581  TEST_P(RawSocketTest, SetSocketSendBuf) {
   582    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   583  
   584    int max = 0;
   585    int min = 0;
   586    {
   587      // Discover maximum buffer size by trying to set it to a large value.
   588      constexpr int kSndBufSz = 0xffffffff;
   589      ASSERT_THAT(
   590          setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
   591          SyscallSucceeds());
   592  
   593      max = 0;
   594      socklen_t max_len = sizeof(max);
   595      ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len),
   596                  SyscallSucceeds());
   597    }
   598  
   599    {
   600      // Discover minimum buffer size by trying to set it to zero.
   601      constexpr int kSndBufSz = 0;
   602      ASSERT_THAT(
   603          setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)),
   604          SyscallSucceeds());
   605  
   606      socklen_t min_len = sizeof(min);
   607      ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len),
   608                  SyscallSucceeds());
   609    }
   610  
   611    int quarter_sz = min + (max - min) / 4;
   612    ASSERT_THAT(
   613        setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &quarter_sz, sizeof(quarter_sz)),
   614        SyscallSucceeds());
   615  
   616    int val = 0;
   617    socklen_t val_len = sizeof(val);
   618    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len),
   619                SyscallSucceeds());
   620  
   621    quarter_sz *= 2;
   622    ASSERT_EQ(quarter_sz, val);
   623  }
   624  
   625  // Test that receive buffer limits are not enforced when the recv buffer is
   626  // empty.
   627  TEST_P(RawSocketTest, RecvBufLimitsEmptyRecvBuffer) {
   628    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   629  
   630    ASSERT_THAT(
   631        bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   632        SyscallSucceeds());
   633    ASSERT_THAT(
   634        connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   635        SyscallSucceeds());
   636  
   637    int min = 0;
   638    {
   639      // Discover minimum buffer size by trying to set it to zero.
   640      constexpr int kRcvBufSz = 0;
   641      ASSERT_THAT(
   642          setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
   643          SyscallSucceeds());
   644  
   645      socklen_t min_len = sizeof(min);
   646      ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
   647                  SyscallSucceeds());
   648    }
   649  
   650    {
   651      // Send data of size min and verify that it's received.
   652      std::vector<char> buf(min);
   653      RandomizeBuffer(buf.data(), buf.size());
   654      ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
   655  
   656      // Receive the packet and make sure it's identical.
   657      std::vector<char> recv_buf(buf.size() + HdrLen());
   658      ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
   659      EXPECT_EQ(
   660          memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()),
   661          0);
   662    }
   663  
   664    {
   665      // Send data of size min + 1 and verify that its received. Both linux and
   666      // Netstack accept a dgram that exceeds rcvBuf limits if the receive buffer
   667      // is currently empty.
   668      std::vector<char> buf(min + 1);
   669      RandomizeBuffer(buf.data(), buf.size());
   670      ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
   671      // Receive the packet and make sure it's identical.
   672      std::vector<char> recv_buf(buf.size() + HdrLen());
   673      ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
   674      EXPECT_EQ(
   675          memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()),
   676          0);
   677    }
   678  }
   679  
   680  TEST_P(RawSocketTest, RecvBufLimits) {
   681    // TCP stack generates RSTs for unknown endpoints and it complicates the test
   682    // as we have to deal with the RST packets as well. For testing the raw socket
   683    // endpoints buffer limit enforcement we can just test for UDP.
   684    //
   685    // We don't use SKIP_IF here because root_test_runner explicitly fails if a
   686    // test is skipped.
   687    if (Protocol() == IPPROTO_TCP) {
   688      return;
   689    }
   690    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   691  
   692    ASSERT_THAT(
   693        bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   694        SyscallSucceeds());
   695    ASSERT_THAT(
   696        connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()),
   697        SyscallSucceeds());
   698  
   699    int min = 0;
   700    {
   701      // Discover minimum buffer size by trying to set it to zero.
   702      constexpr int kRcvBufSz = 0;
   703      ASSERT_THAT(
   704          setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)),
   705          SyscallSucceeds());
   706  
   707      socklen_t min_len = sizeof(min);
   708      ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len),
   709                  SyscallSucceeds());
   710    }
   711  
   712    // Now set the limit to min * 2.
   713    int new_rcv_buf_sz = min * 2;
   714    ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz,
   715                           sizeof(new_rcv_buf_sz)),
   716                SyscallSucceeds());
   717    int rcv_buf_sz = 0;
   718    {
   719      socklen_t rcv_buf_len = sizeof(rcv_buf_sz);
   720      ASSERT_THAT(
   721          getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, &rcv_buf_len),
   722          SyscallSucceeds());
   723    }
   724  
   725    // Set a receive timeout so that we don't block forever on reads if the test
   726    // fails.
   727    struct timeval tv {
   728      .tv_sec = 1, .tv_usec = 0,
   729    };
   730    ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
   731                SyscallSucceeds());
   732  
   733    {
   734      std::vector<char> buf(min);
   735      RandomizeBuffer(buf.data(), buf.size());
   736  
   737      ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
   738      ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
   739      ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
   740      ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
   741      int sent = 4;
   742      if (IsRunningOnGvisor()) {
   743        // Linux seems to drop the 4th packet even though technically it should
   744        // fit in the receive buffer.
   745        ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size()));
   746        sent++;
   747      }
   748  
   749      // Verify that the expected number of packets are available to be read.
   750      for (int i = 0; i < sent - 1; i++) {
   751        // Receive the packet and make sure it's identical.
   752        std::vector<char> recv_buf(buf.size() + HdrLen());
   753        ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size()));
   754        EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), buf.data(),
   755                         buf.size()),
   756                  0);
   757      }
   758  
   759      // Assert that the last packet is dropped because the receive buffer should
   760      // be full after the first four packets.
   761      std::vector<char> recv_buf(buf.size() + HdrLen());
   762      struct iovec iov = {};
   763      iov.iov_base = static_cast<void*>(const_cast<char*>(recv_buf.data()));
   764      iov.iov_len = buf.size();
   765      struct msghdr msg = {};
   766      msg.msg_iov = &iov;
   767      msg.msg_iovlen = 1;
   768      msg.msg_control = NULL;
   769      msg.msg_controllen = 0;
   770      msg.msg_flags = 0;
   771      ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, MSG_DONTWAIT),
   772                  SyscallFailsWithErrno(EAGAIN));
   773    }
   774  }
   775  
   776  void RawSocketTest::SendBuf(const char* buf, int buf_len) {
   777    // It's safe to use const_cast here because sendmsg won't modify the iovec or
   778    // address.
   779    struct iovec iov = {};
   780    iov.iov_base = static_cast<void*>(const_cast<char*>(buf));
   781    iov.iov_len = static_cast<size_t>(buf_len);
   782    struct msghdr msg = {};
   783    msg.msg_name = static_cast<void*>(&addr_);
   784    msg.msg_namelen = AddrLen();
   785    msg.msg_iov = &iov;
   786    msg.msg_iovlen = 1;
   787    msg.msg_control = NULL;
   788    msg.msg_controllen = 0;
   789    msg.msg_flags = 0;
   790    ASSERT_THAT(sendmsg(s_, &msg, 0), SyscallSucceedsWithValue(buf_len));
   791  }
   792  
   793  void RawSocketTest::ReceiveBuf(char* recv_buf, size_t recv_buf_len) {
   794    ASSERT_NO_FATAL_FAILURE(ReceiveBufFrom(s_, recv_buf, recv_buf_len));
   795  }
   796  
   797  void RawSocketTest::ReceiveBufFrom(int sock, char* recv_buf,
   798                                     size_t recv_buf_len) {
   799    ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sock, recv_buf, recv_buf_len));
   800  }
   801  
   802  TEST_P(RawSocketTest, SetSocketDetachFilterNoInstalledFilter) {
   803    // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER.
   804    if (IsRunningOnGvisor()) {
   805      constexpr int val = 0;
   806      ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)),
   807                  SyscallSucceeds());
   808      return;
   809    }
   810  
   811    constexpr int val = 0;
   812    ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)),
   813                SyscallFailsWithErrno(ENOENT));
   814  }
   815  
   816  TEST_P(RawSocketTest, GetSocketDetachFilter) {
   817    int val = 0;
   818    socklen_t val_len = sizeof(val);
   819    ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, &val_len),
   820                SyscallFailsWithErrno(ENOPROTOOPT));
   821  }
   822  
   823  // AF_INET6+SOCK_RAW+IPPROTO_RAW sockets can be created, but not written to.
   824  TEST(RawSocketTest, IPv6ProtoRaw) {
   825    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   826  
   827    int sock;
   828    ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW),
   829                SyscallSucceeds());
   830  
   831    // Verify that writing yields EINVAL.
   832    char buf[] = "This is such a weird little edge case";
   833    struct sockaddr_in6 sin6 = {};
   834    sin6.sin6_family = AF_INET6;
   835    sin6.sin6_addr = in6addr_loopback;
   836    ASSERT_THAT(sendto(sock, buf, sizeof(buf), 0 /* flags */,
   837                       reinterpret_cast<struct sockaddr*>(&sin6), sizeof(sin6)),
   838                SyscallFailsWithErrno(EINVAL));
   839  }
   840  
   841  TEST(RawSocketTest, IPv6SendMsg) {
   842    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   843  
   844    int sock;
   845    ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_TCP),
   846                SyscallSucceeds());
   847  
   848    char kBuf[] = "hello";
   849    struct iovec iov = {};
   850    iov.iov_base = static_cast<void*>(const_cast<char*>(kBuf));
   851    iov.iov_len = static_cast<size_t>(sizeof(kBuf));
   852  
   853    struct sockaddr_storage addr = {};
   854    struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr);
   855    sin->sin_family = AF_INET;
   856    sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
   857  
   858    struct msghdr msg = {};
   859    msg.msg_name = static_cast<void*>(&addr);
   860    msg.msg_namelen = sizeof(sockaddr_in);
   861    msg.msg_iov = &iov;
   862    msg.msg_iovlen = 1;
   863    msg.msg_control = NULL;
   864    msg.msg_controllen = 0;
   865    msg.msg_flags = 0;
   866    ASSERT_THAT(sendmsg(sock, &msg, 0), SyscallFailsWithErrno(EINVAL));
   867  }
   868  
   869  TEST_P(RawSocketTest, ConnectOnIPv6Socket) {
   870    SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW)));
   871  
   872    int sock;
   873    ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_TCP),
   874                SyscallSucceeds());
   875  
   876    struct sockaddr_storage addr = {};
   877    struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr);
   878    sin->sin_family = AF_INET;
   879    sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
   880  
   881    ASSERT_THAT(connect(sock, reinterpret_cast<struct sockaddr*>(&addr),
   882                        sizeof(sockaddr_in6)),
   883                SyscallFailsWithErrno(EAFNOSUPPORT));
   884  }
   885  
   886  INSTANTIATE_TEST_SUITE_P(
   887      AllInetTests, RawSocketTest,
   888      ::testing::Combine(::testing::Values(IPPROTO_TCP, IPPROTO_UDP),
   889                         ::testing::Values(AF_INET, AF_INET6)));
   890  
   891  }  // namespace
   892  
   893  }  // namespace testing
   894  }  // namespace gvisor