github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/test/syscalls/linux/tcp_socket.cc (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include <fcntl.h>
    16  #ifdef __linux__
    17  #include <linux/filter.h>
    18  #endif  // __linux__
    19  #include <netinet/in.h>
    20  #include <netinet/tcp.h>
    21  #include <poll.h>
    22  #include <sys/ioctl.h>
    23  #include <sys/socket.h>
    24  #include <unistd.h>
    25  
    26  #include <limits>
    27  #include <vector>
    28  
    29  #include "gtest/gtest.h"
    30  #include "absl/time/clock.h"
    31  #include "absl/time/time.h"
    32  #include "test/syscalls/linux/socket_test_util.h"
    33  #include "test/util/file_descriptor.h"
    34  #include "test/util/posix_error.h"
    35  #include "test/util/test_util.h"
    36  #include "test/util/thread_util.h"
    37  
    38  namespace gvisor {
    39  namespace testing {
    40  
    41  namespace {
    42  
    43  PosixErrorOr<sockaddr_storage> InetLoopbackAddr(int family) {
    44    struct sockaddr_storage addr;
    45    memset(&addr, 0, sizeof(addr));
    46    addr.ss_family = family;
    47    switch (family) {
    48      case AF_INET:
    49        reinterpret_cast<struct sockaddr_in*>(&addr)->sin_addr.s_addr =
    50            htonl(INADDR_LOOPBACK);
    51        break;
    52      case AF_INET6:
    53        reinterpret_cast<struct sockaddr_in6*>(&addr)->sin6_addr =
    54            in6addr_loopback;
    55        break;
    56      default:
    57        return PosixError(EINVAL,
    58                          absl::StrCat("unknown socket family: ", family));
    59    }
    60    return addr;
    61  }
    62  
    63  static void FillSocketBuffers(int sender, int receiver) {
    64    // Set the FD to O_NONBLOCK.
    65    int opts;
    66    int orig_opts;
    67    ASSERT_THAT(opts = fcntl(sender, F_GETFL), SyscallSucceeds());
    68    orig_opts = opts;
    69    opts |= O_NONBLOCK;
    70    ASSERT_THAT(fcntl(sender, F_SETFL, opts), SyscallSucceeds());
    71  
    72    // Set TCP_NODELAY, which will cause linux to fill the receive buffer from the
    73    // send buffer as quickly as possibly. This way we can fill up both buffers
    74    // faster.
    75    constexpr int tcp_nodelay_flag = 1;
    76    ASSERT_THAT(setsockopt(sender, IPPROTO_TCP, TCP_NODELAY, &tcp_nodelay_flag,
    77                           sizeof(tcp_nodelay_flag)),
    78                SyscallSucceeds());
    79  
    80    // Set a 256KB send/receive buffer.
    81    int buf_sz = 1 << 18;
    82    EXPECT_THAT(
    83        setsockopt(receiver, SOL_SOCKET, SO_RCVBUF, &buf_sz, sizeof(buf_sz)),
    84        SyscallSucceedsWithValue(0));
    85    EXPECT_THAT(
    86        setsockopt(sender, SOL_SOCKET, SO_SNDBUF, &buf_sz, sizeof(buf_sz)),
    87        SyscallSucceedsWithValue(0));
    88  
    89    // Create a large buffer that will be used for sending.
    90    std::vector<char> buf(1 << 16);
    91  
    92    // Write until we receive an error.
    93    while (RetryEINTR(send)(sender, buf.data(), buf.size(), 0) != -1) {
    94      // Sleep to give linux a chance to move data from the send buffer to the
    95      // receive buffer.
    96      usleep(10000);  // 10ms.
    97    }
    98    // The last error should have been EWOULDBLOCK.
    99    ASSERT_EQ(errno, EWOULDBLOCK);
   100  
   101    // Restore the fcntl opts
   102    ASSERT_THAT(fcntl(sender, F_SETFL, orig_opts), SyscallSucceeds());
   103  }
   104  
   105  // Fixture for tests parameterized by the address family to use (AF_INET and
   106  // AF_INET6) when creating sockets.
   107  class TcpSocketTest : public ::testing::TestWithParam<int> {
   108   protected:
   109    // Creates three sockets that will be used by test cases -- a listener, one
   110    // that connects, and the accepted one.
   111    void SetUp() override;
   112  
   113    // Closes the sockets created by SetUp().
   114    void TearDown() override;
   115  
   116    // Listening socket.
   117    int listener_ = -1;
   118  
   119    // Socket connected via connect().
   120    int first_fd = -1;
   121  
   122    // Socket connected via accept().
   123    int second_fd = -1;
   124  
   125    // Initial size of the send buffer.
   126    int sendbuf_size_ = -1;
   127  };
   128  
   129  void TcpSocketTest::SetUp() {
   130    ASSERT_THAT(listener_ = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
   131                SyscallSucceeds());
   132  
   133    ASSERT_THAT(first_fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
   134                SyscallSucceeds());
   135  
   136    // Initialize address to the loopback one.
   137    sockaddr_storage addr =
   138        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
   139    socklen_t addrlen = sizeof(addr);
   140  
   141    // Bind to some port then start listening.
   142    ASSERT_THAT(bind(listener_, AsSockAddr(&addr), addrlen), SyscallSucceeds());
   143  
   144    ASSERT_THAT(listen(listener_, SOMAXCONN), SyscallSucceeds());
   145  
   146    // Get the address we're listening on, then connect to it. We need to do this
   147    // because we're allowing the stack to pick a port for us.
   148    ASSERT_THAT(getsockname(listener_, AsSockAddr(&addr), &addrlen),
   149                SyscallSucceeds());
   150  
   151    ASSERT_THAT(RetryEINTR(connect)(first_fd, AsSockAddr(&addr), addrlen),
   152                SyscallSucceeds());
   153  
   154    // Get the initial send buffer size.
   155    socklen_t optlen = sizeof(sendbuf_size_);
   156    ASSERT_THAT(
   157        getsockopt(first_fd, SOL_SOCKET, SO_SNDBUF, &sendbuf_size_, &optlen),
   158        SyscallSucceeds());
   159  
   160    // Accept the connection.
   161    ASSERT_THAT(second_fd = RetryEINTR(accept)(listener_, nullptr, nullptr),
   162                SyscallSucceeds());
   163  }
   164  
   165  void TcpSocketTest::TearDown() {
   166    EXPECT_THAT(close(listener_), SyscallSucceeds());
   167    if (first_fd >= 0) {
   168      EXPECT_THAT(close(first_fd), SyscallSucceeds());
   169    }
   170    if (second_fd >= 0) {
   171      EXPECT_THAT(close(second_fd), SyscallSucceeds());
   172    }
   173  }
   174  
   175  TEST_P(TcpSocketTest, ConnectOnEstablishedConnection) {
   176    sockaddr_storage addr =
   177        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
   178    socklen_t addrlen = sizeof(addr);
   179  
   180    ASSERT_THAT(connect(first_fd, reinterpret_cast<const struct sockaddr*>(&addr),
   181                        addrlen),
   182                SyscallFailsWithErrno(EISCONN));
   183    ASSERT_THAT(connect(second_fd,
   184                        reinterpret_cast<const struct sockaddr*>(&addr), addrlen),
   185                SyscallFailsWithErrno(EISCONN));
   186  }
   187  
   188  TEST_P(TcpSocketTest, ShutdownWriteInTimeWait) {
   189    EXPECT_THAT(shutdown(second_fd, SHUT_WR), SyscallSucceeds());
   190    EXPECT_THAT(shutdown(first_fd, SHUT_RDWR), SyscallSucceeds());
   191    absl::SleepFor(absl::Seconds(1));  // Wait to enter TIME_WAIT.
   192    EXPECT_THAT(shutdown(second_fd, SHUT_WR), SyscallFailsWithErrno(ENOTCONN));
   193  }
   194  
   195  TEST_P(TcpSocketTest, ShutdownWriteInFinWait1) {
   196    EXPECT_THAT(shutdown(second_fd, SHUT_WR), SyscallSucceeds());
   197    EXPECT_THAT(shutdown(second_fd, SHUT_WR), SyscallSucceeds());
   198    absl::SleepFor(absl::Seconds(1));  // Wait to enter FIN-WAIT2.
   199    EXPECT_THAT(shutdown(second_fd, SHUT_WR), SyscallSucceeds());
   200  }
   201  
   202  TEST_P(TcpSocketTest, DataCoalesced) {
   203    char buf[10];
   204  
   205    // Write in two steps.
   206    ASSERT_THAT(RetryEINTR(write)(first_fd, buf, sizeof(buf) / 2),
   207                SyscallSucceedsWithValue(sizeof(buf) / 2));
   208    ASSERT_THAT(RetryEINTR(write)(first_fd, buf, sizeof(buf) / 2),
   209                SyscallSucceedsWithValue(sizeof(buf) / 2));
   210  
   211    // Allow stack to process both packets.
   212    absl::SleepFor(absl::Seconds(1));
   213  
   214    // Read in one shot.
   215    EXPECT_THAT(RetryEINTR(recv)(second_fd, buf, sizeof(buf), 0),
   216                SyscallSucceedsWithValue(sizeof(buf)));
   217  }
   218  
   219  TEST_P(TcpSocketTest, SenderAddressIgnored) {
   220    char buf[3];
   221    ASSERT_THAT(RetryEINTR(write)(first_fd, buf, sizeof(buf)),
   222                SyscallSucceedsWithValue(sizeof(buf)));
   223  
   224    struct sockaddr_storage addr;
   225    socklen_t addrlen = sizeof(addr);
   226    memset(&addr, 0, sizeof(addr));
   227  
   228    ASSERT_THAT(RetryEINTR(recvfrom)(second_fd, buf, sizeof(buf), 0,
   229                                     AsSockAddr(&addr), &addrlen),
   230                SyscallSucceedsWithValue(3));
   231  
   232    // Check that addr remains zeroed-out.
   233    const char* ptr = reinterpret_cast<char*>(&addr);
   234    for (size_t i = 0; i < sizeof(addr); i++) {
   235      EXPECT_EQ(ptr[i], 0);
   236    }
   237  }
   238  
   239  TEST_P(TcpSocketTest, SenderAddressIgnoredOnPeek) {
   240    char buf[3];
   241    ASSERT_THAT(RetryEINTR(write)(first_fd, buf, sizeof(buf)),
   242                SyscallSucceedsWithValue(sizeof(buf)));
   243  
   244    struct sockaddr_storage addr;
   245    socklen_t addrlen = sizeof(addr);
   246    memset(&addr, 0, sizeof(addr));
   247  
   248    ASSERT_THAT(RetryEINTR(recvfrom)(second_fd, buf, sizeof(buf), MSG_PEEK,
   249                                     AsSockAddr(&addr), &addrlen),
   250                SyscallSucceedsWithValue(3));
   251  
   252    // Check that addr remains zeroed-out.
   253    const char* ptr = reinterpret_cast<char*>(&addr);
   254    for (size_t i = 0; i < sizeof(addr); i++) {
   255      EXPECT_EQ(ptr[i], 0);
   256    }
   257  }
   258  
   259  TEST_P(TcpSocketTest, SendtoAddressIgnored) {
   260    struct sockaddr_storage addr;
   261    memset(&addr, 0, sizeof(addr));
   262    addr.ss_family = GetParam();  // FIXME(b/63803955)
   263  
   264    char data = '\0';
   265    EXPECT_THAT(RetryEINTR(sendto)(first_fd, &data, sizeof(data), 0,
   266                                   AsSockAddr(&addr), sizeof(addr)),
   267                SyscallSucceedsWithValue(1));
   268  }
   269  
   270  TEST_P(TcpSocketTest, WritevZeroIovec) {
   271    // 2 bytes just to be safe and have vecs[1] not point to something random
   272    // (even though length is 0).
   273    char buf[2];
   274    char recv_buf[1];
   275  
   276    // Construct a vec where the final vector is of length 0.
   277    iovec vecs[2] = {};
   278    vecs[0].iov_base = buf;
   279    vecs[0].iov_len = 1;
   280    vecs[1].iov_base = buf + 1;
   281    vecs[1].iov_len = 0;
   282  
   283    EXPECT_THAT(RetryEINTR(writev)(first_fd, vecs, 2),
   284                SyscallSucceedsWithValue(1));
   285  
   286    EXPECT_THAT(RetryEINTR(recv)(second_fd, recv_buf, 1, 0),
   287                SyscallSucceedsWithValue(1));
   288    EXPECT_EQ(memcmp(recv_buf, buf, 1), 0);
   289  }
   290  
   291  TEST_P(TcpSocketTest, ZeroWriteAllowed) {
   292    char buf[3];
   293    // Send a zero length packet.
   294    ASSERT_THAT(RetryEINTR(write)(first_fd, buf, 0), SyscallSucceedsWithValue(0));
   295    // Verify that there is no packet available.
   296    EXPECT_THAT(RetryEINTR(recv)(second_fd, buf, sizeof(buf), MSG_DONTWAIT),
   297                SyscallFailsWithErrno(EAGAIN));
   298  }
   299  
   300  // Test that a non-blocking write with a buffer that is larger than the send
   301  // buffer size will not actually write the whole thing at once. Regression test
   302  // for b/64438887.
   303  TEST_P(TcpSocketTest, NonblockingLargeWrite) {
   304    // Set the FD to O_NONBLOCK.
   305    int opts;
   306    ASSERT_THAT(opts = fcntl(first_fd, F_GETFL), SyscallSucceeds());
   307    opts |= O_NONBLOCK;
   308    ASSERT_THAT(fcntl(first_fd, F_SETFL, opts), SyscallSucceeds());
   309  
   310    // Allocate a buffer three times the size of the send buffer. We do this with
   311    // a vector to avoid allocating on the stack.
   312    int size = 3 * sendbuf_size_;
   313    std::vector<char> buf(size);
   314  
   315    // Try to write the whole thing.
   316    int n;
   317    ASSERT_THAT(n = RetryEINTR(write)(first_fd, buf.data(), size),
   318                SyscallSucceeds());
   319  
   320    // We should have written something, but not the whole thing.
   321    EXPECT_GT(n, 0);
   322    EXPECT_LT(n, size);
   323  }
   324  
   325  // Test that a blocking write with a buffer that is larger than the send buffer
   326  // will block until the entire buffer is sent.
   327  TEST_P(TcpSocketTest, BlockingLargeWrite) {
   328    // Allocate a buffer three times the size of the send buffer on the heap. We
   329    // do this as a vector to avoid allocating on the stack.
   330    int size = 3 * sendbuf_size_;
   331    std::vector<char> writebuf(size);
   332  
   333    // Start reading the response in a loop.
   334    int read_bytes = 0;
   335    ScopedThread t([this, &read_bytes]() {
   336      // Avoid interrupting the blocking write in main thread.
   337      const DisableSave disable_save;
   338  
   339      // Take ownership of the FD so that we close it on failure. This will
   340      // unblock the blocking write below.
   341      FileDescriptor fd(second_fd);
   342      second_fd = -1;
   343  
   344      char readbuf[2500] = {};
   345      int n = -1;
   346      while (n != 0) {
   347        ASSERT_THAT(n = RetryEINTR(read)(fd.get(), &readbuf, sizeof(readbuf)),
   348                    SyscallSucceeds());
   349        read_bytes += n;
   350      }
   351    });
   352  
   353    // Try to write the whole thing.
   354    int n;
   355    ASSERT_THAT(n = WriteFd(first_fd, writebuf.data(), size), SyscallSucceeds());
   356  
   357    // We should have written the whole thing.
   358    EXPECT_EQ(n, size);
   359    EXPECT_THAT(close(first_fd), SyscallSucceedsWithValue(0));
   360    first_fd = -1;
   361    t.Join();
   362  
   363    // We should have read the whole thing.
   364    EXPECT_EQ(read_bytes, size);
   365  }
   366  
   367  // Test that a send with MSG_DONTWAIT flag and buffer that larger than the send
   368  // buffer size will not write the whole thing.
   369  TEST_P(TcpSocketTest, LargeSendDontWait) {
   370    // Allocate a buffer three times the size of the send buffer. We do this on
   371    // with a vector to avoid allocating on the stack.
   372    int size = 3 * sendbuf_size_;
   373    std::vector<char> buf(size);
   374  
   375    // Try to write the whole thing with MSG_DONTWAIT flag, which can
   376    // return a partial write.
   377    int n;
   378    ASSERT_THAT(n = RetryEINTR(send)(first_fd, buf.data(), size, MSG_DONTWAIT),
   379                SyscallSucceeds());
   380  
   381    // We should have written something, but not the whole thing.
   382    EXPECT_GT(n, 0);
   383    EXPECT_LT(n, size);
   384  }
   385  
   386  // Test that a send on a non-blocking socket with a buffer that larger than the
   387  // send buffer will not write the whole thing at once.
   388  TEST_P(TcpSocketTest, NonblockingLargeSend) {
   389    // Set the FD to O_NONBLOCK.
   390    int opts;
   391    ASSERT_THAT(opts = fcntl(first_fd, F_GETFL), SyscallSucceeds());
   392    opts |= O_NONBLOCK;
   393    ASSERT_THAT(fcntl(first_fd, F_SETFL, opts), SyscallSucceeds());
   394  
   395    // Allocate a buffer three times the size of the send buffer. We do this on
   396    // with a vector to avoid allocating on the stack.
   397    int size = 3 * sendbuf_size_;
   398    std::vector<char> buf(size);
   399  
   400    // Try to write the whole thing.
   401    int n;
   402    ASSERT_THAT(n = RetryEINTR(send)(first_fd, buf.data(), size, 0),
   403                SyscallSucceeds());
   404  
   405    // We should have written something, but not the whole thing.
   406    EXPECT_GT(n, 0);
   407    EXPECT_LT(n, size);
   408  }
   409  
   410  // Same test as above, but calls send instead of write.
   411  TEST_P(TcpSocketTest, BlockingLargeSend) {
   412    // Allocate a buffer three times the size of the send buffer. We do this on
   413    // with a vector to avoid allocating on the stack.
   414    int size = 3 * sendbuf_size_;
   415    std::vector<char> writebuf(size);
   416  
   417    // Start reading the response in a loop.
   418    int read_bytes = 0;
   419    ScopedThread t([this, &read_bytes]() {
   420      // Avoid interrupting the blocking write in main thread.
   421      const DisableSave disable_save;
   422  
   423      // Take ownership of the FD so that we close it on failure. This will
   424      // unblock the blocking write below.
   425      FileDescriptor fd(second_fd);
   426      second_fd = -1;
   427  
   428      char readbuf[2500] = {};
   429      int n = -1;
   430      while (n != 0) {
   431        ASSERT_THAT(n = RetryEINTR(read)(fd.get(), &readbuf, sizeof(readbuf)),
   432                    SyscallSucceeds());
   433        read_bytes += n;
   434      }
   435    });
   436  
   437    // Try to send the whole thing.
   438    int n;
   439    ASSERT_THAT(n = SendFd(first_fd, writebuf.data(), size, 0),
   440                SyscallSucceeds());
   441  
   442    // We should have written the whole thing.
   443    EXPECT_EQ(n, size);
   444    EXPECT_THAT(close(first_fd), SyscallSucceedsWithValue(0));
   445    first_fd = -1;
   446    t.Join();
   447  
   448    // We should have read the whole thing.
   449    EXPECT_EQ(read_bytes, size);
   450  }
   451  
   452  // Test that polling on a socket with a full send buffer will block.
   453  TEST_P(TcpSocketTest, PollWithFullBufferBlocks) {
   454    FillSocketBuffers(first_fd, second_fd);
   455    // Now polling on the FD with a timeout should return 0 corresponding to no
   456    // FDs ready.
   457    struct pollfd poll_fd = {first_fd, POLLOUT, 0};
   458    EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10), SyscallSucceedsWithValue(0));
   459  }
   460  
   461  TEST_P(TcpSocketTest, ClosedWriteBlockingSocket) {
   462    FillSocketBuffers(first_fd, second_fd);
   463    constexpr int timeout = 10;
   464    struct timeval tv = {.tv_sec = timeout, .tv_usec = 0};
   465    EXPECT_THAT(setsockopt(first_fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)),
   466                SyscallSucceeds());
   467  
   468    struct timespec begin;
   469    struct timespec end;
   470    const DisableSave disable_save;  // Timing-related.
   471    EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &begin), SyscallSucceeds());
   472  
   473    ScopedThread send_thread([this]() {
   474      char send_byte;
   475      // Expect the send() to be blocked until receive timeout.
   476      ASSERT_THAT(RetryEINTR(send)(first_fd, &send_byte, sizeof(send_byte), 0),
   477                  SyscallFailsWithErrno(EAGAIN));
   478    });
   479  
   480    // Wait for the thread to be blocked on write.
   481    absl::SleepFor(absl::Milliseconds(250));
   482    // Socket close does not have any effect on a blocked write.
   483    ASSERT_THAT(close(first_fd), SyscallSucceeds());
   484    // Indicate to the cleanup routine that we are already closed.
   485    first_fd = -1;
   486  
   487    send_thread.Join();
   488  
   489    EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &end), SyscallSucceeds());
   490    // Check the lower bound on the timeout.  Checking for an upper bound is
   491    // fragile because Linux can overrun the timeout due to scheduling delays.
   492    EXPECT_GT(ms_elapsed(begin, end), timeout * 1000 - 1);
   493  }
   494  
   495  TEST_P(TcpSocketTest, ClosedReadBlockingSocket) {
   496    constexpr int timeout = 10;
   497    struct timeval tv = {.tv_sec = timeout, .tv_usec = 0};
   498    EXPECT_THAT(setsockopt(first_fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
   499                SyscallSucceeds());
   500  
   501    struct timespec begin;
   502    struct timespec end;
   503    const DisableSave disable_save;  // Timing-related.
   504    EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &begin), SyscallSucceeds());
   505  
   506    ScopedThread read_thread([this]() {
   507      char read_byte;
   508      // Expect the read() to be blocked until receive timeout.
   509      ASSERT_THAT(read(first_fd, &read_byte, sizeof(read_byte)),
   510                  SyscallFailsWithErrno(EAGAIN));
   511    });
   512  
   513    // Wait for the thread to be blocked on read.
   514    absl::SleepFor(absl::Milliseconds(250));
   515    // Socket close does not have any effect on a blocked read.
   516    ASSERT_THAT(close(first_fd), SyscallSucceeds());
   517    // Indicate to the cleanup routine that we are already closed.
   518    first_fd = -1;
   519  
   520    read_thread.Join();
   521  
   522    EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &end), SyscallSucceeds());
   523    // Check the lower bound on the timeout.  Checking for an upper bound is
   524    // fragile because Linux can overrun the timeout due to scheduling delays.
   525    EXPECT_GT(ms_elapsed(begin, end), timeout * 1000 - 1);
   526  }
   527  
   528  TEST_P(TcpSocketTest, MsgTrunc) {
   529    char sent_data[512];
   530    RandomizeBuffer(sent_data, sizeof(sent_data));
   531    ASSERT_THAT(RetryEINTR(send)(first_fd, sent_data, sizeof(sent_data), 0),
   532                SyscallSucceedsWithValue(sizeof(sent_data)));
   533    char received_data[sizeof(sent_data)] = {};
   534    ASSERT_THAT(RetryEINTR(recv)(second_fd, received_data,
   535                                 sizeof(received_data) / 2, MSG_TRUNC),
   536                SyscallSucceedsWithValue(sizeof(sent_data) / 2));
   537  
   538    // Check that we didn't get anything.
   539    char zeros[sizeof(received_data)] = {};
   540    EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data)));
   541  }
   542  
   543  // MSG_CTRUNC is a return flag but linux allows it to be set on input flags
   544  // without returning an error.
   545  TEST_P(TcpSocketTest, MsgTruncWithCtrunc) {
   546    char sent_data[512];
   547    RandomizeBuffer(sent_data, sizeof(sent_data));
   548    ASSERT_THAT(RetryEINTR(send)(first_fd, sent_data, sizeof(sent_data), 0),
   549                SyscallSucceedsWithValue(sizeof(sent_data)));
   550    char received_data[sizeof(sent_data)] = {};
   551    ASSERT_THAT(
   552        RetryEINTR(recv)(second_fd, received_data, sizeof(received_data) / 2,
   553                         MSG_TRUNC | MSG_CTRUNC),
   554        SyscallSucceedsWithValue(sizeof(sent_data) / 2));
   555  
   556    // Check that we didn't get anything.
   557    char zeros[sizeof(received_data)] = {};
   558    EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data)));
   559  }
   560  
   561  // This test will verify that MSG_CTRUNC doesn't do anything when specified
   562  // on input.
   563  TEST_P(TcpSocketTest, MsgTruncWithCtruncOnly) {
   564    char sent_data[512];
   565    RandomizeBuffer(sent_data, sizeof(sent_data));
   566    ASSERT_THAT(RetryEINTR(send)(first_fd, sent_data, sizeof(sent_data), 0),
   567                SyscallSucceedsWithValue(sizeof(sent_data)));
   568    char received_data[sizeof(sent_data)] = {};
   569    ASSERT_THAT(RetryEINTR(recv)(second_fd, received_data,
   570                                 sizeof(received_data) / 2, MSG_CTRUNC),
   571                SyscallSucceedsWithValue(sizeof(sent_data) / 2));
   572  
   573    // Since MSG_CTRUNC here had no affect, it should not behave like MSG_TRUNC.
   574    EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2));
   575  }
   576  
   577  TEST_P(TcpSocketTest, MsgTruncLargeSize) {
   578    char sent_data[512];
   579    RandomizeBuffer(sent_data, sizeof(sent_data));
   580    ASSERT_THAT(RetryEINTR(send)(first_fd, sent_data, sizeof(sent_data), 0),
   581                SyscallSucceedsWithValue(sizeof(sent_data)));
   582    char received_data[sizeof(sent_data) * 2] = {};
   583    ASSERT_THAT(RetryEINTR(recv)(second_fd, received_data, sizeof(received_data),
   584                                 MSG_TRUNC),
   585                SyscallSucceedsWithValue(sizeof(sent_data)));
   586  
   587    // Check that we didn't get anything.
   588    char zeros[sizeof(received_data)] = {};
   589    EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data)));
   590  }
   591  
   592  TEST_P(TcpSocketTest, MsgTruncPeek) {
   593    char sent_data[512];
   594    RandomizeBuffer(sent_data, sizeof(sent_data));
   595    ASSERT_THAT(RetryEINTR(send)(first_fd, sent_data, sizeof(sent_data), 0),
   596                SyscallSucceedsWithValue(sizeof(sent_data)));
   597    char received_data[sizeof(sent_data)] = {};
   598    ASSERT_THAT(RetryEINTR(recv)(second_fd, received_data,
   599                                 sizeof(received_data) / 2, MSG_TRUNC | MSG_PEEK),
   600                SyscallSucceedsWithValue(sizeof(sent_data) / 2));
   601  
   602    // Check that we didn't get anything.
   603    char zeros[sizeof(received_data)] = {};
   604    EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data)));
   605  
   606    // Check that we can still get all of the data.
   607    ASSERT_THAT(
   608        RetryEINTR(recv)(second_fd, received_data, sizeof(received_data), 0),
   609        SyscallSucceedsWithValue(sizeof(sent_data)));
   610    EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
   611  }
   612  
   613  TEST_P(TcpSocketTest, NoDelayDefault) {
   614    int get = -1;
   615    socklen_t get_len = sizeof(get);
   616    EXPECT_THAT(getsockopt(first_fd, IPPROTO_TCP, TCP_NODELAY, &get, &get_len),
   617                SyscallSucceedsWithValue(0));
   618    EXPECT_EQ(get_len, sizeof(get));
   619    EXPECT_EQ(get, kSockOptOff);
   620  }
   621  
   622  TEST_P(TcpSocketTest, SetNoDelay) {
   623    ASSERT_THAT(setsockopt(first_fd, IPPROTO_TCP, TCP_NODELAY, &kSockOptOn,
   624                           sizeof(kSockOptOn)),
   625                SyscallSucceeds());
   626  
   627    int get = -1;
   628    socklen_t get_len = sizeof(get);
   629    EXPECT_THAT(getsockopt(first_fd, IPPROTO_TCP, TCP_NODELAY, &get, &get_len),
   630                SyscallSucceedsWithValue(0));
   631    EXPECT_EQ(get_len, sizeof(get));
   632    EXPECT_EQ(get, kSockOptOn);
   633  
   634    ASSERT_THAT(setsockopt(first_fd, IPPROTO_TCP, TCP_NODELAY, &kSockOptOff,
   635                           sizeof(kSockOptOff)),
   636                SyscallSucceeds());
   637  
   638    EXPECT_THAT(getsockopt(first_fd, IPPROTO_TCP, TCP_NODELAY, &get, &get_len),
   639                SyscallSucceedsWithValue(0));
   640    EXPECT_EQ(get_len, sizeof(get));
   641    EXPECT_EQ(get, kSockOptOff);
   642  }
   643  
   644  #ifndef TCP_INQ
   645  #define TCP_INQ 36
   646  #endif
   647  
   648  TEST_P(TcpSocketTest, TcpInqSetSockOpt) {
   649    char buf[1024];
   650    ASSERT_THAT(RetryEINTR(write)(first_fd, buf, sizeof(buf)),
   651                SyscallSucceedsWithValue(sizeof(buf)));
   652  
   653    // TCP_INQ is disabled by default.
   654    int val = -1;
   655    socklen_t slen = sizeof(val);
   656    EXPECT_THAT(getsockopt(second_fd, SOL_TCP, TCP_INQ, &val, &slen),
   657                SyscallSucceedsWithValue(0));
   658    ASSERT_EQ(val, 0);
   659  
   660    // Try to set TCP_INQ.
   661    val = 1;
   662    EXPECT_THAT(setsockopt(second_fd, SOL_TCP, TCP_INQ, &val, sizeof(val)),
   663                SyscallSucceedsWithValue(0));
   664    val = -1;
   665    slen = sizeof(val);
   666    EXPECT_THAT(getsockopt(second_fd, SOL_TCP, TCP_INQ, &val, &slen),
   667                SyscallSucceedsWithValue(0));
   668    ASSERT_EQ(val, 1);
   669  
   670    // Try to unset TCP_INQ.
   671    val = 0;
   672    EXPECT_THAT(setsockopt(second_fd, SOL_TCP, TCP_INQ, &val, sizeof(val)),
   673                SyscallSucceedsWithValue(0));
   674    val = -1;
   675    slen = sizeof(val);
   676    EXPECT_THAT(getsockopt(second_fd, SOL_TCP, TCP_INQ, &val, &slen),
   677                SyscallSucceedsWithValue(0));
   678    ASSERT_EQ(val, 0);
   679  }
   680  
   681  TEST_P(TcpSocketTest, TcpInq) {
   682    char buf[1024];
   683    // Write more than one TCP segment.
   684    int size = sizeof(buf);
   685    int kChunk = sizeof(buf) / 4;
   686    for (int i = 0; i < size; i += kChunk) {
   687      ASSERT_THAT(RetryEINTR(write)(first_fd, buf, kChunk),
   688                  SyscallSucceedsWithValue(kChunk));
   689    }
   690  
   691    int val = 1;
   692    kChunk = sizeof(buf) / 2;
   693    EXPECT_THAT(setsockopt(second_fd, SOL_TCP, TCP_INQ, &val, sizeof(val)),
   694                SyscallSucceedsWithValue(0));
   695  
   696    // Wait when all data will be in the received queue.
   697    while (true) {
   698      ASSERT_THAT(ioctl(second_fd, TIOCINQ, &size), SyscallSucceeds());
   699      if (size == sizeof(buf)) {
   700        break;
   701      }
   702      absl::SleepFor(absl::Milliseconds(10));
   703    }
   704  
   705    struct msghdr msg = {};
   706    std::vector<char> control(CMSG_SPACE(sizeof(int)));
   707    size = sizeof(buf);
   708    struct iovec iov;
   709    for (int i = 0; size != 0; i += kChunk) {
   710      msg.msg_control = &control[0];
   711      msg.msg_controllen = control.size();
   712  
   713      iov.iov_base = buf;
   714      iov.iov_len = kChunk;
   715      msg.msg_iov = &iov;
   716      msg.msg_iovlen = 1;
   717      ASSERT_THAT(RetryEINTR(recvmsg)(second_fd, &msg, 0),
   718                  SyscallSucceedsWithValue(kChunk));
   719      size -= kChunk;
   720  
   721      struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
   722      ASSERT_NE(cmsg, nullptr);
   723      ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int)));
   724      ASSERT_EQ(cmsg->cmsg_level, SOL_TCP);
   725      ASSERT_EQ(cmsg->cmsg_type, TCP_INQ);
   726  
   727      int inq = 0;
   728      memcpy(&inq, CMSG_DATA(cmsg), sizeof(int));
   729      ASSERT_EQ(inq, size);
   730    }
   731  }
   732  
   733  TEST_P(TcpSocketTest, Tiocinq) {
   734    char buf[1024];
   735    size_t size = sizeof(buf);
   736    ASSERT_THAT(RetryEINTR(write)(first_fd, buf, size),
   737                SyscallSucceedsWithValue(size));
   738  
   739    uint32_t seed = time(nullptr);
   740    const size_t max_chunk = size / 10;
   741    while (size > 0) {
   742      size_t chunk = (rand_r(&seed) % max_chunk) + 1;
   743      ssize_t read =
   744          RetryEINTR(recvfrom)(second_fd, buf, chunk, 0, nullptr, nullptr);
   745      ASSERT_THAT(read, SyscallSucceeds());
   746      size -= read;
   747  
   748      int inq = 0;
   749      ASSERT_THAT(ioctl(second_fd, TIOCINQ, &inq), SyscallSucceeds());
   750      ASSERT_EQ(inq, size);
   751    }
   752  }
   753  
   754  TEST_P(TcpSocketTest, TcpSCMPriority) {
   755    char buf[1024];
   756    ASSERT_THAT(RetryEINTR(write)(first_fd, buf, sizeof(buf)),
   757                SyscallSucceedsWithValue(sizeof(buf)));
   758  
   759    int val = 1;
   760    EXPECT_THAT(setsockopt(second_fd, SOL_TCP, TCP_INQ, &val, sizeof(val)),
   761                SyscallSucceedsWithValue(0));
   762    EXPECT_THAT(
   763        setsockopt(second_fd, SOL_SOCKET, SO_TIMESTAMP, &val, sizeof(val)),
   764        SyscallSucceedsWithValue(0));
   765  
   766    struct msghdr msg = {};
   767    std::vector<char> control(
   768        CMSG_SPACE(sizeof(struct timeval) + CMSG_SPACE(sizeof(int))));
   769    struct iovec iov;
   770    msg.msg_control = &control[0];
   771    msg.msg_controllen = control.size();
   772  
   773    iov.iov_base = buf;
   774    iov.iov_len = sizeof(buf);
   775    msg.msg_iov = &iov;
   776    msg.msg_iovlen = 1;
   777    ASSERT_THAT(RetryEINTR(recvmsg)(second_fd, &msg, 0),
   778                SyscallSucceedsWithValue(sizeof(buf)));
   779  
   780    struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
   781    ASSERT_NE(cmsg, nullptr);
   782    // TODO(b/78348848): SO_TIMESTAMP isn't implemented for TCP sockets.
   783    if (!IsRunningOnGvisor() || cmsg->cmsg_level == SOL_SOCKET) {
   784      ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET);
   785      ASSERT_EQ(cmsg->cmsg_type, SO_TIMESTAMP);
   786      ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(struct timeval)));
   787  
   788      cmsg = CMSG_NXTHDR(&msg, cmsg);
   789      ASSERT_NE(cmsg, nullptr);
   790    }
   791    ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int)));
   792    ASSERT_EQ(cmsg->cmsg_level, SOL_TCP);
   793    ASSERT_EQ(cmsg->cmsg_type, TCP_INQ);
   794  
   795    int inq = 0;
   796    memcpy(&inq, CMSG_DATA(cmsg), sizeof(int));
   797    ASSERT_EQ(inq, 0);
   798  
   799    cmsg = CMSG_NXTHDR(&msg, cmsg);
   800    ASSERT_EQ(cmsg, nullptr);
   801  }
   802  
   803  TEST_P(TcpSocketTest, TimeWaitPollHUP) {
   804    shutdown(first_fd, SHUT_RDWR);
   805    ScopedThread t([&]() {
   806      constexpr int kTimeout = 10000;
   807      constexpr int16_t want_events = POLLHUP;
   808      struct pollfd pfd = {
   809          .fd = first_fd,
   810          .events = want_events,
   811      };
   812      ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1));
   813    });
   814    shutdown(second_fd, SHUT_RDWR);
   815    t.Join();
   816    // At this point first_fd should be in TIME-WAIT and polling for POLLHUP
   817    // should return with 1 FD.
   818    constexpr int kTimeout = 10000;
   819    constexpr int16_t want_events = POLLHUP;
   820    struct pollfd pfd = {
   821        .fd = first_fd,
   822        .events = want_events,
   823    };
   824    ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1));
   825  }
   826  
// Instantiates every TcpSocketTest case for both AF_INET and AF_INET6; the
// address family is the test parameter.
INSTANTIATE_TEST_SUITE_P(AllInetTests, TcpSocketTest,
                         ::testing::Values(AF_INET, AF_INET6));

// Fixture for tests parameterized by address family that don't want the fixture
// to do things: it is a plain TestWithParam<int>, so each test creates the
// sockets it needs itself.
using SimpleTcpSocketTest = ::testing::TestWithParam<int>;
   833  
   834  TEST_P(SimpleTcpSocketTest, SendUnconnected) {
   835    int fd;
   836    ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
   837                SyscallSucceeds());
   838    FileDescriptor sock_fd(fd);
   839  
   840    char data = '\0';
   841    EXPECT_THAT(RetryEINTR(send)(fd, &data, sizeof(data), 0),
   842                SyscallFailsWithErrno(EPIPE));
   843  }
   844  
   845  TEST_P(SimpleTcpSocketTest, SendtoWithoutAddressUnconnected) {
   846    int fd;
   847    ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
   848                SyscallSucceeds());
   849    FileDescriptor sock_fd(fd);
   850  
   851    char data = '\0';
   852    EXPECT_THAT(RetryEINTR(sendto)(fd, &data, sizeof(data), 0, nullptr, 0),
   853                SyscallFailsWithErrno(EPIPE));
   854  }
   855  
   856  TEST_P(SimpleTcpSocketTest, SendtoWithAddressUnconnected) {
   857    int fd;
   858    ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
   859                SyscallSucceeds());
   860    FileDescriptor sock_fd(fd);
   861  
   862    sockaddr_storage addr =
   863        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
   864    char data = '\0';
   865    EXPECT_THAT(RetryEINTR(sendto)(fd, &data, sizeof(data), 0, AsSockAddr(&addr),
   866                                   sizeof(addr)),
   867                SyscallFailsWithErrno(EPIPE));
   868  }
   869  
   870  TEST_P(SimpleTcpSocketTest, GetPeerNameUnconnected) {
   871    int fd;
   872    ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
   873                SyscallSucceeds());
   874    FileDescriptor sock_fd(fd);
   875  
   876    sockaddr_storage addr;
   877    socklen_t addrlen = sizeof(addr);
   878    EXPECT_THAT(getpeername(fd, AsSockAddr(&addr), &addrlen),
   879                SyscallFailsWithErrno(ENOTCONN));
   880  }
   881  
   882  TEST_P(TcpSocketTest, FullBuffer) {
   883    // Set both FDs to be blocking.
   884    int flags = 0;
   885    ASSERT_THAT(flags = fcntl(first_fd, F_GETFL), SyscallSucceeds());
   886    EXPECT_THAT(fcntl(first_fd, F_SETFL, flags & ~O_NONBLOCK), SyscallSucceeds());
   887    flags = 0;
   888    ASSERT_THAT(flags = fcntl(second_fd, F_GETFL), SyscallSucceeds());
   889    EXPECT_THAT(fcntl(second_fd, F_SETFL, flags & ~O_NONBLOCK),
   890                SyscallSucceeds());
   891  
   892    // 2500 was chosen as a small value that can be set on Linux.
   893    int set_snd = 2500;
   894    EXPECT_THAT(
   895        setsockopt(first_fd, SOL_SOCKET, SO_SNDBUF, &set_snd, sizeof(set_snd)),
   896        SyscallSucceedsWithValue(0));
   897    int get_snd = -1;
   898    socklen_t get_snd_len = sizeof(get_snd);
   899    EXPECT_THAT(
   900        getsockopt(first_fd, SOL_SOCKET, SO_SNDBUF, &get_snd, &get_snd_len),
   901        SyscallSucceedsWithValue(0));
   902    EXPECT_EQ(get_snd_len, sizeof(get_snd));
   903    EXPECT_GT(get_snd, 0);
   904  
   905    // 2500 was chosen as a small value that can be set on Linux and gVisor.
   906    int set_rcv = 2500;
   907    EXPECT_THAT(
   908        setsockopt(second_fd, SOL_SOCKET, SO_RCVBUF, &set_rcv, sizeof(set_rcv)),
   909        SyscallSucceedsWithValue(0));
   910    int get_rcv = -1;
   911    socklen_t get_rcv_len = sizeof(get_rcv);
   912    EXPECT_THAT(
   913        getsockopt(second_fd, SOL_SOCKET, SO_RCVBUF, &get_rcv, &get_rcv_len),
   914        SyscallSucceedsWithValue(0));
   915    EXPECT_EQ(get_rcv_len, sizeof(get_rcv));
   916    EXPECT_GE(get_rcv, 2500);
   917  
   918    // Quick sanity test.
   919    EXPECT_LT(get_snd + get_rcv, 2500 * IOV_MAX);
   920  
   921    char data[2500] = {};
   922    std::vector<struct iovec> iovecs;
   923    for (int i = 0; i < IOV_MAX; i++) {
   924      struct iovec iov = {};
   925      iov.iov_base = data;
   926      iov.iov_len = sizeof(data);
   927      iovecs.push_back(iov);
   928    }
   929    ScopedThread t([this, &iovecs]() {
   930      int result = -1;
   931      EXPECT_THAT(
   932          result = RetryEINTR(writev)(first_fd, iovecs.data(), iovecs.size()),
   933          SyscallSucceeds());
   934      EXPECT_GT(result, 1);
   935      EXPECT_LT(result, sizeof(data) * iovecs.size());
   936    });
   937  
   938    char recv = 0;
   939    EXPECT_THAT(RetryEINTR(read)(second_fd, &recv, 1),
   940                SyscallSucceedsWithValue(1));
   941    EXPECT_THAT(close(second_fd), SyscallSucceedsWithValue(0));
   942    second_fd = -1;
   943  }
   944  
   945  TEST_P(TcpSocketTest, PollAfterShutdown) {
   946    ScopedThread client_thread([this]() {
   947      EXPECT_THAT(shutdown(first_fd, SHUT_WR), SyscallSucceedsWithValue(0));
   948      struct pollfd poll_fd = {first_fd, POLLIN | POLLERR | POLLHUP, 0};
   949      EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000),
   950                  SyscallSucceedsWithValue(1));
   951    });
   952  
   953    EXPECT_THAT(shutdown(second_fd, SHUT_WR), SyscallSucceedsWithValue(0));
   954    struct pollfd poll_fd = {second_fd, POLLIN | POLLERR | POLLHUP, 0};
   955    EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000),
   956                SyscallSucceedsWithValue(1));
   957  }
   958  
   959  TEST_P(SimpleTcpSocketTest, NonBlockingConnectRetry) {
   960    const FileDescriptor listener =
   961        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
   962  
   963    // Initialize address to the loopback one.
   964    sockaddr_storage addr =
   965        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
   966    socklen_t addrlen = sizeof(addr);
   967  
   968    // Bind to some port but don't listen yet.
   969    ASSERT_THAT(bind(listener.get(), AsSockAddr(&addr), addrlen),
   970                SyscallSucceeds());
   971  
   972    // Get the address we're bound to, then connect to it. We need to do this
   973    // because we're allowing the stack to pick a port for us.
   974    ASSERT_THAT(getsockname(listener.get(), AsSockAddr(&addr), &addrlen),
   975                SyscallSucceeds());
   976  
   977    FileDescriptor connector =
   978        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
   979  
   980    // Verify that connect fails.
   981    ASSERT_THAT(RetryEINTR(connect)(connector.get(), AsSockAddr(&addr), addrlen),
   982                SyscallFailsWithErrno(ECONNREFUSED));
   983  
   984    // Now start listening
   985    ASSERT_THAT(listen(listener.get(), SOMAXCONN), SyscallSucceeds());
   986  
   987    // TODO(gvisor.dev/issue/3828): Issuing connect() again on a socket that
   988    //   failed first connect should succeed.
   989    if (IsRunningOnGvisor()) {
   990      ASSERT_THAT(
   991          RetryEINTR(connect)(connector.get(), AsSockAddr(&addr), addrlen),
   992          SyscallFailsWithErrno(ECONNABORTED));
   993      return;
   994    }
   995  
   996    // Verify that connect now succeeds.
   997    ASSERT_THAT(RetryEINTR(connect)(connector.get(), AsSockAddr(&addr), addrlen),
   998                SyscallSucceeds());
   999  
  1000    // Accept the connection.
  1001    const FileDescriptor accepted =
  1002        ASSERT_NO_ERRNO_AND_VALUE(Accept(listener.get(), nullptr, nullptr));
  1003  }
  1004  
  1005  // nonBlockingConnectNoListener returns a socket on which a connect that is
  1006  // expected to fail has been issued.
  1007  PosixErrorOr<FileDescriptor> nonBlockingConnectNoListener(const int family,
  1008                                                            sockaddr_storage addr,
  1009                                                            socklen_t addrlen) {
  1010    // We will first create a socket and bind to ensure we bind a port but will
  1011    // not call listen on this socket.
  1012    // Then we will create a new socket that will connect to the port bound by
  1013    // the first socket and that shoud fail.
  1014    constexpr int sock_type = SOCK_STREAM | SOCK_NONBLOCK;
  1015    int b_sock;
  1016    RETURN_ERROR_IF_SYSCALL_FAIL(b_sock = socket(family, sock_type, IPPROTO_TCP));
  1017    FileDescriptor b(b_sock);
  1018    EXPECT_THAT(bind(b.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds());
  1019  
  1020    // Get the address bound by the listening socket.
  1021    EXPECT_THAT(getsockname(b.get(), AsSockAddr(&addr), &addrlen),
  1022                SyscallSucceeds());
  1023  
  1024    // Now create another socket and issue a connect on this one. This connect
  1025    // should fail as there is no listener.
  1026    int c_sock;
  1027    RETURN_ERROR_IF_SYSCALL_FAIL(c_sock = socket(family, sock_type, IPPROTO_TCP));
  1028    FileDescriptor s(c_sock);
  1029  
  1030    // Now connect to the bound address and this should fail as nothing
  1031    // is listening on the bound address.
  1032    EXPECT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1033                SyscallFailsWithErrno(EINPROGRESS));
  1034  
  1035    // Wait for the connect to fail.
  1036    struct pollfd poll_fd = {s.get(), POLLERR, 0};
  1037    EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 1000), SyscallSucceedsWithValue(1));
  1038    return std::move(s);
  1039  }
  1040  
  1041  TEST_P(SimpleTcpSocketTest, NonBlockingConnectNoListener) {
  1042    sockaddr_storage addr =
  1043        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
  1044    socklen_t addrlen = sizeof(addr);
  1045  
  1046    const FileDescriptor s =
  1047        nonBlockingConnectNoListener(GetParam(), addr, addrlen).ValueOrDie();
  1048  
  1049    int err;
  1050    socklen_t optlen = sizeof(err);
  1051    ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ERROR, &err, &optlen),
  1052                SyscallSucceeds());
  1053    ASSERT_THAT(optlen, sizeof(err));
  1054    EXPECT_EQ(err, ECONNREFUSED);
  1055  
  1056    unsigned char c;
  1057    ASSERT_THAT(read(s.get(), &c, sizeof(c)), SyscallSucceedsWithValue(0));
  1058    int opts;
  1059    EXPECT_THAT(opts = fcntl(s.get(), F_GETFL), SyscallSucceeds());
  1060    opts &= ~O_NONBLOCK;
  1061    EXPECT_THAT(fcntl(s.get(), F_SETFL, opts), SyscallSucceeds());
  1062    // Try connecting again.
  1063    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1064                SyscallFailsWithErrno(ECONNABORTED));
  1065  }
  1066  
  1067  TEST_P(SimpleTcpSocketTest, NonBlockingConnectNoListenerRead) {
  1068    sockaddr_storage addr =
  1069        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
  1070    socklen_t addrlen = sizeof(addr);
  1071  
  1072    const FileDescriptor s =
  1073        nonBlockingConnectNoListener(GetParam(), addr, addrlen).ValueOrDie();
  1074  
  1075    unsigned char c;
  1076    ASSERT_THAT(read(s.get(), &c, 1), SyscallFailsWithErrno(ECONNREFUSED));
  1077    ASSERT_THAT(read(s.get(), &c, 1), SyscallSucceedsWithValue(0));
  1078    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1079                SyscallFailsWithErrno(ECONNABORTED));
  1080  }
  1081  
  1082  TEST_P(SimpleTcpSocketTest, NonBlockingConnectNoListenerPeek) {
  1083    sockaddr_storage addr =
  1084        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
  1085    socklen_t addrlen = sizeof(addr);
  1086  
  1087    const FileDescriptor s =
  1088        nonBlockingConnectNoListener(GetParam(), addr, addrlen).ValueOrDie();
  1089  
  1090    unsigned char c;
  1091    ASSERT_THAT(recv(s.get(), &c, 1, MSG_PEEK),
  1092                SyscallFailsWithErrno(ECONNREFUSED));
  1093    ASSERT_THAT(recv(s.get(), &c, 1, MSG_PEEK), SyscallSucceedsWithValue(0));
  1094    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1095                SyscallFailsWithErrno(ECONNABORTED));
  1096  }
  1097  
  1098  TEST_P(SimpleTcpSocketTest, SelfConnectSendRecv) {
  1099    // Initialize address to the loopback one.
  1100    sockaddr_storage addr =
  1101        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
  1102    socklen_t addrlen = sizeof(addr);
  1103  
  1104    const FileDescriptor s =
  1105        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1106  
  1107    ASSERT_THAT((bind)(s.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds());
  1108    // Get the bound port.
  1109    ASSERT_THAT(getsockname(s.get(), AsSockAddr(&addr), &addrlen),
  1110                SyscallSucceeds());
  1111    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1112                SyscallSucceeds());
  1113  
  1114    constexpr int kBufSz = 1 << 20;  // 1 MiB
  1115    std::vector<char> writebuf(kBufSz);
  1116  
  1117    // Start reading the response in a loop.
  1118    int read_bytes = 0;
  1119    ScopedThread t([&s, &read_bytes]() {
  1120      // Too many syscalls.
  1121      const DisableSave disable_save;
  1122  
  1123      char readbuf[2500] = {};
  1124      int n = -1;
  1125      while (n != 0) {
  1126        ASSERT_THAT(n = RetryEINTR(read)(s.get(), &readbuf, sizeof(readbuf)),
  1127                    SyscallSucceeds());
  1128        read_bytes += n;
  1129      }
  1130    });
  1131  
  1132    // Try to send the whole thing.
  1133    int n;
  1134    ASSERT_THAT(n = SendFd(s.get(), writebuf.data(), kBufSz, 0),
  1135                SyscallSucceeds());
  1136  
  1137    // We should have written the whole thing.
  1138    EXPECT_EQ(n, kBufSz);
  1139    EXPECT_THAT(shutdown(s.get(), SHUT_WR), SyscallSucceedsWithValue(0));
  1140    t.Join();
  1141  
  1142    // We should have read the whole thing.
  1143    EXPECT_EQ(read_bytes, kBufSz);
  1144  }
  1145  
  1146  TEST_P(SimpleTcpSocketTest, SelfConnectSend) {
  1147    // Initialize address to the loopback one.
  1148    sockaddr_storage addr =
  1149        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
  1150    socklen_t addrlen = sizeof(addr);
  1151  
  1152    const FileDescriptor s =
  1153        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1154  
  1155    constexpr int max_seg = 256;
  1156    ASSERT_THAT(
  1157        setsockopt(s.get(), SOL_TCP, TCP_MAXSEG, &max_seg, sizeof(max_seg)),
  1158        SyscallSucceeds());
  1159  
  1160    ASSERT_THAT(bind(s.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds());
  1161    // Get the bound port.
  1162    ASSERT_THAT(getsockname(s.get(), AsSockAddr(&addr), &addrlen),
  1163                SyscallSucceeds());
  1164    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1165                SyscallSucceeds());
  1166  
  1167    // Ensure the write buffer is large enough not to block on a single write.
  1168    size_t write_size = 512 << 10;  // 512 KiB.
  1169    EXPECT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_SNDBUF, &write_size,
  1170                           sizeof(write_size)),
  1171                SyscallSucceedsWithValue(0));
  1172  
  1173    std::vector<char> writebuf(write_size);
  1174  
  1175    // Try to send the whole thing.
  1176    int n;
  1177    ASSERT_THAT(n = SendFd(s.get(), writebuf.data(), writebuf.size(), 0),
  1178                SyscallSucceeds());
  1179  
  1180    // We should have written the whole thing.
  1181    EXPECT_EQ(n, writebuf.size());
  1182    EXPECT_THAT(shutdown(s.get(), SHUT_WR), SyscallSucceedsWithValue(0));
  1183  }
  1184  
  1185  TEST_P(SimpleTcpSocketTest, SelfConnectSendShutdownWrite) {
  1186    // Initialize address to the loopback one.
  1187    sockaddr_storage addr =
  1188        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
  1189    socklen_t addrlen = sizeof(addr);
  1190  
  1191    const FileDescriptor s =
  1192        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1193  
  1194    ASSERT_THAT(bind(s.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds());
  1195    // Get the bound port.
  1196    ASSERT_THAT(getsockname(s.get(), AsSockAddr(&addr), &addrlen),
  1197                SyscallSucceeds());
  1198    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1199                SyscallSucceeds());
  1200  
  1201    // Write enough data to fill send and receive buffers.
  1202    size_t write_size = 24 << 20;  // 24 MiB.
  1203    std::vector<char> writebuf(write_size);
  1204  
  1205    ScopedThread t([&s]() {
  1206      absl::SleepFor(absl::Milliseconds(250));
  1207      ASSERT_THAT(shutdown(s.get(), SHUT_WR), SyscallSucceeds());
  1208    });
  1209  
  1210    // Try to send the whole thing.
  1211    int n;
  1212    ASSERT_THAT(n = SendFd(s.get(), writebuf.data(), writebuf.size(), 0),
  1213                SyscallFailsWithErrno(EPIPE));
  1214  }
  1215  
  1216  TEST_P(SimpleTcpSocketTest, SelfConnectRecvShutdownRead) {
  1217    // Initialize address to the loopback one.
  1218    sockaddr_storage addr =
  1219        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
  1220    socklen_t addrlen = sizeof(addr);
  1221  
  1222    const FileDescriptor s =
  1223        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1224  
  1225    ASSERT_THAT(bind(s.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds());
  1226    // Get the bound port.
  1227    ASSERT_THAT(getsockname(s.get(), AsSockAddr(&addr), &addrlen),
  1228                SyscallSucceeds());
  1229    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1230                SyscallSucceeds());
  1231  
  1232    ScopedThread t([&s]() {
  1233      absl::SleepFor(absl::Milliseconds(250));
  1234      ASSERT_THAT(shutdown(s.get(), SHUT_RD), SyscallSucceeds());
  1235    });
  1236  
  1237    char buf[1];
  1238    EXPECT_THAT(recv(s.get(), buf, 0, 0), SyscallSucceedsWithValue(0));
  1239  }
  1240  
  1241  void NonBlockingConnect(int family, int16_t pollMask) {
  1242    const FileDescriptor listener =
  1243        ASSERT_NO_ERRNO_AND_VALUE(Socket(family, SOCK_STREAM, IPPROTO_TCP));
  1244  
  1245    // Initialize address to the loopback one.
  1246    sockaddr_storage addr = ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(family));
  1247    socklen_t addrlen = sizeof(addr);
  1248  
  1249    // Bind to some port then start listening.
  1250    ASSERT_THAT(bind(listener.get(), AsSockAddr(&addr), addrlen),
  1251                SyscallSucceeds());
  1252  
  1253    ASSERT_THAT(listen(listener.get(), SOMAXCONN), SyscallSucceeds());
  1254  
  1255    FileDescriptor s =
  1256        ASSERT_NO_ERRNO_AND_VALUE(Socket(family, SOCK_STREAM, IPPROTO_TCP));
  1257  
  1258    // Set the FD to O_NONBLOCK.
  1259    int opts;
  1260    ASSERT_THAT(opts = fcntl(s.get(), F_GETFL), SyscallSucceeds());
  1261    opts |= O_NONBLOCK;
  1262    ASSERT_THAT(fcntl(s.get(), F_SETFL, opts), SyscallSucceeds());
  1263  
  1264    ASSERT_THAT(getsockname(listener.get(), AsSockAddr(&addr), &addrlen),
  1265                SyscallSucceeds());
  1266  
  1267    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1268                SyscallFailsWithErrno(EINPROGRESS));
  1269  
  1270    int t;
  1271    ASSERT_THAT(t = RetryEINTR(accept)(listener.get(), nullptr, nullptr),
  1272                SyscallSucceeds());
  1273  
  1274    struct pollfd poll_fd = {s.get(), pollMask, 0};
  1275    EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000),
  1276                SyscallSucceedsWithValue(1));
  1277  
  1278    int err;
  1279    socklen_t optlen = sizeof(err);
  1280    ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ERROR, &err, &optlen),
  1281                SyscallSucceeds());
  1282  
  1283    EXPECT_EQ(err, 0);
  1284  
  1285    EXPECT_THAT(close(t), SyscallSucceeds());
  1286  }
  1287  
  1288  TEST_P(SimpleTcpSocketTest, NonBlockingConnect_PollOut) {
  1289    NonBlockingConnect(GetParam(), POLLOUT);
  1290  }
  1291  
  1292  TEST_P(SimpleTcpSocketTest, NonBlockingConnect_PollWrNorm) {
  1293    NonBlockingConnect(GetParam(), POLLWRNORM);
  1294  }
  1295  
  1296  TEST_P(SimpleTcpSocketTest, NonBlockingConnect_PollWrNorm_PollOut) {
  1297    NonBlockingConnect(GetParam(), POLLWRNORM | POLLOUT);
  1298  }
  1299  
  1300  TEST_P(SimpleTcpSocketTest, NonBlockingConnectRemoteClose) {
  1301    const FileDescriptor listener =
  1302        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1303  
  1304    // Initialize address to the loopback one.
  1305    sockaddr_storage addr =
  1306        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
  1307    socklen_t addrlen = sizeof(addr);
  1308  
  1309    // Bind to some port then start listening.
  1310    ASSERT_THAT(bind(listener.get(), AsSockAddr(&addr), addrlen),
  1311                SyscallSucceeds());
  1312  
  1313    ASSERT_THAT(listen(listener.get(), SOMAXCONN), SyscallSucceeds());
  1314  
  1315    FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
  1316        Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
  1317  
  1318    ASSERT_THAT(getsockname(listener.get(), AsSockAddr(&addr), &addrlen),
  1319                SyscallSucceeds());
  1320  
  1321    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1322                SyscallFailsWithErrno(EINPROGRESS));
  1323  
  1324    int t;
  1325    ASSERT_THAT(t = RetryEINTR(accept)(listener.get(), nullptr, nullptr),
  1326                SyscallSucceeds());
  1327  
  1328    EXPECT_THAT(close(t), SyscallSucceeds());
  1329  
  1330    // Now polling on the FD with a timeout should return 0 corresponding to no
  1331    // FDs ready.
  1332    struct pollfd poll_fd = {s.get(), POLLOUT, 0};
  1333    EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10000),
  1334                SyscallSucceedsWithValue(1));
  1335  
  1336    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1337                SyscallSucceeds());
  1338  
  1339    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1340                SyscallFailsWithErrno(EISCONN));
  1341  }
  1342  
  1343  // Test that we get an ECONNREFUSED with a blocking socket when no one is
  1344  // listening on the other end.
  1345  TEST_P(SimpleTcpSocketTest, BlockingConnectRefused) {
  1346    FileDescriptor s =
  1347        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1348  
  1349    // Initialize address to the loopback one.
  1350    sockaddr_storage addr =
  1351        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
  1352    socklen_t addrlen = sizeof(addr);
  1353  
  1354    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1355                SyscallFailsWithErrno(ECONNREFUSED));
  1356  
  1357    // Avoiding triggering save in destructor of s.
  1358    EXPECT_THAT(close(s.release()), SyscallSucceeds());
  1359  }
  1360  
  1361  // Test that connecting to a non-listening port and thus receiving a RST is
  1362  // handled appropriately by the socket - the port that the socket was bound to
  1363  // is released and the expected error is returned.
  1364  TEST_P(SimpleTcpSocketTest, CleanupOnConnectionRefused) {
  1365    // Create a socket that is known to not be listening. As is it bound but not
  1366    // listening, when another socket connects to the port, it will refuse..
  1367    FileDescriptor bound_s =
  1368        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1369  
  1370    sockaddr_storage bound_addr =
  1371        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
  1372    socklen_t bound_addrlen = sizeof(bound_addr);
  1373  
  1374    ASSERT_THAT(bind(bound_s.get(), AsSockAddr(&bound_addr), bound_addrlen),
  1375                SyscallSucceeds());
  1376  
  1377    // Get the addresses the socket is bound to because the port is chosen by the
  1378    // stack.
  1379    ASSERT_THAT(
  1380        getsockname(bound_s.get(), AsSockAddr(&bound_addr), &bound_addrlen),
  1381        SyscallSucceeds());
  1382  
  1383    // Create, initialize, and bind the socket that is used to test connecting to
  1384    // the non-listening port.
  1385    FileDescriptor client_s =
  1386        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1387    // Initialize client address to the loopback one.
  1388    sockaddr_storage client_addr =
  1389        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
  1390    socklen_t client_addrlen = sizeof(client_addr);
  1391  
  1392    ASSERT_THAT(bind(client_s.get(), AsSockAddr(&client_addr), client_addrlen),
  1393                SyscallSucceeds());
  1394  
  1395    ASSERT_THAT(
  1396        getsockname(client_s.get(), AsSockAddr(&client_addr), &client_addrlen),
  1397        SyscallSucceeds());
  1398  
  1399    // Now the test: connect to the bound but not listening socket with the
  1400    // client socket. The bound socket should return a RST and cause the client
  1401    // socket to return an error and clean itself up immediately.
  1402    // The error being ECONNREFUSED diverges with RFC 793, page 37, but does what
  1403    // Linux does.
  1404    ASSERT_THAT(connect(client_s.get(),
  1405                        reinterpret_cast<const struct sockaddr*>(&bound_addr),
  1406                        bound_addrlen),
  1407                SyscallFailsWithErrno(ECONNREFUSED));
  1408  
  1409    FileDescriptor new_s =
  1410        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1411  
  1412    // Test binding to the address from the client socket. This should be okay
  1413    // if it was dropped correctly.
  1414    ASSERT_THAT(bind(new_s.get(), AsSockAddr(&client_addr), client_addrlen),
  1415                SyscallSucceeds());
  1416  
  1417    // Attempt #2, with the new socket and reused addr our connect should fail in
  1418    // the same way as before, not with an EADDRINUSE.
  1419    //
  1420    // TODO(gvisor.dev/issue/3828): 2nd connect on a socket which failed connect
  1421    //   first time should succeed.
  1422    // gVisor never issues the second connect and returns ECONNABORTED instead.
  1423    // Linux actually sends a SYN again and gets a RST and correctly returns
  1424    // ECONNREFUSED.
  1425    if (IsRunningOnGvisor()) {
  1426      ASSERT_THAT(connect(client_s.get(),
  1427                          reinterpret_cast<const struct sockaddr*>(&bound_addr),
  1428                          bound_addrlen),
  1429                  SyscallFailsWithErrno(ECONNABORTED));
  1430      return;
  1431    }
  1432    ASSERT_THAT(connect(client_s.get(),
  1433                        reinterpret_cast<const struct sockaddr*>(&bound_addr),
  1434                        bound_addrlen),
  1435                SyscallFailsWithErrno(ECONNREFUSED));
  1436  }
  1437  
  1438  // Test that we get an ECONNREFUSED with a nonblocking socket.
  1439  TEST_P(SimpleTcpSocketTest, NonBlockingConnectRefused) {
  1440    FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
  1441        Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
  1442  
  1443    // Initialize address to the loopback one.
  1444    sockaddr_storage addr =
  1445        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
  1446    socklen_t addrlen = sizeof(addr);
  1447  
  1448    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1449                SyscallFailsWithErrno(EINPROGRESS));
  1450  
  1451    // We don't need to specify any events to get POLLHUP or POLLERR as these
  1452    // are added before the poll.
  1453    struct pollfd poll_fd = {s.get(), /*events=*/0, 0};
  1454    EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 1000), SyscallSucceedsWithValue(1));
  1455  
  1456    // The ECONNREFUSED should cause us to be woken up with POLLHUP.
  1457    EXPECT_NE(poll_fd.revents & (POLLHUP | POLLERR), 0);
  1458  
  1459    // Avoiding triggering save in destructor of s.
  1460    EXPECT_THAT(close(s.release()), SyscallSucceeds());
  1461  }
  1462  
  1463  // Test that setting a supported congestion control algorithm succeeds for an
  1464  // unconnected TCP socket
  1465  TEST_P(SimpleTcpSocketTest, SetCongestionControlSucceedsForSupported) {
  1466    // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
  1467    const int kTcpCaNameMax = 16;
  1468  
  1469    FileDescriptor s =
  1470        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1471    {
  1472      const char kSetCC[kTcpCaNameMax] = "reno";
  1473      ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC,
  1474                             strlen(kSetCC)),
  1475                  SyscallSucceedsWithValue(0));
  1476  
  1477      char got_cc[kTcpCaNameMax];
  1478      memset(got_cc, '1', sizeof(got_cc));
  1479      socklen_t optlen = sizeof(got_cc);
  1480      ASSERT_THAT(
  1481          getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
  1482          SyscallSucceedsWithValue(0));
  1483      // We ignore optlen here as the linux kernel sets optlen to the lower of the
  1484      // size of the buffer passed in or kTcpCaNameMax and not the length of the
  1485      // congestion control algorithm's actual name.
  1486      EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kTcpCaNameMax)));
  1487    }
  1488    {
  1489      const char kSetCC[kTcpCaNameMax] = "cubic";
  1490      ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC,
  1491                             strlen(kSetCC)),
  1492                  SyscallSucceedsWithValue(0));
  1493  
  1494      char got_cc[kTcpCaNameMax];
  1495      memset(got_cc, '1', sizeof(got_cc));
  1496      socklen_t optlen = sizeof(got_cc);
  1497      ASSERT_THAT(
  1498          getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
  1499          SyscallSucceedsWithValue(0));
  1500      // We ignore optlen here as the linux kernel sets optlen to the lower of the
  1501      // size of the buffer passed in or kTcpCaNameMax and not the length of the
  1502      // congestion control algorithm's actual name.
  1503      EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kTcpCaNameMax)));
  1504    }
  1505  }
  1506  
  1507  // This test verifies that a getsockopt(...TCP_CONGESTION) behaviour is
  1508  // consistent between linux and gvisor when the passed in buffer is smaller than
  1509  // kTcpCaNameMax.
  1510  TEST_P(SimpleTcpSocketTest, SetGetTCPCongestionShortReadBuffer) {
  1511    FileDescriptor s =
  1512        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1513    {
  1514      // Verify that getsockopt/setsockopt work with buffers smaller than
  1515      // kTcpCaNameMax.
  1516      const char kSetCC[] = "cubic";
  1517      ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC,
  1518                             strlen(kSetCC)),
  1519                  SyscallSucceedsWithValue(0));
  1520  
  1521      char got_cc[sizeof(kSetCC)];
  1522      socklen_t optlen = sizeof(got_cc);
  1523      ASSERT_THAT(
  1524          getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
  1525          SyscallSucceedsWithValue(0));
  1526      EXPECT_EQ(sizeof(got_cc), optlen);
  1527      EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(got_cc)));
  1528    }
  1529  }
  1530  
  1531  // This test verifies that a getsockopt(...TCP_CONGESTION) behaviour is
  1532  // consistent between linux and gvisor when the passed in buffer is larger than
  1533  // kTcpCaNameMax.
  1534  TEST_P(SimpleTcpSocketTest, SetGetTCPCongestionLargeReadBuffer) {
  1535    // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
  1536    const int kTcpCaNameMax = 16;
  1537  
  1538    FileDescriptor s =
  1539        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1540    {
  1541      // Verify that getsockopt works with buffers larger than
  1542      // kTcpCaNameMax.
  1543      const char kSetCC[] = "cubic";
  1544      ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC,
  1545                             strlen(kSetCC)),
  1546                  SyscallSucceedsWithValue(0));
  1547  
  1548      char got_cc[kTcpCaNameMax + 5];
  1549      socklen_t optlen = sizeof(got_cc);
  1550      ASSERT_THAT(
  1551          getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
  1552          SyscallSucceedsWithValue(0));
  1553      // Linux copies the minimum of kTcpCaNameMax or the length of the passed in
  1554      // buffer and sets optlen to the number of bytes actually copied
  1555      // irrespective of the actual length of the congestion control name.
  1556      EXPECT_EQ(kTcpCaNameMax, optlen);
  1557      EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kSetCC)));
  1558    }
  1559  }
  1560  
  1561  // Test that setting an unsupported congestion control algorithm fails for an
  1562  // unconnected TCP socket.
  1563  TEST_P(SimpleTcpSocketTest, SetCongestionControlFailsForUnsupported) {
  1564    // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
  1565    const int kTcpCaNameMax = 16;
  1566  
  1567    FileDescriptor s =
  1568        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1569    char old_cc[kTcpCaNameMax];
  1570    socklen_t optlen = sizeof(old_cc);
  1571    ASSERT_THAT(
  1572        getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &old_cc, &optlen),
  1573        SyscallSucceedsWithValue(0));
  1574  
  1575    const char kSetCC[] = "invalid_ca_kSetCC";
  1576    ASSERT_THAT(
  1577        setsockopt(s.get(), SOL_TCP, TCP_CONGESTION, &kSetCC, strlen(kSetCC)),
  1578        SyscallFailsWithErrno(ENOENT));
  1579  
  1580    char got_cc[kTcpCaNameMax];
  1581    ASSERT_THAT(
  1582        getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
  1583        SyscallSucceedsWithValue(0));
  1584    // We ignore optlen here as the linux kernel sets optlen to the lower of the
  1585    // size of the buffer passed in or kTcpCaNameMax and not the length of the
  1586    // congestion control algorithm's actual name.
  1587    EXPECT_EQ(0, memcmp(got_cc, old_cc, sizeof(kTcpCaNameMax)));
  1588  }
  1589  
  1590  TEST_P(SimpleTcpSocketTest, MaxSegDefault) {
  1591    FileDescriptor s =
  1592        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1593  
  1594    constexpr int kDefaultMSS = 536;
  1595    int tcp_max_seg;
  1596    socklen_t optlen = sizeof(tcp_max_seg);
  1597    ASSERT_THAT(
  1598        getsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg, &optlen),
  1599        SyscallSucceedsWithValue(0));
  1600  
  1601    EXPECT_EQ(kDefaultMSS, tcp_max_seg);
  1602    EXPECT_EQ(sizeof(tcp_max_seg), optlen);
  1603  }
  1604  
  1605  TEST_P(SimpleTcpSocketTest, SetMaxSeg) {
  1606    FileDescriptor s =
  1607        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1608  
  1609    constexpr int kDefaultMSS = 536;
  1610    constexpr int kTCPMaxSeg = 1024;
  1611    ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &kTCPMaxSeg,
  1612                           sizeof(kTCPMaxSeg)),
  1613                SyscallSucceedsWithValue(0));
  1614  
  1615    // Linux actually never returns the user_mss value. It will always return the
  1616    // default MSS value defined above for an unconnected socket and always return
  1617    // the actual current MSS for a connected one.
  1618    int optval;
  1619    socklen_t optlen = sizeof(optval);
  1620    ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &optval, &optlen),
  1621                SyscallSucceedsWithValue(0));
  1622  
  1623    EXPECT_EQ(kDefaultMSS, optval);
  1624    EXPECT_EQ(sizeof(optval), optlen);
  1625  }
  1626  
  1627  TEST_P(SimpleTcpSocketTest, SetMaxSegFailsForInvalidMSSValues) {
  1628    FileDescriptor s =
  1629        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1630  
  1631    {
  1632      constexpr int tcp_max_seg = 10;
  1633      ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg,
  1634                             sizeof(tcp_max_seg)),
  1635                  SyscallFailsWithErrno(EINVAL));
  1636    }
  1637    {
  1638      constexpr int tcp_max_seg = 75000;
  1639      ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg,
  1640                             sizeof(tcp_max_seg)),
  1641                  SyscallFailsWithErrno(EINVAL));
  1642    }
  1643  }
  1644  
  1645  TEST_P(SimpleTcpSocketTest, SetTCPUserTimeout) {
  1646    FileDescriptor s =
  1647        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1648  
  1649    {
  1650      constexpr int kTCPUserTimeout = -1;
  1651      EXPECT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT,
  1652                             &kTCPUserTimeout, sizeof(kTCPUserTimeout)),
  1653                  SyscallFailsWithErrno(EINVAL));
  1654    }
  1655  
  1656    // kTCPUserTimeout is in milliseconds.
  1657    constexpr int kTCPUserTimeout = 100;
  1658    ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT,
  1659                           &kTCPUserTimeout, sizeof(kTCPUserTimeout)),
  1660                SyscallSucceedsWithValue(0));
  1661    int get = -1;
  1662    socklen_t get_len = sizeof(get);
  1663    ASSERT_THAT(
  1664        getsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, &get, &get_len),
  1665        SyscallSucceedsWithValue(0));
  1666    EXPECT_EQ(get_len, sizeof(get));
  1667    EXPECT_EQ(get, kTCPUserTimeout);
  1668  }
  1669  
  1670  TEST_P(SimpleTcpSocketTest, SetTCPDeferAcceptNeg) {
  1671    FileDescriptor s =
  1672        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1673  
  1674    // -ve TCP_DEFER_ACCEPT is same as setting it to zero.
  1675    constexpr int kNeg = -1;
  1676    EXPECT_THAT(
  1677        setsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &kNeg, sizeof(kNeg)),
  1678        SyscallSucceeds());
  1679    int get = -1;
  1680    socklen_t get_len = sizeof(get);
  1681    ASSERT_THAT(
  1682        getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len),
  1683        SyscallSucceedsWithValue(0));
  1684    EXPECT_EQ(get_len, sizeof(get));
  1685    EXPECT_EQ(get, 0);
  1686  }
  1687  
  1688  TEST_P(SimpleTcpSocketTest, GetTCPDeferAcceptDefault) {
  1689    FileDescriptor s =
  1690        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1691  
  1692    int get = -1;
  1693    socklen_t get_len = sizeof(get);
  1694    ASSERT_THAT(
  1695        getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len),
  1696        SyscallSucceedsWithValue(0));
  1697    EXPECT_EQ(get_len, sizeof(get));
  1698    EXPECT_EQ(get, 0);
  1699  }
  1700  
  1701  TEST_P(SimpleTcpSocketTest, SetTCPDeferAcceptGreaterThanZero) {
  1702    FileDescriptor s =
  1703        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1704    // kTCPDeferAccept is in seconds.
  1705    // NOTE: linux translates seconds to # of retries and back from
  1706    //   #of retries to seconds. Which means only certain values
  1707    //   translate back exactly. That's why we use 3 here, a value of
  1708    //   5 will result in us getting back 7 instead of 5 in the
  1709    //   getsockopt.
  1710    constexpr int kTCPDeferAccept = 3;
  1711    ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT,
  1712                           &kTCPDeferAccept, sizeof(kTCPDeferAccept)),
  1713                SyscallSucceeds());
  1714    int get = -1;
  1715    socklen_t get_len = sizeof(get);
  1716    ASSERT_THAT(
  1717        getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len),
  1718        SyscallSucceeds());
  1719    EXPECT_EQ(get_len, sizeof(get));
  1720    EXPECT_EQ(get, kTCPDeferAccept);
  1721  }
  1722  
  1723  TEST_P(SimpleTcpSocketTest, RecvOnClosedSocket) {
  1724    auto s =
  1725        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1726    char buf[1];
  1727    EXPECT_THAT(recv(s.get(), buf, 0, 0), SyscallFailsWithErrno(ENOTCONN));
  1728    EXPECT_THAT(recv(s.get(), buf, sizeof(buf), 0),
  1729                SyscallFailsWithErrno(ENOTCONN));
  1730  }
  1731  
  1732  TEST_P(SimpleTcpSocketTest, TCPConnectSoRcvBufRace) {
  1733    auto s = ASSERT_NO_ERRNO_AND_VALUE(
  1734        Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
  1735    sockaddr_storage addr =
  1736        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
  1737    socklen_t addrlen = sizeof(addr);
  1738  
  1739    RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen);
  1740    int buf_sz = 1 << 18;
  1741    EXPECT_THAT(
  1742        setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &buf_sz, sizeof(buf_sz)),
  1743        SyscallSucceedsWithValue(0));
  1744  }
  1745  
  1746  TEST_P(SimpleTcpSocketTest, SetTCPSynCntLessThanOne) {
  1747    FileDescriptor s =
  1748        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1749  
  1750    int get = -1;
  1751    socklen_t get_len = sizeof(get);
  1752    ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
  1753                SyscallSucceedsWithValue(0));
  1754    EXPECT_EQ(get_len, sizeof(get));
  1755    int default_syn_cnt = get;
  1756  
  1757    {
  1758      // TCP_SYNCNT less than 1 should be rejected with an EINVAL.
  1759      constexpr int kZero = 0;
  1760      EXPECT_THAT(
  1761          setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kZero, sizeof(kZero)),
  1762          SyscallFailsWithErrno(EINVAL));
  1763  
  1764      // TCP_SYNCNT less than 1 should be rejected with an EINVAL.
  1765      constexpr int kNeg = -1;
  1766      EXPECT_THAT(
  1767          setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kNeg, sizeof(kNeg)),
  1768          SyscallFailsWithErrno(EINVAL));
  1769  
  1770      int get = -1;
  1771      socklen_t get_len = sizeof(get);
  1772  
  1773      ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
  1774                  SyscallSucceedsWithValue(0));
  1775      EXPECT_EQ(get_len, sizeof(get));
  1776      EXPECT_EQ(default_syn_cnt, get);
  1777    }
  1778  }
  1779  
  1780  TEST_P(SimpleTcpSocketTest, GetTCPSynCntDefault) {
  1781    FileDescriptor s =
  1782        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1783  
  1784    int get = -1;
  1785    socklen_t get_len = sizeof(get);
  1786    constexpr int kDefaultSynCnt = 6;
  1787  
  1788    ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
  1789                SyscallSucceedsWithValue(0));
  1790    EXPECT_EQ(get_len, sizeof(get));
  1791    EXPECT_EQ(get, kDefaultSynCnt);
  1792  }
  1793  
  1794  TEST_P(SimpleTcpSocketTest, SetTCPSynCntGreaterThanOne) {
  1795    FileDescriptor s =
  1796        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1797    constexpr int kTCPSynCnt = 20;
  1798    ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kTCPSynCnt,
  1799                           sizeof(kTCPSynCnt)),
  1800                SyscallSucceeds());
  1801  
  1802    int get = -1;
  1803    socklen_t get_len = sizeof(get);
  1804    ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
  1805                SyscallSucceeds());
  1806    EXPECT_EQ(get_len, sizeof(get));
  1807    EXPECT_EQ(get, kTCPSynCnt);
  1808  }
  1809  
  1810  TEST_P(SimpleTcpSocketTest, SetTCPSynCntAboveMax) {
  1811    FileDescriptor s =
  1812        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1813    int get = -1;
  1814    socklen_t get_len = sizeof(get);
  1815    ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
  1816                SyscallSucceedsWithValue(0));
  1817    EXPECT_EQ(get_len, sizeof(get));
  1818    int default_syn_cnt = get;
  1819    {
  1820      constexpr int kTCPSynCnt = 256;
  1821      ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kTCPSynCnt,
  1822                             sizeof(kTCPSynCnt)),
  1823                  SyscallFailsWithErrno(EINVAL));
  1824  
  1825      int get = -1;
  1826      socklen_t get_len = sizeof(get);
  1827      ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
  1828                  SyscallSucceeds());
  1829      EXPECT_EQ(get_len, sizeof(get));
  1830      EXPECT_EQ(get, default_syn_cnt);
  1831    }
  1832  }
  1833  
  1834  TEST_P(SimpleTcpSocketTest, SetTCPWindowClampBelowMinRcvBuf) {
  1835    FileDescriptor s =
  1836        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1837  
  1838    // Discover minimum receive buf by setting a really low value
  1839    // for the receive buffer.
  1840    constexpr int kZero = 0;
  1841    EXPECT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &kZero, sizeof(kZero)),
  1842                SyscallSucceeds());
  1843  
  1844    // Now retrieve the minimum value for SO_RCVBUF as the set above should
  1845    // have caused SO_RCVBUF for the socket to be set to the minimum.
  1846    int get = -1;
  1847    socklen_t get_len = sizeof(get);
  1848    ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &get, &get_len),
  1849                SyscallSucceedsWithValue(0));
  1850    EXPECT_EQ(get_len, sizeof(get));
  1851    int min_so_rcvbuf = get;
  1852  
  1853    {
  1854      // TCP_WINDOW_CLAMP less than min_so_rcvbuf/2 should be set to
  1855      // min_so_rcvbuf/2.
  1856      int below_half_min_rcvbuf = min_so_rcvbuf / 2 - 1;
  1857      EXPECT_THAT(
  1858          setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP,
  1859                     &below_half_min_rcvbuf, sizeof(below_half_min_rcvbuf)),
  1860          SyscallSucceeds());
  1861  
  1862      int get = -1;
  1863      socklen_t get_len = sizeof(get);
  1864  
  1865      ASSERT_THAT(
  1866          getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len),
  1867          SyscallSucceedsWithValue(0));
  1868      EXPECT_EQ(get_len, sizeof(get));
  1869      EXPECT_EQ(min_so_rcvbuf / 2, get);
  1870    }
  1871  }
  1872  
  1873  TEST_P(SimpleTcpSocketTest, SetTCPWindowClampZeroClosedSocket) {
  1874    FileDescriptor s =
  1875        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1876    constexpr int kZero = 0;
  1877    ASSERT_THAT(
  1878        setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &kZero, sizeof(kZero)),
  1879        SyscallSucceeds());
  1880  
  1881    int get = -1;
  1882    socklen_t get_len = sizeof(get);
  1883    ASSERT_THAT(
  1884        getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len),
  1885        SyscallSucceeds());
  1886    EXPECT_EQ(get_len, sizeof(get));
  1887    EXPECT_EQ(get, kZero);
  1888  }
  1889  
  1890  TEST_P(SimpleTcpSocketTest, SetTCPWindowClampAboveHalfMinRcvBuf) {
  1891    FileDescriptor s =
  1892        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1893  
  1894    // Discover minimum receive buf by setting a really low value
  1895    // for the receive buffer.
  1896    constexpr int kZero = 0;
  1897    EXPECT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &kZero, sizeof(kZero)),
  1898                SyscallSucceeds());
  1899  
  1900    // Now retrieve the minimum value for SO_RCVBUF as the set above should
  1901    // have caused SO_RCVBUF for the socket to be set to the minimum.
  1902    int get = -1;
  1903    socklen_t get_len = sizeof(get);
  1904    ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &get, &get_len),
  1905                SyscallSucceedsWithValue(0));
  1906    EXPECT_EQ(get_len, sizeof(get));
  1907    int min_so_rcvbuf = get;
  1908  
  1909    {
  1910      int above_half_min_rcv_buf = min_so_rcvbuf / 2 + 1;
  1911      EXPECT_THAT(
  1912          setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP,
  1913                     &above_half_min_rcv_buf, sizeof(above_half_min_rcv_buf)),
  1914          SyscallSucceeds());
  1915  
  1916      int get = -1;
  1917      socklen_t get_len = sizeof(get);
  1918  
  1919      ASSERT_THAT(
  1920          getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len),
  1921          SyscallSucceedsWithValue(0));
  1922      EXPECT_EQ(get_len, sizeof(get));
  1923      EXPECT_EQ(above_half_min_rcv_buf, get);
  1924    }
  1925  }
  1926  
  1927  #ifdef __linux__
  1928  
  1929  // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER.
  1930  // gVisor currently silently ignores attaching a filter.
  1931  TEST_P(SimpleTcpSocketTest, SetSocketAttachDetachFilter) {
  1932    FileDescriptor s =
  1933        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1934    // Program generated using sudo tcpdump -i lo tcp and port 1234 -dd
  1935    struct sock_filter code[] = {
  1936        {0x28, 0, 0, 0x0000000c},  {0x15, 0, 6, 0x000086dd},
  1937        {0x30, 0, 0, 0x00000014},  {0x15, 0, 15, 0x00000006},
  1938        {0x28, 0, 0, 0x00000036},  {0x15, 12, 0, 0x000004d2},
  1939        {0x28, 0, 0, 0x00000038},  {0x15, 10, 11, 0x000004d2},
  1940        {0x15, 0, 10, 0x00000800}, {0x30, 0, 0, 0x00000017},
  1941        {0x15, 0, 8, 0x00000006},  {0x28, 0, 0, 0x00000014},
  1942        {0x45, 6, 0, 0x00001fff},  {0xb1, 0, 0, 0x0000000e},
  1943        {0x48, 0, 0, 0x0000000e},  {0x15, 2, 0, 0x000004d2},
  1944        {0x48, 0, 0, 0x00000010},  {0x15, 0, 1, 0x000004d2},
  1945        {0x6, 0, 0, 0x00040000},   {0x6, 0, 0, 0x00000000},
  1946    };
  1947    struct sock_fprog bpf = {
  1948        .len = ABSL_ARRAYSIZE(code),
  1949        .filter = code,
  1950    };
  1951    ASSERT_THAT(
  1952        setsockopt(s.get(), SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)),
  1953        SyscallSucceeds());
  1954  
  1955    constexpr int val = 0;
  1956    ASSERT_THAT(
  1957        setsockopt(s.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)),
  1958        SyscallSucceeds());
  1959  }
  1960  
  1961  #endif  // __linux__
  1962  
  1963  TEST_P(SimpleTcpSocketTest, SetSocketDetachFilterNoInstalledFilter) {
  1964    // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER.
  1965    SKIP_IF(IsRunningOnGvisor());
  1966    FileDescriptor s =
  1967        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1968    constexpr int val = 0;
  1969    ASSERT_THAT(
  1970        setsockopt(s.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)),
  1971        SyscallFailsWithErrno(ENOENT));
  1972  }
  1973  
  1974  TEST_P(SimpleTcpSocketTest, GetSocketDetachFilter) {
  1975    FileDescriptor s =
  1976        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1977  
  1978    int val = 0;
  1979    socklen_t val_len = sizeof(val);
  1980    ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, &val_len),
  1981                SyscallFailsWithErrno(ENOPROTOOPT));
  1982  }
  1983  
  1984  TEST_P(SimpleTcpSocketTest, CloseNonConnectedLingerOption) {
  1985    FileDescriptor s =
  1986        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1987  
  1988    constexpr int kLingerTimeout = 10;  // Seconds.
  1989  
  1990    // Set the SO_LINGER option.
  1991    struct linger sl = {
  1992        .l_onoff = 1,
  1993        .l_linger = kLingerTimeout,
  1994    };
  1995    ASSERT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)),
  1996                SyscallSucceeds());
  1997  
  1998    struct pollfd poll_fd = {
  1999        .fd = s.get(),
  2000        .events = POLLHUP,
  2001    };
  2002    constexpr int kPollTimeoutMs = 0;
  2003    ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
  2004                SyscallSucceedsWithValue(1));
  2005  
  2006    auto const start_time = absl::Now();
  2007    EXPECT_THAT(close(s.release()), SyscallSucceeds());
  2008    auto const end_time = absl::Now();
  2009  
  2010    // Close() should not linger and return immediately.
  2011    ASSERT_LT((end_time - start_time), absl::Seconds(kLingerTimeout));
  2012  }
  2013  
  2014  // Tests that SO_ACCEPTCONN returns non zero value for listening sockets.
  2015  TEST_P(TcpSocketTest, GetSocketAcceptConnListener) {
  2016    int got = -1;
  2017    socklen_t length = sizeof(got);
  2018    ASSERT_THAT(getsockopt(listener_, SOL_SOCKET, SO_ACCEPTCONN, &got, &length),
  2019                SyscallSucceeds());
  2020    ASSERT_EQ(length, sizeof(got));
  2021    EXPECT_EQ(got, 1);
  2022  }
  2023  
  2024  // Tests that SO_ACCEPTCONN returns zero value for not listening sockets.
  2025  TEST_P(TcpSocketTest, GetSocketAcceptConnNonListener) {
  2026    int got = -1;
  2027    socklen_t length = sizeof(got);
  2028    ASSERT_THAT(getsockopt(first_fd, SOL_SOCKET, SO_ACCEPTCONN, &got, &length),
  2029                SyscallSucceeds());
  2030    ASSERT_EQ(length, sizeof(got));
  2031    EXPECT_EQ(got, 0);
  2032  
  2033    ASSERT_THAT(getsockopt(second_fd, SOL_SOCKET, SO_ACCEPTCONN, &got, &length),
  2034                SyscallSucceeds());
  2035    ASSERT_EQ(length, sizeof(got));
  2036    EXPECT_EQ(got, 0);
  2037  }
  2038  
  2039  TEST_P(SimpleTcpSocketTest, GetSocketAcceptConnWithShutdown) {
  2040    // TODO(b/171345701): Fix the TCP state for listening socket on shutdown.
  2041    SKIP_IF(IsRunningOnGvisor());
  2042  
  2043    FileDescriptor s =
  2044        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  2045  
  2046    // Initialize address to the loopback one.
  2047    sockaddr_storage addr =
  2048        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddr(GetParam()));
  2049    socklen_t addrlen = sizeof(addr);
  2050  
  2051    // Bind to some port then start listening.
  2052    ASSERT_THAT(bind(s.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds());
  2053  
  2054    ASSERT_THAT(listen(s.get(), SOMAXCONN), SyscallSucceeds());
  2055  
  2056    int got = -1;
  2057    socklen_t length = sizeof(got);
  2058    ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ACCEPTCONN, &got, &length),
  2059                SyscallSucceeds());
  2060    ASSERT_EQ(length, sizeof(got));
  2061    EXPECT_EQ(got, 1);
  2062  
  2063    EXPECT_THAT(shutdown(s.get(), SHUT_RD), SyscallSucceeds());
  2064    ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ACCEPTCONN, &got, &length),
  2065                SyscallSucceeds());
  2066    ASSERT_EQ(length, sizeof(got));
  2067    EXPECT_EQ(got, 0);
  2068  }
  2069  
  2070  // Tests that connecting to an unspecified address results in ECONNREFUSED.
  2071  TEST_P(SimpleTcpSocketTest, ConnectUnspecifiedAddress) {
  2072    sockaddr_storage addr;
  2073    socklen_t addrlen = sizeof(addr);
  2074    memset(&addr, 0, addrlen);
  2075    addr.ss_family = GetParam();
  2076    auto do_connect = [&addr, addrlen]() {
  2077      FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
  2078          Socket(addr.ss_family, SOCK_STREAM, IPPROTO_TCP));
  2079      ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  2080                  SyscallFailsWithErrno(ECONNREFUSED));
  2081    };
  2082    do_connect();
  2083    // Test the v4 mapped address as well.
  2084    if (GetParam() == AF_INET6) {
  2085      auto sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr);
  2086      sin6->sin6_addr.s6_addr[10] = sin6->sin6_addr.s6_addr[11] = 0xff;
  2087      do_connect();
  2088    }
  2089  }
  2090  
  2091  INSTANTIATE_TEST_SUITE_P(AllInetTests, SimpleTcpSocketTest,
  2092                           ::testing::Values(AF_INET, AF_INET6));
  2093  
  2094  }  // namespace
  2095  
  2096  }  // namespace testing
  2097  }  // namespace gvisor