gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/test/syscalls/linux/tcp_socket.cc (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include <fcntl.h>
    16  
    17  #ifdef __linux__
    18  #include <linux/filter.h>
    19  #include <sys/epoll.h>
    20  #endif  // __linux__
    21  #include <errno.h>
    22  #include <netinet/in.h>
    23  #include <netinet/tcp.h>
    24  #include <poll.h>
    25  #include <sys/ioctl.h>
    26  #include <sys/socket.h>
    27  #include <unistd.h>
    28  
    29  #include <limits>
    30  #include <vector>
    31  
    32  #include "gmock/gmock.h"
    33  #include "gtest/gtest.h"
    34  #include "absl/status/statusor.h"
    35  #include "absl/time/clock.h"
    36  #include "absl/time/time.h"
    37  #include "test/util/file_descriptor.h"
    38  #include "test/util/posix_error.h"
    39  #include "test/util/socket_util.h"
    40  #include "test/util/test_util.h"
    41  #include "test/util/thread_util.h"
    42  
    43  using ::testing::AnyOf;
    44  
    45  namespace gvisor {
    46  namespace testing {
    47  
    48  namespace {
    49  
// Timeout value expressed in milliseconds (10 seconds).
constexpr int kTimeoutMillis = 10000;
    51  
    52  PosixErrorOr<sockaddr_storage> InetLoopbackAddrZeroPort(int family) {
    53    struct sockaddr_storage addr;
    54    memset(&addr, 0, sizeof(addr));
    55    addr.ss_family = family;
    56    switch (family) {
    57      case AF_INET: {
    58        auto& addr_in = reinterpret_cast<struct sockaddr_in&>(addr);
    59        addr_in.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    60        break;
    61      }
    62      case AF_INET6: {
    63        auto& addr_in6 = reinterpret_cast<struct sockaddr_in6&>(addr);
    64        addr_in6.sin6_addr = in6addr_loopback;
    65        break;
    66      }
    67      default:
    68        return PosixError(EINVAL,
    69                          absl::StrCat("unknown socket family: ", family));
    70    }
    71    return addr;
    72  }
    73  
    74  // Gets the port number from the address, assuming it is an IPv4 or IPv6 socket
    75  // address.
    76  absl::StatusOr<uint16_t> GetPort(const sockaddr_storage& addr) {
    77    switch (addr.ss_family) {
    78      case AF_INET:
    79        return reinterpret_cast<const struct sockaddr_in&>(addr).sin_port;
    80      case AF_INET6:
    81        return reinterpret_cast<const struct sockaddr_in6&>(addr).sin6_port;
    82      default:
    83        return absl::InvalidArgumentError("not an IPv4 or IPv6 address");
    84    }
    85  }
    86  
    87  // Allocates a file descriptor that is bound to a local port but not listening.
    88  // Sets `addr` and `addrlen` to the bound address.
    89  PosixErrorOr<FileDescriptor> ReserveLocalPort(int family,
    90                                                sockaddr_storage& addr,
    91                                                socklen_t& addrlen) {
    92    // Reserve a port by binding to it but not listening.
    93    ASSIGN_OR_RETURN_ERRNO(FileDescriptor reserving,
    94                           Socket(family, SOCK_STREAM, IPPROTO_TCP));
    95    if (int err = bind(reserving.get(), AsSockAddr(&addr), addrlen); err != 0) {
    96      return PosixError(err, "bind failed");
    97    }
    98    // Get the address with the reserved port because the port is chosen by the
    99    // stack.
   100    if (int err = getsockname(reserving.get(), AsSockAddr(&addr), &addrlen);
   101        err != 0) {
   102      return PosixError(err, "getsockname failed");
   103    }
   104    return reserving;
   105  }
   106  
   107  static void FillSocketBuffers(int sender, int receiver) {
   108    // Set the FD to O_NONBLOCK.
   109    int opts;
   110    int orig_opts;
   111    ASSERT_THAT(opts = fcntl(sender, F_GETFL), SyscallSucceeds());
   112    orig_opts = opts;
   113    opts |= O_NONBLOCK;
   114    ASSERT_THAT(fcntl(sender, F_SETFL, opts), SyscallSucceeds());
   115  
   116    // Set TCP_NODELAY, which will cause linux to fill the receive buffer from the
   117    // send buffer as quickly as possibly. This way we can fill up both buffers
   118    // faster.
   119    constexpr int tcp_nodelay_flag = 1;
   120    ASSERT_THAT(setsockopt(sender, IPPROTO_TCP, TCP_NODELAY, &tcp_nodelay_flag,
   121                           sizeof(tcp_nodelay_flag)),
   122                SyscallSucceeds());
   123  
   124    // Set a 256KB send/receive buffer.
   125    int buf_sz = 1 << 18;
   126    EXPECT_THAT(
   127        setsockopt(receiver, SOL_SOCKET, SO_RCVBUF, &buf_sz, sizeof(buf_sz)),
   128        SyscallSucceedsWithValue(0));
   129    EXPECT_THAT(
   130        setsockopt(sender, SOL_SOCKET, SO_SNDBUF, &buf_sz, sizeof(buf_sz)),
   131        SyscallSucceedsWithValue(0));
   132  
   133    // Create a large buffer that will be used for sending.
   134    std::vector<char> buf(buf_sz << 2);
   135  
   136    // Write until we receive an error.
   137    while (RetryEINTR(send)(sender, buf.data(), buf.size(), 0) != -1) {
   138      // Sleep to give linux a chance to move data from the send buffer to the
   139      // receive buffer.
   140      absl::SleepFor(absl::Milliseconds(100));  // 100ms.
   141    }
   142    // The last error should have been EWOULDBLOCK.
   143    ASSERT_EQ(errno, EWOULDBLOCK);
   144  
   145    // Restore the fcntl opts
   146    ASSERT_THAT(fcntl(sender, F_SETFL, orig_opts), SyscallSucceeds());
   147  }
   148  
// Fixture for tests parameterized by the address family to use (AF_INET and
// AF_INET6) when creating sockets.
//
// SetUp() gives every test a listening socket plus an established connection
// to it, whose two ends are `connected_` and `accepted_`.
class TcpSocketTest : public ::testing::TestWithParam<int> {
 protected:
  // Creates three sockets that will be used by test cases -- a listener, one
  // that connects, and the accepted one. Also records the SO_SNDBUF value of
  // the connecting socket in `sendbuf_size_`.
  void SetUp() override;

  // Listening socket.
  FileDescriptor listener_;

  // Socket connected via connect().
  FileDescriptor connected_;

  // Socket connected via accept().
  FileDescriptor accepted_;

  // Initial size of the send buffer of `connected_`, as reported by
  // getsockopt(SO_SNDBUF) in SetUp(). Holds -1 until SetUp() has stored it.
  int sendbuf_size_ = -1;
};
   169  
   170  void TcpSocketTest::SetUp() {
   171    listener_ =
   172        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
   173  
   174    connected_ =
   175        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
   176  
   177    // Initialize address to the loopback one.
   178    sockaddr_storage addr =
   179        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
   180    socklen_t addrlen = sizeof(addr);
   181  
   182    // Bind to some port then start listening.
   183    ASSERT_THAT(bind(listener_.get(), AsSockAddr(&addr), addrlen),
   184                SyscallSucceeds());
   185  
   186    ASSERT_THAT(listen(listener_.get(), SOMAXCONN), SyscallSucceeds());
   187  
   188    // Get the address we're listening on, then connect to it. We need to do this
   189    // because we're allowing the stack to pick a port for us.
   190    ASSERT_THAT(getsockname(listener_.get(), AsSockAddr(&addr), &addrlen),
   191                SyscallSucceeds());
   192  
   193    ASSERT_THAT(RetryEINTR(connect)(connected_.get(), AsSockAddr(&addr), addrlen),
   194                SyscallSucceeds());
   195  
   196    // Get the initial send buffer size.
   197    socklen_t optlen = sizeof(sendbuf_size_);
   198    ASSERT_THAT(getsockopt(connected_.get(), SOL_SOCKET, SO_SNDBUF,
   199                           &sendbuf_size_, &optlen),
   200                SyscallSucceeds());
   201  
   202    // Accept the connection.
   203    accepted_ =
   204        ASSERT_NO_ERRNO_AND_VALUE(Accept(listener_.get(), nullptr, nullptr));
   205  }
   206  
   207  TEST_P(TcpSocketTest, ConnectedAcceptedPeerAndLocalAreReciprocals) {
   208    struct FdAndAddrs {
   209      int fd;
   210      sockaddr_storage peer;
   211      socklen_t peer_len = sizeof(peer);
   212      sockaddr_storage name;
   213      socklen_t name_len = sizeof(name);
   214    };
   215  
   216    FdAndAddrs connected{.fd = connected_.get()}, accepted{.fd = accepted_.get()};
   217  
   218    for (FdAndAddrs* fd_and_addrs : {&connected, &accepted}) {
   219      ASSERT_THAT(getpeername(fd_and_addrs->fd, AsSockAddr(&fd_and_addrs->peer),
   220                              &fd_and_addrs->peer_len),
   221                  SyscallSucceeds());
   222      ASSERT_NE(fd_and_addrs->peer_len, 0);
   223      ASSERT_THAT(getsockname(fd_and_addrs->fd, AsSockAddr(&fd_and_addrs->name),
   224                              &fd_and_addrs->name_len),
   225                  SyscallSucceeds());
   226      ASSERT_NE(fd_and_addrs->name_len, 0);
   227    }
   228  
   229    ASSERT_EQ(connected.peer_len, accepted.name_len);
   230    EXPECT_EQ(memcmp(&connected.peer, &accepted.name, connected.peer_len), 0);
   231  
   232    ASSERT_EQ(connected.name_len, accepted.peer_len);
   233    EXPECT_EQ(memcmp(&connected.name, &accepted.peer, connected.name_len), 0);
   234  }
   235  
   236  TEST_P(TcpSocketTest, ConnectOnEstablishedConnection) {
   237    sockaddr_storage addr;
   238    socklen_t addrlen = sizeof(addr);
   239    ASSERT_THAT(getpeername(connected_.get(), AsSockAddr(&addr), &addrlen),
   240                SyscallSucceeds());
   241  
   242    ASSERT_THAT(RetryEINTR(connect)(
   243                    connected_.get(),
   244                    reinterpret_cast<const struct sockaddr*>(&addr), addrlen),
   245                SyscallFailsWithErrno(EISCONN));
   246  
   247    ASSERT_THAT(RetryEINTR(connect)(
   248                    accepted_.get(),
   249                    reinterpret_cast<const struct sockaddr*>(&addr), addrlen),
   250                SyscallFailsWithErrno(EISCONN));
   251  }
   252  
   253  TEST_P(TcpSocketTest, ShutdownWriteInTimeWait) {
   254    EXPECT_THAT(shutdown(accepted_.get(), SHUT_WR), SyscallSucceeds());
   255    EXPECT_THAT(shutdown(connected_.get(), SHUT_RDWR), SyscallSucceeds());
   256    absl::SleepFor(absl::Seconds(1));  // Wait to enter TIME_WAIT.
   257    EXPECT_THAT(shutdown(accepted_.get(), SHUT_WR),
   258                SyscallFailsWithErrno(ENOTCONN));
   259  }
   260  
   261  TEST_P(TcpSocketTest, ShutdownWriteInFinWait1) {
   262    EXPECT_THAT(shutdown(accepted_.get(), SHUT_WR), SyscallSucceeds());
   263    EXPECT_THAT(shutdown(accepted_.get(), SHUT_WR), SyscallSucceeds());
   264    absl::SleepFor(absl::Seconds(1));  // Wait to enter FIN-WAIT2.
   265    EXPECT_THAT(shutdown(accepted_.get(), SHUT_WR), SyscallSucceeds());
   266  }
   267  
   268  TEST_P(TcpSocketTest, DataCoalesced) {
   269    char buf[10];
   270  
   271    // Write in two steps.
   272    ASSERT_THAT(RetryEINTR(write)(connected_.get(), buf, sizeof(buf) / 2),
   273                SyscallSucceedsWithValue(sizeof(buf) / 2));
   274    ASSERT_THAT(RetryEINTR(write)(connected_.get(), buf, sizeof(buf) / 2),
   275                SyscallSucceedsWithValue(sizeof(buf) / 2));
   276  
   277    // Allow stack to process both packets.
   278    absl::SleepFor(absl::Seconds(1));
   279  
   280    // Read in one shot.
   281    EXPECT_THAT(RetryEINTR(recv)(accepted_.get(), buf, sizeof(buf), 0),
   282                SyscallSucceedsWithValue(sizeof(buf)));
   283  }
   284  
   285  TEST_P(TcpSocketTest, SenderAddressIgnored) {
   286    char buf[3];
   287    ASSERT_THAT(RetryEINTR(write)(connected_.get(), buf, sizeof(buf)),
   288                SyscallSucceedsWithValue(sizeof(buf)));
   289  
   290    struct sockaddr_storage addr;
   291    socklen_t addrlen = sizeof(addr);
   292    memset(&addr, 0, sizeof(addr));
   293  
   294    ASSERT_THAT(RetryEINTR(recvfrom)(accepted_.get(), buf, sizeof(buf), 0,
   295                                     AsSockAddr(&addr), &addrlen),
   296                SyscallSucceedsWithValue(3));
   297  
   298    // Check that addr remains zeroed-out.
   299    const char* ptr = reinterpret_cast<char*>(&addr);
   300    for (size_t i = 0; i < sizeof(addr); i++) {
   301      EXPECT_EQ(ptr[i], 0);
   302    }
   303  }
   304  
   305  TEST_P(TcpSocketTest, SenderAddressIgnoredOnPeek) {
   306    char buf[3];
   307    ASSERT_THAT(RetryEINTR(write)(connected_.get(), buf, sizeof(buf)),
   308                SyscallSucceedsWithValue(sizeof(buf)));
   309  
   310    struct sockaddr_storage addr;
   311    socklen_t addrlen = sizeof(addr);
   312    memset(&addr, 0, sizeof(addr));
   313  
   314    ASSERT_THAT(RetryEINTR(recvfrom)(accepted_.get(), buf, sizeof(buf), MSG_PEEK,
   315                                     AsSockAddr(&addr), &addrlen),
   316                SyscallSucceedsWithValue(3));
   317  
   318    // Check that addr remains zeroed-out.
   319    const char* ptr = reinterpret_cast<char*>(&addr);
   320    for (size_t i = 0; i < sizeof(addr); i++) {
   321      EXPECT_EQ(ptr[i], 0);
   322    }
   323  }
   324  
   325  TEST_P(TcpSocketTest, SendtoAddressIgnored) {
   326    struct sockaddr_storage addr;
   327    memset(&addr, 0, sizeof(addr));
   328    addr.ss_family = GetParam();  // FIXME(b/63803955)
   329  
   330    char data = '\0';
   331    EXPECT_THAT(RetryEINTR(sendto)(connected_.get(), &data, sizeof(data), 0,
   332                                   AsSockAddr(&addr), sizeof(addr)),
   333                SyscallSucceedsWithValue(1));
   334  }
   335  
   336  TEST_P(TcpSocketTest, WritevZeroIovec) {
   337    // 2 bytes just to be safe and have vecs[1] not point to something random
   338    // (even though length is 0).
   339    char buf[2];
   340    char recv_buf[1];
   341  
   342    // Construct a vec where the final vector is of length 0.
   343    iovec vecs[2] = {};
   344    vecs[0].iov_base = buf;
   345    vecs[0].iov_len = 1;
   346    vecs[1].iov_base = buf + 1;
   347    vecs[1].iov_len = 0;
   348  
   349    EXPECT_THAT(RetryEINTR(writev)(connected_.get(), vecs, 2),
   350                SyscallSucceedsWithValue(1));
   351  
   352    EXPECT_THAT(RetryEINTR(recv)(accepted_.get(), recv_buf, 1, 0),
   353                SyscallSucceedsWithValue(1));
   354    EXPECT_EQ(memcmp(recv_buf, buf, 1), 0);
   355  }
   356  
   357  TEST_P(TcpSocketTest, ZeroWriteAllowed) {
   358    char buf[3];
   359    // Send a zero length packet.
   360    ASSERT_THAT(RetryEINTR(write)(connected_.get(), buf, 0),
   361                SyscallSucceedsWithValue(0));
   362    // Verify that there is no packet available.
   363    EXPECT_THAT(RetryEINTR(recv)(accepted_.get(), buf, sizeof(buf), MSG_DONTWAIT),
   364                SyscallFailsWithErrno(EAGAIN));
   365  }
   366  
   367  // Test that a non-blocking write with a buffer that is larger than the send
   368  // buffer size will not actually write the whole thing at once. Regression test
   369  // for b/64438887.
   370  TEST_P(TcpSocketTest, NonblockingLargeWrite) {
   371    // Set the FD to O_NONBLOCK.
   372    int opts;
   373    ASSERT_THAT(opts = fcntl(connected_.get(), F_GETFL), SyscallSucceeds());
   374    opts |= O_NONBLOCK;
   375    ASSERT_THAT(fcntl(connected_.get(), F_SETFL, opts), SyscallSucceeds());
   376  
   377    // Allocate a buffer three times the size of the send buffer. We do this with
   378    // a vector to avoid allocating on the stack.
   379    int size = 3 * sendbuf_size_;
   380    std::vector<char> buf(size);
   381  
   382    // Try to write the whole thing.
   383    int n;
   384    ASSERT_THAT(n = RetryEINTR(write)(connected_.get(), buf.data(), size),
   385                SyscallSucceeds());
   386  
   387    // We should have written something, but not the whole thing.
   388    EXPECT_GT(n, 0);
   389    EXPECT_LT(n, size);
   390  }
   391  
   392  // Test that a blocking write with a buffer that is larger than the send buffer
   393  // will block until the entire buffer is sent.
   394  TEST_P(TcpSocketTest, BlockingLargeWrite) {
   395    // Allocate a buffer three times the size of the send buffer on the heap. We
   396    // do this as a vector to avoid allocating on the stack.
   397    int size = 3 * sendbuf_size_;
   398    std::vector<char> writebuf(size);
   399  
   400    // Start reading the response in a loop.
   401    int read_bytes = 0;
   402    ScopedThread t([this, &read_bytes]() {
   403      // Avoid interrupting the blocking write in main thread.
   404      const DisableSave disable_save;
   405  
   406      // Take ownership of the FD so that we close it on failure. This will
   407      // unblock the blocking write below.
   408      FileDescriptor fd(std::move(accepted_));
   409  
   410      char readbuf[2500] = {};
   411      int n = -1;
   412      while (n != 0) {
   413        ASSERT_THAT(n = RetryEINTR(read)(fd.get(), &readbuf, sizeof(readbuf)),
   414                    SyscallSucceeds());
   415        read_bytes += n;
   416      }
   417    });
   418  
   419    // Try to write the whole thing.
   420    int n;
   421    ASSERT_THAT(n = WriteFd(connected_.get(), writebuf.data(), size),
   422                SyscallSucceeds());
   423  
   424    // We should have written the whole thing.
   425    EXPECT_EQ(n, size);
   426    EXPECT_THAT(close(connected_.release()), SyscallSucceedsWithValue(0));
   427    t.Join();
   428  
   429    // We should have read the whole thing.
   430    EXPECT_EQ(read_bytes, size);
   431  }
   432  
   433  // Test that a send with MSG_DONTWAIT flag and buffer that larger than the send
   434  // buffer size will not write the whole thing.
   435  TEST_P(TcpSocketTest, LargeSendDontWait) {
   436    // Allocate a buffer three times the size of the send buffer. We do this on
   437    // with a vector to avoid allocating on the stack.
   438    int size = 3 * sendbuf_size_;
   439    std::vector<char> buf(size);
   440  
   441    // Try to write the whole thing with MSG_DONTWAIT flag, which can
   442    // return a partial write.
   443    int n;
   444    ASSERT_THAT(
   445        n = RetryEINTR(send)(connected_.get(), buf.data(), size, MSG_DONTWAIT),
   446        SyscallSucceeds());
   447  
   448    // We should have written something, but not the whole thing.
   449    EXPECT_GT(n, 0);
   450    EXPECT_LT(n, size);
   451  }
   452  
   453  // Test that a send on a non-blocking socket with a buffer that larger than the
   454  // send buffer will not write the whole thing at once.
   455  TEST_P(TcpSocketTest, NonblockingLargeSend) {
   456    // Set the FD to O_NONBLOCK.
   457    int opts;
   458    ASSERT_THAT(opts = fcntl(connected_.get(), F_GETFL), SyscallSucceeds());
   459    opts |= O_NONBLOCK;
   460    ASSERT_THAT(fcntl(connected_.get(), F_SETFL, opts), SyscallSucceeds());
   461  
   462    // Allocate a buffer three times the size of the send buffer. We do this on
   463    // with a vector to avoid allocating on the stack.
   464    int size = 3 * sendbuf_size_;
   465    std::vector<char> buf(size);
   466  
   467    // Try to write the whole thing.
   468    int n;
   469    ASSERT_THAT(n = RetryEINTR(send)(connected_.get(), buf.data(), size, 0),
   470                SyscallSucceeds());
   471  
   472    // We should have written something, but not the whole thing.
   473    EXPECT_GT(n, 0);
   474    EXPECT_LT(n, size);
   475  }
   476  
   477  // Same test as above, but calls send instead of write.
   478  TEST_P(TcpSocketTest, BlockingLargeSend) {
   479    // Allocate a buffer three times the size of the send buffer. We do this on
   480    // with a vector to avoid allocating on the stack.
   481    int size = 3 * sendbuf_size_;
   482    std::vector<char> writebuf(size);
   483  
   484    // Start reading the response in a loop.
   485    int read_bytes = 0;
   486    ScopedThread t([this, &read_bytes]() {
   487      // Avoid interrupting the blocking write in main thread.
   488      const DisableSave disable_save;
   489  
   490      // Take ownership of the FD so that we close it on failure. This will
   491      // unblock the blocking write below.
   492      FileDescriptor fd(std::move(accepted_));
   493  
   494      char readbuf[2500] = {};
   495      int n = -1;
   496      while (n != 0) {
   497        ASSERT_THAT(n = RetryEINTR(read)(fd.get(), &readbuf, sizeof(readbuf)),
   498                    SyscallSucceeds());
   499        read_bytes += n;
   500      }
   501    });
   502  
   503    // Try to send the whole thing.
   504    int n;
   505    ASSERT_THAT(n = SendFd(connected_.get(), writebuf.data(), size, 0),
   506                SyscallSucceeds());
   507  
   508    // We should have written the whole thing.
   509    EXPECT_EQ(n, size);
   510    EXPECT_THAT(close(connected_.release()), SyscallSucceedsWithValue(0));
   511    t.Join();
   512  
   513    // We should have read the whole thing.
   514    EXPECT_EQ(read_bytes, size);
   515  }
   516  
   517  // Test that polling on a socket with a full send buffer will block.
   518  TEST_P(TcpSocketTest, PollWithFullBufferBlocks) {
   519    FillSocketBuffers(connected_.get(), accepted_.get());
   520    // Now polling on the FD with a timeout should return 0 corresponding to no
   521    // FDs ready.
   522    struct pollfd poll_fd = {connected_.get(), POLLOUT, 0};
   523    EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10), SyscallSucceedsWithValue(0));
   524  }
   525  
   526  TEST_P(TcpSocketTest, ClosedWriteBlockingSocket) {
   527    FillSocketBuffers(connected_.get(), accepted_.get());
   528    constexpr int timeout = 10;
   529    struct timeval tv = {.tv_sec = timeout, .tv_usec = 0};
   530    EXPECT_THAT(
   531        setsockopt(connected_.get(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)),
   532        SyscallSucceeds());
   533  
   534    struct timespec begin;
   535    struct timespec end;
   536    const DisableSave disable_save;  // Timing-related.
   537    EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &begin), SyscallSucceeds());
   538  
   539    ScopedThread send_thread([this]() {
   540      char send_byte;
   541      // Expect the send() to be blocked until receive timeout.
   542      ASSERT_THAT(
   543          RetryEINTR(send)(connected_.get(), &send_byte, sizeof(send_byte), 0),
   544          SyscallFailsWithErrno(EAGAIN));
   545    });
   546  
   547    // Wait for the thread to be blocked on write.
   548    absl::SleepFor(absl::Milliseconds(250));
   549    // Socket close does not have any effect on a blocked write.
   550    ASSERT_THAT(close(connected_.release()), SyscallSucceeds());
   551  
   552    send_thread.Join();
   553  
   554    EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &end), SyscallSucceeds());
   555    // Check the lower bound on the timeout.  Checking for an upper bound is
   556    // fragile because Linux can overrun the timeout due to scheduling delays.
   557    EXPECT_GT(ms_elapsed(begin, end), timeout * 1000 - 1);
   558  }
   559  
   560  TEST_P(TcpSocketTest, ClosedReadBlockingSocket) {
   561    constexpr int timeout = 10;
   562    struct timeval tv = {.tv_sec = timeout, .tv_usec = 0};
   563    EXPECT_THAT(
   564        setsockopt(connected_.get(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)),
   565        SyscallSucceeds());
   566  
   567    struct timespec begin;
   568    struct timespec end;
   569    const DisableSave disable_save;  // Timing-related.
   570    EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &begin), SyscallSucceeds());
   571  
   572    ScopedThread read_thread([this]() {
   573      char read_byte;
   574      // Expect the read() to be blocked until receive timeout.
   575      ASSERT_THAT(read(connected_.get(), &read_byte, sizeof(read_byte)),
   576                  SyscallFailsWithErrno(EAGAIN));
   577    });
   578  
   579    // Wait for the thread to be blocked on read.
   580    absl::SleepFor(absl::Milliseconds(250));
   581    // Socket close does not have any effect on a blocked read.
   582    ASSERT_THAT(close(connected_.release()), SyscallSucceeds());
   583  
   584    read_thread.Join();
   585  
   586    EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &end), SyscallSucceeds());
   587    // Check the lower bound on the timeout.  Checking for an upper bound is
   588    // fragile because Linux can overrun the timeout due to scheduling delays.
   589    EXPECT_GT(ms_elapsed(begin, end), timeout * 1000 - 1);
   590  }
   591  
   592  TEST_P(TcpSocketTest, MsgTrunc) {
   593    char sent_data[512];
   594    RandomizeBuffer(sent_data, sizeof(sent_data));
   595    ASSERT_THAT(
   596        RetryEINTR(send)(connected_.get(), sent_data, sizeof(sent_data), 0),
   597        SyscallSucceedsWithValue(sizeof(sent_data)));
   598    char received_data[sizeof(sent_data)] = {};
   599    ASSERT_THAT(RetryEINTR(recv)(accepted_.get(), received_data,
   600                                 sizeof(received_data) / 2, MSG_TRUNC),
   601                SyscallSucceedsWithValue(sizeof(sent_data) / 2));
   602  
   603    // Check that we didn't get anything.
   604    char zeros[sizeof(received_data)] = {};
   605    EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data)));
   606  }
   607  
   608  // MSG_CTRUNC is a return flag but linux allows it to be set on input flags
   609  // without returning an error.
   610  TEST_P(TcpSocketTest, MsgTruncWithCtrunc) {
   611    char sent_data[512];
   612    RandomizeBuffer(sent_data, sizeof(sent_data));
   613    ASSERT_THAT(
   614        RetryEINTR(send)(connected_.get(), sent_data, sizeof(sent_data), 0),
   615        SyscallSucceedsWithValue(sizeof(sent_data)));
   616    char received_data[sizeof(sent_data)] = {};
   617    ASSERT_THAT(
   618        RetryEINTR(recv)(accepted_.get(), received_data,
   619                         sizeof(received_data) / 2, MSG_TRUNC | MSG_CTRUNC),
   620        SyscallSucceedsWithValue(sizeof(sent_data) / 2));
   621  
   622    // Check that we didn't get anything.
   623    char zeros[sizeof(received_data)] = {};
   624    EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data)));
   625  }
   626  
   627  // This test will verify that MSG_CTRUNC doesn't do anything when specified
   628  // on input.
   629  TEST_P(TcpSocketTest, MsgTruncWithCtruncOnly) {
   630    char sent_data[512];
   631    RandomizeBuffer(sent_data, sizeof(sent_data));
   632    ASSERT_THAT(
   633        RetryEINTR(send)(connected_.get(), sent_data, sizeof(sent_data), 0),
   634        SyscallSucceedsWithValue(sizeof(sent_data)));
   635    char received_data[sizeof(sent_data)] = {};
   636    ASSERT_THAT(RetryEINTR(recv)(accepted_.get(), received_data,
   637                                 sizeof(received_data) / 2, MSG_CTRUNC),
   638                SyscallSucceedsWithValue(sizeof(sent_data) / 2));
   639  
   640    // Since MSG_CTRUNC here had no affect, it should not behave like MSG_TRUNC.
   641    EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2));
   642  }
   643  
   644  TEST_P(TcpSocketTest, MsgTruncLargeSize) {
   645    char sent_data[512];
   646    RandomizeBuffer(sent_data, sizeof(sent_data));
   647    ASSERT_THAT(
   648        RetryEINTR(send)(connected_.get(), sent_data, sizeof(sent_data), 0),
   649        SyscallSucceedsWithValue(sizeof(sent_data)));
   650    char received_data[sizeof(sent_data) * 2] = {};
   651    ASSERT_THAT(RetryEINTR(recv)(accepted_.get(), received_data,
   652                                 sizeof(received_data), MSG_TRUNC),
   653                SyscallSucceedsWithValue(sizeof(sent_data)));
   654  
   655    // Check that we didn't get anything.
   656    char zeros[sizeof(received_data)] = {};
   657    EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data)));
   658  }
   659  
   660  TEST_P(TcpSocketTest, MsgTruncPeek) {
   661    char sent_data[512];
   662    RandomizeBuffer(sent_data, sizeof(sent_data));
   663    ASSERT_THAT(
   664        RetryEINTR(send)(connected_.get(), sent_data, sizeof(sent_data), 0),
   665        SyscallSucceedsWithValue(sizeof(sent_data)));
   666    char received_data[sizeof(sent_data)] = {};
   667    ASSERT_THAT(RetryEINTR(recv)(accepted_.get(), received_data,
   668                                 sizeof(received_data) / 2, MSG_TRUNC | MSG_PEEK),
   669                SyscallSucceedsWithValue(sizeof(sent_data) / 2));
   670  
   671    // Check that we didn't get anything.
   672    char zeros[sizeof(received_data)] = {};
   673    EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data)));
   674  
   675    // Check that we can still get all of the data.
   676    ASSERT_THAT(RetryEINTR(recv)(accepted_.get(), received_data,
   677                                 sizeof(received_data), 0),
   678                SyscallSucceedsWithValue(sizeof(sent_data)));
   679    EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data)));
   680  }
   681  
   682  TEST_P(TcpSocketTest, NoDelayDefault) {
   683    int get = -1;
   684    socklen_t get_len = sizeof(get);
   685    EXPECT_THAT(
   686        getsockopt(connected_.get(), IPPROTO_TCP, TCP_NODELAY, &get, &get_len),
   687        SyscallSucceedsWithValue(0));
   688    EXPECT_EQ(get_len, sizeof(get));
   689    EXPECT_EQ(get, kSockOptOff);
   690  }
   691  
   692  TEST_P(TcpSocketTest, SetNoDelay) {
   693    ASSERT_THAT(setsockopt(connected_.get(), IPPROTO_TCP, TCP_NODELAY,
   694                           &kSockOptOn, sizeof(kSockOptOn)),
   695                SyscallSucceeds());
   696  
   697    int get = -1;
   698    socklen_t get_len = sizeof(get);
   699    EXPECT_THAT(
   700        getsockopt(connected_.get(), IPPROTO_TCP, TCP_NODELAY, &get, &get_len),
   701        SyscallSucceedsWithValue(0));
   702    EXPECT_EQ(get_len, sizeof(get));
   703    EXPECT_EQ(get, kSockOptOn);
   704  
   705    ASSERT_THAT(setsockopt(connected_.get(), IPPROTO_TCP, TCP_NODELAY,
   706                           &kSockOptOff, sizeof(kSockOptOff)),
   707                SyscallSucceeds());
   708  
   709    EXPECT_THAT(
   710        getsockopt(connected_.get(), IPPROTO_TCP, TCP_NODELAY, &get, &get_len),
   711        SyscallSucceedsWithValue(0));
   712    EXPECT_EQ(get_len, sizeof(get));
   713    EXPECT_EQ(get, kSockOptOff);
   714  }
   715  
// Fallback definition for builds whose headers do not provide TCP_INQ.
// NOTE(review): 36 is presumably the Linux value of this option -- confirm
// against linux/tcp.h.
#ifndef TCP_INQ
#define TCP_INQ 36
#endif
   719  
   720  TEST_P(TcpSocketTest, TcpInqSetSockOpt) {
   721    char buf[1024];
   722    ASSERT_THAT(RetryEINTR(write)(connected_.get(), buf, sizeof(buf)),
   723                SyscallSucceedsWithValue(sizeof(buf)));
   724  
   725    // TCP_INQ is disabled by default.
   726    int val = -1;
   727    socklen_t slen = sizeof(val);
   728    EXPECT_THAT(getsockopt(accepted_.get(), SOL_TCP, TCP_INQ, &val, &slen),
   729                SyscallSucceedsWithValue(0));
   730    ASSERT_EQ(val, 0);
   731  
   732    // Try to set TCP_INQ.
   733    val = 1;
   734    EXPECT_THAT(setsockopt(accepted_.get(), SOL_TCP, TCP_INQ, &val, sizeof(val)),
   735                SyscallSucceedsWithValue(0));
   736    val = -1;
   737    slen = sizeof(val);
   738    EXPECT_THAT(getsockopt(accepted_.get(), SOL_TCP, TCP_INQ, &val, &slen),
   739                SyscallSucceedsWithValue(0));
   740    ASSERT_EQ(val, 1);
   741  
   742    // Try to unset TCP_INQ.
   743    val = 0;
   744    EXPECT_THAT(setsockopt(accepted_.get(), SOL_TCP, TCP_INQ, &val, sizeof(val)),
   745                SyscallSucceedsWithValue(0));
   746    val = -1;
   747    slen = sizeof(val);
   748    EXPECT_THAT(getsockopt(accepted_.get(), SOL_TCP, TCP_INQ, &val, &slen),
   749                SyscallSucceedsWithValue(0));
   750    ASSERT_EQ(val, 0);
   751  }
   752  
   753  TEST_P(TcpSocketTest, TcpInq) {
   754    char buf[1024];
   755    // Write more than one TCP segment.
   756    int size = sizeof(buf);
   757    int kChunk = sizeof(buf) / 4;
   758    for (int i = 0; i < size; i += kChunk) {
   759      ASSERT_THAT(RetryEINTR(write)(connected_.get(), buf, kChunk),
   760                  SyscallSucceedsWithValue(kChunk));
   761    }
   762  
   763    int val = 1;
   764    kChunk = sizeof(buf) / 2;
   765    EXPECT_THAT(setsockopt(accepted_.get(), SOL_TCP, TCP_INQ, &val, sizeof(val)),
   766                SyscallSucceedsWithValue(0));
   767  
   768    // Wait when all data will be in the received queue.
   769    while (true) {
   770      ASSERT_THAT(ioctl(accepted_.get(), TIOCINQ, &size), SyscallSucceeds());
   771      if (size == sizeof(buf)) {
   772        break;
   773      }
   774      absl::SleepFor(absl::Milliseconds(10));
   775    }
   776  
   777    struct msghdr msg = {};
   778    std::vector<char> control(CMSG_SPACE(sizeof(int)));
   779    size = sizeof(buf);
   780    struct iovec iov;
   781    while (size != 0) {
   782      msg.msg_control = &control[0];
   783      msg.msg_controllen = control.size();
   784  
   785      iov.iov_base = buf;
   786      iov.iov_len = kChunk;
   787      msg.msg_iov = &iov;
   788      msg.msg_iovlen = 1;
   789      ASSERT_THAT(RetryEINTR(recvmsg)(accepted_.get(), &msg, 0),
   790                  SyscallSucceedsWithValue(kChunk));
   791      size -= kChunk;
   792  
   793      struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
   794      ASSERT_NE(cmsg, nullptr);
   795      ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int)));
   796      ASSERT_EQ(cmsg->cmsg_level, SOL_TCP);
   797      ASSERT_EQ(cmsg->cmsg_type, TCP_INQ);
   798  
   799      int inq = 0;
   800      memcpy(&inq, CMSG_DATA(cmsg), sizeof(int));
   801      ASSERT_EQ(inq, size);
   802    }
   803  }
   804  
   805  TEST_P(TcpSocketTest, Tiocinq) {
   806    char buf[1024];
   807    int size = sizeof(buf);
   808    ASSERT_THAT(RetryEINTR(write)(connected_.get(), buf, size),
   809                SyscallSucceedsWithValue(size));
   810  
   811    uint32_t seed = time(nullptr);
   812    const size_t max_chunk = size / 10;
   813    while (size > 0) {
   814      size_t chunk = (rand_r(&seed) % max_chunk) + 1;
   815      ssize_t read =
   816          RetryEINTR(recvfrom)(accepted_.get(), buf, chunk, 0, nullptr, nullptr);
   817      ASSERT_THAT(read, SyscallSucceeds());
   818      size -= read;
   819  
   820      // The remaining data should end up in the receive queue.
   821      constexpr absl::Duration kSleepFor = absl::Milliseconds(10);
   822      int inq = 0;
   823      for (const auto start = absl::Now();
   824           absl::Now() <= start + absl::Milliseconds(kTimeoutMillis);) {
   825        ASSERT_THAT(ioctl(accepted_.get(), TIOCINQ, &inq), SyscallSucceeds());
   826        if (size == inq) {
   827          break;
   828        }
   829        absl::SleepFor(kSleepFor);
   830      }
   831  
   832      ASSERT_EQ(inq, size);
   833    }
   834  }
   835  
   836  TEST_P(TcpSocketTest, TcpSCMPriority) {
   837    char buf[1024];
   838    ASSERT_THAT(RetryEINTR(write)(connected_.get(), buf, sizeof(buf)),
   839                SyscallSucceedsWithValue(sizeof(buf)));
   840  
   841    int val = 1;
   842    EXPECT_THAT(setsockopt(accepted_.get(), SOL_TCP, TCP_INQ, &val, sizeof(val)),
   843                SyscallSucceedsWithValue(0));
   844    EXPECT_THAT(
   845        setsockopt(accepted_.get(), SOL_SOCKET, SO_TIMESTAMP, &val, sizeof(val)),
   846        SyscallSucceedsWithValue(0));
   847  
   848    struct msghdr msg = {};
   849    std::vector<char> control(
   850        CMSG_SPACE(sizeof(struct timeval) + CMSG_SPACE(sizeof(int))));
   851    struct iovec iov;
   852    msg.msg_control = &control[0];
   853    msg.msg_controllen = control.size();
   854  
   855    iov.iov_base = buf;
   856    iov.iov_len = sizeof(buf);
   857    msg.msg_iov = &iov;
   858    msg.msg_iovlen = 1;
   859    ASSERT_THAT(RetryEINTR(recvmsg)(accepted_.get(), &msg, 0),
   860                SyscallSucceedsWithValue(sizeof(buf)));
   861  
   862    struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
   863    ASSERT_NE(cmsg, nullptr);
   864    // TODO(b/78348848): SO_TIMESTAMP isn't implemented for TCP sockets.
   865    if (!IsRunningOnGvisor() || cmsg->cmsg_level == SOL_SOCKET) {
   866      ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET);
   867      ASSERT_EQ(cmsg->cmsg_type, SO_TIMESTAMP);
   868      ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(struct timeval)));
   869  
   870      cmsg = CMSG_NXTHDR(&msg, cmsg);
   871      ASSERT_NE(cmsg, nullptr);
   872    }
   873    ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int)));
   874    ASSERT_EQ(cmsg->cmsg_level, SOL_TCP);
   875    ASSERT_EQ(cmsg->cmsg_type, TCP_INQ);
   876  
   877    int inq = 0;
   878    memcpy(&inq, CMSG_DATA(cmsg), sizeof(int));
   879    ASSERT_EQ(inq, 0);
   880  
   881    cmsg = CMSG_NXTHDR(&msg, cmsg);
   882    ASSERT_EQ(cmsg, nullptr);
   883  }
   884  
   885  TEST_P(TcpSocketTest, TimeWaitPollHUP) {
   886    shutdown(connected_.get(), SHUT_RDWR);
   887    ScopedThread t([&]() {
   888      constexpr int16_t want_events = POLLHUP;
   889      struct pollfd pfd = {
   890          .fd = connected_.get(),
   891          .events = want_events,
   892      };
   893      ASSERT_THAT(poll(&pfd, 1, kTimeoutMillis), SyscallSucceedsWithValue(1));
   894    });
   895    shutdown(accepted_.get(), SHUT_RDWR);
   896    t.Join();
   897    // At this point first_fd should be in TIME-WAIT and polling for POLLHUP
   898    // should return with 1 FD.
   899    constexpr int16_t want_events = POLLHUP;
   900    struct pollfd pfd = {
   901        .fd = connected_.get(),
   902        .events = want_events,
   903    };
   904    ASSERT_THAT(poll(&pfd, 1, kTimeoutMillis), SyscallSucceedsWithValue(1));
   905  }
   906  
   907  // Tests that send will return EWOULDBLOCK initially with large buffer and will
   908  // succeed after the send buffer size is increased.
   909  TEST_P(TcpSocketTest, SendUnblocksOnSendBufferIncrease) {
   910    // Set the FD to O_NONBLOCK.
   911    int opts;
   912    ASSERT_THAT(opts = fcntl(connected_.get(), F_GETFL), SyscallSucceeds());
   913    opts |= O_NONBLOCK;
   914    ASSERT_THAT(fcntl(connected_.get(), F_SETFL, opts), SyscallSucceeds());
   915  
   916    // Get maximum buffer size by trying to set it to a large value.
   917    constexpr int kSndBufSz = 0xffffffff;
   918    ASSERT_THAT(setsockopt(connected_.get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz,
   919                           sizeof(kSndBufSz)),
   920                SyscallSucceeds());
   921  
   922    int max_buffer_sz = 0;
   923    socklen_t max_len = sizeof(max_buffer_sz);
   924    ASSERT_THAT(getsockopt(connected_.get(), SOL_SOCKET, SO_SNDBUF,
   925                           &max_buffer_sz, &max_len),
   926                SyscallSucceeds());
   927  
   928    int buffer_sz = max_buffer_sz >> 2;
   929    EXPECT_THAT(setsockopt(connected_.get(), SOL_SOCKET, SO_SNDBUF, &buffer_sz,
   930                           sizeof(buffer_sz)),
   931                SyscallSucceedsWithValue(0));
   932  
   933    // Create a large buffer that will be used for sending.
   934    std::vector<char> buffer(max_buffer_sz);
   935  
   936    // Write until we receive an error.
   937    while (RetryEINTR(send)(connected_.get(), buffer.data(), buffer.size(), 0) !=
   938           -1) {
   939      // Sleep to give linux a chance to move data from the send buffer to the
   940      // receive buffer.
   941      absl::SleepFor(absl::Milliseconds(10));  // 10ms.
   942    }
   943  
   944    // The last error should have been EWOULDBLOCK.
   945    ASSERT_EQ(errno, EWOULDBLOCK);
   946  
   947    ScopedThread send_thread([this]() {
   948      int flags = 0;
   949      ASSERT_THAT(flags = fcntl(connected_.get(), F_GETFL), SyscallSucceeds());
   950      EXPECT_THAT(fcntl(connected_.get(), F_SETFL, flags & ~O_NONBLOCK),
   951                  SyscallSucceeds());
   952  
   953      // Expect the send() to succeed.
   954      char buffer;
   955      ASSERT_THAT(RetryEINTR(send)(connected_.get(), &buffer, sizeof(buffer), 0),
   956                  SyscallSucceeds());
   957    });
   958  
   959    // Set SO_SNDBUF to maximum buffer size allowed.
   960    buffer_sz = max_buffer_sz >> 1;
   961    EXPECT_THAT(setsockopt(connected_.get(), SOL_SOCKET, SO_SNDBUF, &buffer_sz,
   962                           sizeof(buffer_sz)),
   963                SyscallSucceedsWithValue(0));
   964  
   965    send_thread.Join();
   966  }
   967  
// Instantiates the TcpSocketTest suite under the AllInetTests prefix with two
// parameter values, AF_INET and AF_INET6.
INSTANTIATE_TEST_SUITE_P(AllInetTests, TcpSocketTest,
                         ::testing::Values(AF_INET, AF_INET6));
   970  
// Fixture for tests parameterized by address family that don't want the fixture
// to do things. The parameter is a plain int, which the tests below read via
// GetParam() and pass as the socket family; each test creates its own sockets.
using SimpleTcpSocketTest = ::testing::TestWithParam<int>;
   974  
   975  TEST_P(SimpleTcpSocketTest, SendUnconnected) {
   976    int fd;
   977    ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
   978                SyscallSucceeds());
   979    FileDescriptor sock_fd(fd);
   980  
   981    char data = '\0';
   982    EXPECT_THAT(RetryEINTR(send)(fd, &data, sizeof(data), 0),
   983                SyscallFailsWithErrno(EPIPE));
   984  }
   985  
   986  TEST_P(SimpleTcpSocketTest, SendtoWithoutAddressUnconnected) {
   987    int fd;
   988    ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
   989                SyscallSucceeds());
   990    FileDescriptor sock_fd(fd);
   991  
   992    char data = '\0';
   993    EXPECT_THAT(RetryEINTR(sendto)(fd, &data, sizeof(data), 0, nullptr, 0),
   994                SyscallFailsWithErrno(EPIPE));
   995  }
   996  
   997  TEST_P(SimpleTcpSocketTest, SendtoWithAddressUnconnected) {
   998    int fd;
   999    ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
  1000                SyscallSucceeds());
  1001    FileDescriptor sock_fd(fd);
  1002  
  1003    sockaddr_storage addr =
  1004        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  1005    char data = '\0';
  1006    EXPECT_THAT(RetryEINTR(sendto)(fd, &data, sizeof(data), 0, AsSockAddr(&addr),
  1007                                   sizeof(addr)),
  1008                SyscallFailsWithErrno(EPIPE));
  1009  }
  1010  
  1011  TEST_P(SimpleTcpSocketTest, GetPeerNameUnconnected) {
  1012    int fd;
  1013    ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
  1014                SyscallSucceeds());
  1015    FileDescriptor sock_fd(fd);
  1016  
  1017    sockaddr_storage addr;
  1018    socklen_t addrlen = sizeof(addr);
  1019    EXPECT_THAT(getpeername(fd, AsSockAddr(&addr), &addrlen),
  1020                SyscallFailsWithErrno(ENOTCONN));
  1021  }
  1022  
  1023  TEST_P(SimpleTcpSocketTest, GetSockNameUnbound) {
  1024    int fd;
  1025    ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
  1026                SyscallSucceeds());
  1027    FileDescriptor sock_fd(fd);
  1028  
  1029    sockaddr_storage addr;
  1030    // Ensure that any 0s we read later have been explicitly set by getsockname.
  1031    memset(&addr, -1, sizeof(addr));
  1032    socklen_t addrlen = sizeof(addr);
  1033    EXPECT_THAT(getsockname(fd, AsSockAddr(&addr), &addrlen), SyscallSucceeds());
  1034    switch (GetParam()) {
  1035      case AF_INET: {
  1036        ASSERT_EQ(addrlen, sizeof(sockaddr_in));
  1037        auto sock_addr_in = reinterpret_cast<const sockaddr_in*>(&addr);
  1038        ASSERT_EQ(sock_addr_in->sin_addr.s_addr, 0);
  1039        ASSERT_EQ(sock_addr_in->sin_port, 0);
  1040        break;
  1041      }
  1042      case AF_INET6: {
  1043        ASSERT_EQ(addrlen, sizeof(sockaddr_in6));
  1044        auto sock_addr_in6 = reinterpret_cast<const sockaddr_in6*>(&addr);
  1045        ASSERT_TRUE(IN6_IS_ADDR_UNSPECIFIED(&sock_addr_in6->sin6_addr));
  1046        ASSERT_EQ(sock_addr_in6->sin6_port, 0);
  1047        break;
  1048      }
  1049      default: {
  1050        ADD_FAILURE() << "unreachable";
  1051        break;
  1052      }
  1053    }
  1054  }
  1055  
  1056  TEST_P(TcpSocketTest, FullBuffer) {
  1057    // Set both FDs to be blocking.
  1058    int flags = 0;
  1059    ASSERT_THAT(flags = fcntl(connected_.get(), F_GETFL), SyscallSucceeds());
  1060    EXPECT_THAT(fcntl(connected_.get(), F_SETFL, flags & ~O_NONBLOCK),
  1061                SyscallSucceeds());
  1062    flags = 0;
  1063    ASSERT_THAT(flags = fcntl(accepted_.get(), F_GETFL), SyscallSucceeds());
  1064    EXPECT_THAT(fcntl(accepted_.get(), F_SETFL, flags & ~O_NONBLOCK),
  1065                SyscallSucceeds());
  1066  
  1067    // 2500 was chosen as a small value that can be set on Linux.
  1068    int set_snd = 2500;
  1069    EXPECT_THAT(setsockopt(connected_.get(), SOL_SOCKET, SO_SNDBUF, &set_snd,
  1070                           sizeof(set_snd)),
  1071                SyscallSucceedsWithValue(0));
  1072    int get_snd = -1;
  1073    socklen_t get_snd_len = sizeof(get_snd);
  1074    EXPECT_THAT(getsockopt(connected_.get(), SOL_SOCKET, SO_SNDBUF, &get_snd,
  1075                           &get_snd_len),
  1076                SyscallSucceedsWithValue(0));
  1077    EXPECT_EQ(get_snd_len, sizeof(get_snd));
  1078    EXPECT_GT(get_snd, 0);
  1079  
  1080    // 2500 was chosen as a small value that can be set on Linux and gVisor.
  1081    int set_rcv = 2500;
  1082    EXPECT_THAT(setsockopt(accepted_.get(), SOL_SOCKET, SO_RCVBUF, &set_rcv,
  1083                           sizeof(set_rcv)),
  1084                SyscallSucceedsWithValue(0));
  1085    int get_rcv = -1;
  1086    socklen_t get_rcv_len = sizeof(get_rcv);
  1087    EXPECT_THAT(getsockopt(accepted_.get(), SOL_SOCKET, SO_RCVBUF, &get_rcv,
  1088                           &get_rcv_len),
  1089                SyscallSucceedsWithValue(0));
  1090    EXPECT_EQ(get_rcv_len, sizeof(get_rcv));
  1091    EXPECT_GE(get_rcv, 2500);
  1092  
  1093    // Quick sanity test.
  1094    EXPECT_LT(get_snd + get_rcv, 2500 * IOV_MAX);
  1095  
  1096    char data[2500] = {};
  1097    std::vector<struct iovec> iovecs;
  1098    for (int i = 0; i < IOV_MAX; i++) {
  1099      struct iovec iov = {};
  1100      iov.iov_base = data;
  1101      iov.iov_len = sizeof(data);
  1102      iovecs.push_back(iov);
  1103    }
  1104    ScopedThread t([this, &iovecs]() {
  1105      int result = -1;
  1106      EXPECT_THAT(result = RetryEINTR(writev)(connected_.get(), iovecs.data(),
  1107                                              iovecs.size()),
  1108                  SyscallSucceeds());
  1109      EXPECT_GT(result, 1);
  1110      EXPECT_LT(result, sizeof(data) * iovecs.size());
  1111    });
  1112  
  1113    char recv = 0;
  1114    EXPECT_THAT(RetryEINTR(read)(accepted_.get(), &recv, 1),
  1115                SyscallSucceedsWithValue(1));
  1116    EXPECT_THAT(close(accepted_.release()), SyscallSucceedsWithValue(0));
  1117  }
  1118  
  1119  TEST_P(TcpSocketTest, PollAfterShutdown) {
  1120    ScopedThread client_thread([this]() {
  1121      EXPECT_THAT(shutdown(connected_.get(), SHUT_WR),
  1122                  SyscallSucceedsWithValue(0));
  1123      struct pollfd poll_fd = {connected_.get(), POLLIN | POLLERR | POLLHUP, 0};
  1124      EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMillis),
  1125                  SyscallSucceedsWithValue(1));
  1126    });
  1127  
  1128    EXPECT_THAT(shutdown(accepted_.get(), SHUT_WR), SyscallSucceedsWithValue(0));
  1129    struct pollfd poll_fd = {accepted_.get(), POLLIN | POLLERR | POLLHUP, 0};
  1130    EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMillis),
  1131                SyscallSucceedsWithValue(1));
  1132  }
  1133  
  1134  TEST_P(SimpleTcpSocketTest, PollAroundAccept) {
  1135    const FileDescriptor listener =
  1136        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1137    sockaddr_storage addr =
  1138        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  1139    socklen_t addrlen = sizeof(addr);
  1140  
  1141    // Bind to some port.
  1142    ASSERT_THAT(bind(listener.get(), AsSockAddr(&addr), addrlen),
  1143                SyscallSucceeds());
  1144    ASSERT_THAT(listen(listener.get(), SOMAXCONN), SyscallSucceeds());
  1145  
  1146    // Get the address we're bound to. We need to do this because we're allowing
  1147    // the stack to pick a port for us.
  1148    ASSERT_THAT(getsockname(listener.get(), AsSockAddr(&addr), &addrlen),
  1149                SyscallSucceeds());
  1150    switch (GetParam()) {
  1151      case AF_INET:
  1152        ASSERT_EQ(addrlen, sizeof(sockaddr_in));
  1153        break;
  1154      case AF_INET6:
  1155        ASSERT_EQ(addrlen, sizeof(sockaddr_in6));
  1156        break;
  1157    }
  1158  
  1159    // Before the listener socket receives a connection, it should not be eligible
  1160    // for reading.
  1161    struct pollfd poll_fd = {listener.get(), POLLIN, 0};
  1162    EXPECT_THAT(RetryEINTR(poll)(&poll_fd, /* nfds */ 1, /* timeout */ 0),
  1163                SyscallSucceedsWithValue(0));
  1164  
  1165    FileDescriptor connector =
  1166        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1167    ASSERT_THAT(RetryEINTR(connect)(connector.get(), AsSockAddr(&addr), addrlen),
  1168                SyscallSucceeds());
  1169  
  1170    // Now that a connection is pending, the listener is ready for a read.
  1171    ASSERT_THAT(
  1172        RetryEINTR(poll)(&poll_fd, /* nfds */ 1, /* infinite timeout */ -1),
  1173        SyscallSucceedsWithValue(1));
  1174  
  1175    // Accept the connection. This should make the listener no longer ready for a
  1176    // read.
  1177    const FileDescriptor accepted =
  1178        ASSERT_NO_ERRNO_AND_VALUE(Accept(listener.get(), nullptr, nullptr));
  1179    EXPECT_THAT(RetryEINTR(poll)(&poll_fd, /* nfds*/ 1, /* timeout */ 0),
  1180                SyscallSucceedsWithValue(0));
  1181  }
  1182  
  1183  TEST_P(SimpleTcpSocketTest, NonBlockingConnectRetry) {
  1184    const FileDescriptor listener =
  1185        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1186  
  1187    // Initialize address to the loopback one.
  1188    sockaddr_storage addr =
  1189        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  1190    socklen_t addrlen = sizeof(addr);
  1191  
  1192    // Bind to some port but don't listen yet.
  1193    ASSERT_THAT(bind(listener.get(), AsSockAddr(&addr), addrlen),
  1194                SyscallSucceeds());
  1195  
  1196    // Get the address we're bound to, then connect to it. We need to do this
  1197    // because we're allowing the stack to pick a port for us.
  1198    ASSERT_THAT(getsockname(listener.get(), AsSockAddr(&addr), &addrlen),
  1199                SyscallSucceeds());
  1200  
  1201    FileDescriptor connector =
  1202        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1203  
  1204    // Verify that connect fails.
  1205    ASSERT_THAT(RetryEINTR(connect)(connector.get(), AsSockAddr(&addr), addrlen),
  1206                SyscallFailsWithErrno(ECONNREFUSED));
  1207  
  1208    // Now start listening
  1209    ASSERT_THAT(listen(listener.get(), SOMAXCONN), SyscallSucceeds());
  1210  
  1211    // TODO(gvisor.dev/issue/3828): Issuing connect() again on a socket that
  1212    //   failed first connect should succeed.
  1213    if (IsRunningOnGvisor()) {
  1214      ASSERT_THAT(
  1215          RetryEINTR(connect)(connector.get(), AsSockAddr(&addr), addrlen),
  1216          SyscallFailsWithErrno(ECONNABORTED));
  1217      return;
  1218    }
  1219  
  1220    // Verify that connect now succeeds.
  1221    ASSERT_THAT(RetryEINTR(connect)(connector.get(), AsSockAddr(&addr), addrlen),
  1222                SyscallSucceeds());
  1223  
  1224    // Accept the connection.
  1225    const FileDescriptor accepted =
  1226        ASSERT_NO_ERRNO_AND_VALUE(Accept(listener.get(), nullptr, nullptr));
  1227  }
  1228  
  1229  // nonBlockingConnectNoListener returns a socket on which a connect that is
  1230  // expected to fail has been issued. The address to which the connect is issued
  1231  // is written to `addr` and `addrlen`.
  1232  PosixErrorOr<FileDescriptor> nonBlockingConnectNoListener(
  1233      const int family, sockaddr_storage& addr, socklen_t& addrlen) {
  1234    // We will first create a socket and bind to ensure we bind a port but will
  1235    // not call listen on this socket.
  1236    // Then we will create a new socket that will connect to the port bound by
  1237    // the first socket and that shoud fail.
  1238    constexpr int sock_type = SOCK_STREAM | SOCK_NONBLOCK;
  1239    int b_sock;
  1240    RETURN_ERROR_IF_SYSCALL_FAIL(b_sock = socket(family, sock_type, IPPROTO_TCP));
  1241    FileDescriptor b(b_sock);
  1242    EXPECT_THAT(bind(b.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds());
  1243  
  1244    // Get the address bound by the listening socket.
  1245    EXPECT_THAT(getsockname(b.get(), AsSockAddr(&addr), &addrlen),
  1246                SyscallSucceeds());
  1247  
  1248    // Now create another socket and issue a connect on this one. This connect
  1249    // should fail as there is no listener.
  1250    int c_sock;
  1251    RETURN_ERROR_IF_SYSCALL_FAIL(c_sock = socket(family, sock_type, IPPROTO_TCP));
  1252    FileDescriptor s(c_sock);
  1253  
  1254    // Now connect to the bound address and this should fail as nothing
  1255    // is listening on the bound address.
  1256    EXPECT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1257                SyscallFailsWithErrno(EINPROGRESS));
  1258  
  1259    // Wait for the connect to fail.
  1260    struct pollfd poll_fd = {s.get(), POLLERR, 0};
  1261    EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMillis),
  1262                SyscallSucceedsWithValue(1));
  1263    return std::move(s);
  1264  }
  1265  
  1266  TEST_P(SimpleTcpSocketTest, NonBlockingConnectNoListener) {
  1267    sockaddr_storage addr =
  1268        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  1269    socklen_t addrlen = sizeof(addr);
  1270  
  1271    const FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
  1272        nonBlockingConnectNoListener(GetParam(), addr, addrlen));
  1273    ASSERT_NE(GetPort(addr).value(), 0);
  1274  
  1275    int err;
  1276    socklen_t optlen = sizeof(err);
  1277    ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ERROR, &err, &optlen),
  1278                SyscallSucceeds());
  1279    ASSERT_EQ(optlen, sizeof(err));
  1280    EXPECT_EQ(err, ECONNREFUSED);
  1281  
  1282    unsigned char c;
  1283    ASSERT_THAT(read(s.get(), &c, sizeof(c)), SyscallSucceedsWithValue(0));
  1284    int opts;
  1285    EXPECT_THAT(opts = fcntl(s.get(), F_GETFL), SyscallSucceeds());
  1286    opts &= ~O_NONBLOCK;
  1287    EXPECT_THAT(fcntl(s.get(), F_SETFL, opts), SyscallSucceeds());
  1288    // Try connecting again.
  1289    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1290                SyscallFailsWithErrno(ECONNABORTED));
  1291  }
  1292  
  1293  TEST_P(SimpleTcpSocketTest, ListenConnectParallel) {
  1294    // TODO(b/171436815): Re-enable when S/R is fixed.
  1295    const DisableSave disable_save;
  1296    int family = GetParam();
  1297    sockaddr_storage addr =
  1298        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  1299    socklen_t addrlen = sizeof(addr);
  1300    constexpr int sock_type = SOCK_STREAM;
  1301  
  1302    FileDescriptor l =
  1303        ASSERT_NO_ERRNO_AND_VALUE(Socket(family, sock_type, IPPROTO_TCP));
  1304    EXPECT_THAT(bind(l.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds());
  1305  
  1306    // Get the address bound by the listening socket.
  1307    EXPECT_THAT(getsockname(l.get(), AsSockAddr(&addr), &addrlen),
  1308                SyscallSucceeds());
  1309  
  1310    constexpr int num_threads = 100;
  1311    ScopedThread t([&l]() {
  1312      absl::SleepFor(absl::Microseconds(1000));
  1313      EXPECT_THAT(listen(l.get(), num_threads), SyscallSucceeds());
  1314    });
  1315  
  1316    // Initiate connects in a separate thread.
  1317    std::vector<std::unique_ptr<ScopedThread>> threads;
  1318    threads.reserve(num_threads);
  1319    for (int i = 0; i < num_threads; i++) {
  1320      threads.push_back(
  1321          std::make_unique<ScopedThread>([&addr, &addrlen, family]() {
  1322            const FileDescriptor c = ASSERT_NO_ERRNO_AND_VALUE(
  1323                Socket(family, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
  1324  
  1325            // Now connect to the bound address and this should fail as nothing
  1326            // is listening on the bound address.
  1327            EXPECT_THAT(RetryEINTR(connect)(c.get(), AsSockAddr(&addr), addrlen),
  1328                        SyscallFailsWithErrno(EINPROGRESS));
  1329            // Wait for the connect to fail or succeed as it can race with the
  1330            // socket listening.
  1331            struct pollfd poll_fd = {c.get(), POLLERR | POLLOUT, 0};
  1332            EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 1000),
  1333                        SyscallSucceedsWithValue(1));
  1334          }));
  1335    }
  1336  }
  1337  
  1338  TEST_P(SimpleTcpSocketTest, NonBlockingConnectNoListenerRead) {
  1339    sockaddr_storage addr =
  1340        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  1341    socklen_t addrlen = sizeof(addr);
  1342  
  1343    const FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
  1344        nonBlockingConnectNoListener(GetParam(), addr, addrlen));
  1345    ASSERT_NE(GetPort(addr).value(), 0);
  1346  
  1347    unsigned char c;
  1348    ASSERT_THAT(read(s.get(), &c, 1), SyscallFailsWithErrno(ECONNREFUSED));
  1349    ASSERT_THAT(read(s.get(), &c, 1), SyscallSucceedsWithValue(0));
  1350    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1351                SyscallFailsWithErrno(ECONNABORTED));
  1352  }
  1353  
  1354  TEST_P(SimpleTcpSocketTest, NonBlockingConnectNoListenerPeek) {
  1355    sockaddr_storage addr =
  1356        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  1357    socklen_t addrlen = sizeof(addr);
  1358  
  1359    const FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
  1360        nonBlockingConnectNoListener(GetParam(), addr, addrlen));
  1361    ASSERT_NE(GetPort(addr).value(), 0);
  1362  
  1363    unsigned char c;
  1364    ASSERT_THAT(recv(s.get(), &c, 1, MSG_PEEK),
  1365                SyscallFailsWithErrno(ECONNREFUSED));
  1366    ASSERT_THAT(recv(s.get(), &c, 1, MSG_PEEK), SyscallSucceedsWithValue(0));
  1367    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1368                SyscallFailsWithErrno(ECONNABORTED));
  1369  }
  1370  
  1371  TEST_P(SimpleTcpSocketTest, SelfConnectSendRecv) {
  1372    // Initialize address to the loopback one.
  1373    sockaddr_storage addr =
  1374        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  1375    socklen_t addrlen = sizeof(addr);
  1376  
  1377    const FileDescriptor s =
  1378        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1379  
  1380    ASSERT_THAT((bind)(s.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds());
  1381    // Get the bound port.
  1382    ASSERT_THAT(getsockname(s.get(), AsSockAddr(&addr), &addrlen),
  1383                SyscallSucceeds());
  1384    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1385                SyscallSucceeds());
  1386  
  1387    constexpr int kBufSz = 1 << 20;  // 1 MiB
  1388    std::vector<char> writebuf(kBufSz);
  1389  
  1390    // Start reading the response in a loop.
  1391    int read_bytes = 0;
  1392    ScopedThread t([&s, &read_bytes]() {
  1393      // Too many syscalls.
  1394      const DisableSave disable_save;
  1395  
  1396      char readbuf[2500] = {};
  1397      int n = -1;
  1398      while (n != 0) {
  1399        ASSERT_THAT(n = RetryEINTR(read)(s.get(), &readbuf, sizeof(readbuf)),
  1400                    SyscallSucceeds());
  1401        read_bytes += n;
  1402      }
  1403    });
  1404  
  1405    // Try to send the whole thing.
  1406    int n;
  1407    ASSERT_THAT(n = SendFd(s.get(), writebuf.data(), kBufSz, 0),
  1408                SyscallSucceeds());
  1409  
  1410    // We should have written the whole thing.
  1411    EXPECT_EQ(n, kBufSz);
  1412    EXPECT_THAT(shutdown(s.get(), SHUT_WR), SyscallSucceedsWithValue(0));
  1413    t.Join();
  1414  
  1415    // We should have read the whole thing.
  1416    EXPECT_EQ(read_bytes, kBufSz);
  1417  }
  1418  
  1419  TEST_P(SimpleTcpSocketTest, SelfConnectSend) {
  1420    // Initialize address to the loopback one.
  1421    sockaddr_storage addr =
  1422        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  1423    socklen_t addrlen = sizeof(addr);
  1424  
  1425    const FileDescriptor s =
  1426        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1427  
  1428    constexpr int max_seg = 256;
  1429    ASSERT_THAT(
  1430        setsockopt(s.get(), SOL_TCP, TCP_MAXSEG, &max_seg, sizeof(max_seg)),
  1431        SyscallSucceeds());
  1432  
  1433    ASSERT_THAT(bind(s.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds());
  1434    // Get the bound port.
  1435    ASSERT_THAT(getsockname(s.get(), AsSockAddr(&addr), &addrlen),
  1436                SyscallSucceeds());
  1437    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1438                SyscallSucceeds());
  1439  
  1440    // Ensure the write buffer is large enough not to block on a single write.
  1441    size_t write_size = 128 << 10;  // 128 KiB.
  1442    EXPECT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_SNDBUF, &write_size,
  1443                           sizeof(write_size)),
  1444                SyscallSucceedsWithValue(0));
  1445  
  1446    std::vector<char> writebuf(write_size);
  1447  
  1448    // Try to send the whole thing.
  1449    int n;
  1450    ASSERT_THAT(n = SendFd(s.get(), writebuf.data(), writebuf.size(), 0),
  1451                SyscallSucceeds());
  1452  
  1453    // We should have written the whole thing.
  1454    EXPECT_EQ(n, writebuf.size());
  1455    EXPECT_THAT(shutdown(s.get(), SHUT_WR), SyscallSucceedsWithValue(0));
  1456  }
  1457  
  1458  TEST_P(SimpleTcpSocketTest, SelfConnectSendShutdownWrite) {
  1459    // Initialize address to the loopback one.
  1460    sockaddr_storage addr =
  1461        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  1462    socklen_t addrlen = sizeof(addr);
  1463  
  1464    const FileDescriptor s =
  1465        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1466  
  1467    ASSERT_THAT(bind(s.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds());
  1468    // Get the bound port.
  1469    ASSERT_THAT(getsockname(s.get(), AsSockAddr(&addr), &addrlen),
  1470                SyscallSucceeds());
  1471    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1472                SyscallSucceeds());
  1473  
  1474    // Write enough data to fill send and receive buffers.
  1475    size_t write_size = 24 << 20;  // 24 MiB.
  1476    std::vector<char> writebuf(write_size);
  1477  
  1478    ScopedThread t([&s]() {
  1479      absl::SleepFor(absl::Milliseconds(250));
  1480      ASSERT_THAT(shutdown(s.get(), SHUT_WR), SyscallSucceeds());
  1481    });
  1482  
  1483    // Try to send the whole thing.
  1484    int n;
  1485    ASSERT_THAT(n = SendFd(s.get(), writebuf.data(), writebuf.size(), 0),
  1486                SyscallFailsWithErrno(EPIPE));
  1487  }
  1488  
  1489  TEST_P(SimpleTcpSocketTest, SelfConnectRecvShutdownRead) {
  1490    // Initialize address to the loopback one.
  1491    sockaddr_storage addr =
  1492        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  1493    socklen_t addrlen = sizeof(addr);
  1494  
  1495    const FileDescriptor s =
  1496        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1497  
  1498    ASSERT_THAT(bind(s.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds());
  1499    // Get the bound port.
  1500    ASSERT_THAT(getsockname(s.get(), AsSockAddr(&addr), &addrlen),
  1501                SyscallSucceeds());
  1502    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1503                SyscallSucceeds());
  1504  
  1505    ScopedThread t([&s]() {
  1506      absl::SleepFor(absl::Milliseconds(250));
  1507      ASSERT_THAT(shutdown(s.get(), SHUT_RD), SyscallSucceeds());
  1508    });
  1509  
  1510    char buf[1];
  1511    EXPECT_THAT(recv(s.get(), buf, 0, 0), SyscallSucceedsWithValue(0));
  1512  }
  1513  
  1514  void NonBlockingConnect(int family, int16_t pollMask) {
  1515    const FileDescriptor listener =
  1516        ASSERT_NO_ERRNO_AND_VALUE(Socket(family, SOCK_STREAM, IPPROTO_TCP));
  1517  
  1518    // Initialize address to the loopback one.
  1519    sockaddr_storage addr =
  1520        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(family));
  1521    socklen_t addrlen = sizeof(addr);
  1522  
  1523    // Bind to some port then start listening.
  1524    ASSERT_THAT(bind(listener.get(), AsSockAddr(&addr), addrlen),
  1525                SyscallSucceeds());
  1526  
  1527    ASSERT_THAT(listen(listener.get(), SOMAXCONN), SyscallSucceeds());
  1528  
  1529    FileDescriptor s =
  1530        ASSERT_NO_ERRNO_AND_VALUE(Socket(family, SOCK_STREAM, IPPROTO_TCP));
  1531  
  1532    // Set the FD to O_NONBLOCK.
  1533    int opts;
  1534    ASSERT_THAT(opts = fcntl(s.get(), F_GETFL), SyscallSucceeds());
  1535    opts |= O_NONBLOCK;
  1536    ASSERT_THAT(fcntl(s.get(), F_SETFL, opts), SyscallSucceeds());
  1537  
  1538    ASSERT_THAT(getsockname(listener.get(), AsSockAddr(&addr), &addrlen),
  1539                SyscallSucceeds());
  1540  
  1541    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1542                SyscallFailsWithErrno(EINPROGRESS));
  1543  
  1544    int t;
  1545    ASSERT_THAT(t = RetryEINTR(accept)(listener.get(), nullptr, nullptr),
  1546                SyscallSucceeds());
  1547  
  1548    struct pollfd poll_fd = {s.get(), pollMask, 0};
  1549    EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMillis),
  1550                SyscallSucceedsWithValue(1));
  1551  
  1552    int err;
  1553    socklen_t optlen = sizeof(err);
  1554    ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ERROR, &err, &optlen),
  1555                SyscallSucceeds());
  1556    ASSERT_EQ(optlen, sizeof(err));
  1557  
  1558    EXPECT_EQ(err, 0);
  1559  
  1560    EXPECT_THAT(close(t), SyscallSucceeds());
  1561  }
  1562  
  1563  TEST_P(SimpleTcpSocketTest, NonBlockingConnect_PollOut) {
  1564    NonBlockingConnect(GetParam(), POLLOUT);
  1565  }
  1566  
  1567  TEST_P(SimpleTcpSocketTest, NonBlockingConnect_PollWrNorm) {
  1568    NonBlockingConnect(GetParam(), POLLWRNORM);
  1569  }
  1570  
  1571  TEST_P(SimpleTcpSocketTest, NonBlockingConnect_PollWrNorm_PollOut) {
  1572    NonBlockingConnect(GetParam(), POLLWRNORM | POLLOUT);
  1573  }
  1574  
  1575  TEST_P(SimpleTcpSocketTest, NonBlockingConnectRemoteClose) {
  1576    const FileDescriptor listener =
  1577        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1578  
  1579    // Initialize address to the loopback one.
  1580    sockaddr_storage addr =
  1581        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  1582    socklen_t addrlen = sizeof(addr);
  1583  
  1584    // Bind to some port then start listening.
  1585    ASSERT_THAT(bind(listener.get(), AsSockAddr(&addr), addrlen),
  1586                SyscallSucceeds());
  1587  
  1588    ASSERT_THAT(listen(listener.get(), SOMAXCONN), SyscallSucceeds());
  1589  
  1590    FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
  1591        Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
  1592  
  1593    ASSERT_THAT(getsockname(listener.get(), AsSockAddr(&addr), &addrlen),
  1594                SyscallSucceeds());
  1595  
  1596    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1597                SyscallFailsWithErrno(EINPROGRESS));
  1598  
  1599    int t;
  1600    ASSERT_THAT(t = RetryEINTR(accept)(listener.get(), nullptr, nullptr),
  1601                SyscallSucceeds());
  1602  
  1603    EXPECT_THAT(close(t), SyscallSucceeds());
  1604  
  1605    // Now polling on the FD with a timeout should return 0 corresponding to no
  1606    // FDs ready.
  1607    struct pollfd poll_fd = {s.get(), POLLOUT, 0};
  1608    EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMillis),
  1609                SyscallSucceedsWithValue(1));
  1610  
  1611    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1612                SyscallSucceeds());
  1613  
  1614    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1615                SyscallFailsWithErrno(EISCONN));
  1616  }
  1617  
  1618  // Test that we get an ECONNREFUSED with a blocking socket when no one is
  1619  // listening on the other end.
  1620  TEST_P(SimpleTcpSocketTest, BlockingConnectRefused) {
  1621    FileDescriptor s =
  1622        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1623  
  1624    // Initialize address to the loopback one.
  1625    sockaddr_storage addr =
  1626        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  1627    socklen_t addrlen = sizeof(addr);
  1628  
  1629    auto reservation = ReserveLocalPort(GetParam(), addr, addrlen);
  1630    ASSERT_NE(GetPort(addr).value(), 0);
  1631  
  1632    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1633                SyscallFailsWithErrno(ECONNREFUSED));
  1634  
  1635    // Avoiding triggering save in destructor of s.
  1636    EXPECT_THAT(close(s.release()), SyscallSucceeds());
  1637  }
  1638  
  1639  // Test that connecting to a non-listening port and thus receiving a RST is
  1640  // handled appropriately by the socket - the port that the socket was bound to
  1641  // is released and the expected error is returned.
  1642  TEST_P(SimpleTcpSocketTest, CleanupOnConnectionRefused) {
  1643    // Create a socket that is known to not be listening. As is it bound but not
  1644    // listening, when another socket connects to the port, it will refuse..
  1645    FileDescriptor bound_s =
  1646        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1647  
  1648    sockaddr_storage bound_addr =
  1649        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  1650    socklen_t bound_addrlen = sizeof(bound_addr);
  1651  
  1652    ASSERT_THAT(bind(bound_s.get(), AsSockAddr(&bound_addr), bound_addrlen),
  1653                SyscallSucceeds());
  1654  
  1655    // Get the addresses the socket is bound to because the port is chosen by the
  1656    // stack.
  1657    ASSERT_THAT(
  1658        getsockname(bound_s.get(), AsSockAddr(&bound_addr), &bound_addrlen),
  1659        SyscallSucceeds());
  1660  
  1661    // Create, initialize, and bind the socket that is used to test connecting to
  1662    // the non-listening port.
  1663    FileDescriptor client_s =
  1664        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1665    // Initialize client address to the loopback one.
  1666    sockaddr_storage client_addr =
  1667        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  1668    socklen_t client_addrlen = sizeof(client_addr);
  1669  
  1670    ASSERT_THAT(bind(client_s.get(), AsSockAddr(&client_addr), client_addrlen),
  1671                SyscallSucceeds());
  1672  
  1673    ASSERT_THAT(
  1674        getsockname(client_s.get(), AsSockAddr(&client_addr), &client_addrlen),
  1675        SyscallSucceeds());
  1676  
  1677    // Now the test: connect to the bound but not listening socket with the
  1678    // client socket. The bound socket should return a RST and cause the client
  1679    // socket to return an error and clean itself up immediately.
  1680    // The error being ECONNREFUSED diverges with RFC 793, page 37, but does what
  1681    // Linux does.
  1682    ASSERT_THAT(
  1683        RetryEINTR(connect)(client_s.get(),
  1684                            reinterpret_cast<const struct sockaddr*>(&bound_addr),
  1685                            bound_addrlen),
  1686        SyscallFailsWithErrno(ECONNREFUSED));
  1687  
  1688    FileDescriptor new_s =
  1689        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1690  
  1691    // Test binding to the address from the client socket. This should be okay
  1692    // if it was dropped correctly.
  1693    ASSERT_THAT(bind(new_s.get(), AsSockAddr(&client_addr), client_addrlen),
  1694                SyscallSucceeds());
  1695  
  1696    // Attempt #2, with the new socket and reused addr our connect should fail in
  1697    // the same way as before, not with an EADDRINUSE.
  1698    //
  1699    // TODO(gvisor.dev/issue/3828): 2nd connect on a socket which failed connect
  1700    //   first time should succeed.
  1701    // gVisor never issues the second connect and returns ECONNABORTED instead.
  1702    // Linux actually sends a SYN again and gets a RST and correctly returns
  1703    // ECONNREFUSED.
  1704    if (IsRunningOnGvisor()) {
  1705      ASSERT_THAT(RetryEINTR(connect)(
  1706                      client_s.get(),
  1707                      reinterpret_cast<const struct sockaddr*>(&bound_addr),
  1708                      bound_addrlen),
  1709                  SyscallFailsWithErrno(ECONNABORTED));
  1710      return;
  1711    }
  1712    ASSERT_THAT(
  1713        RetryEINTR(connect)(client_s.get(),
  1714                            reinterpret_cast<const struct sockaddr*>(&bound_addr),
  1715                            bound_addrlen),
  1716        SyscallFailsWithErrno(ECONNREFUSED));
  1717  }
  1718  
  1719  // Test that we get an ECONNREFUSED with a nonblocking socket.
  1720  TEST_P(SimpleTcpSocketTest, NonBlockingConnectRefused) {
  1721    FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
  1722        Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
  1723  
  1724    // Initialize address to the loopback one.
  1725    sockaddr_storage addr =
  1726        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  1727    socklen_t addrlen = sizeof(addr);
  1728    auto reservation = ReserveLocalPort(GetParam(), addr, addrlen);
  1729    ASSERT_NE(GetPort(addr).value(), 0);
  1730  
  1731    ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  1732                SyscallFailsWithErrno(EINPROGRESS));
  1733  
  1734    // We don't need to specify any events to get POLLHUP or POLLERR as these
  1735    // are added before the poll.
  1736    struct pollfd poll_fd = {s.get(), /*events=*/0, 0};
  1737    EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMillis),
  1738                SyscallSucceedsWithValue(1));
  1739  
  1740    // The ECONNREFUSED should cause us to be woken up with POLLHUP.
  1741    EXPECT_NE(poll_fd.revents & (POLLHUP | POLLERR), 0);
  1742  
  1743    // Avoiding triggering save in destructor of s.
  1744    EXPECT_THAT(close(s.release()), SyscallSucceeds());
  1745  }
  1746  
  1747  // Test that setting a supported congestion control algorithm succeeds for an
  1748  // unconnected TCP socket
  1749  TEST_P(SimpleTcpSocketTest, SetCongestionControlSucceedsForSupported) {
  1750    // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
  1751    const int kTcpCaNameMax = 16;
  1752  
  1753    FileDescriptor s =
  1754        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1755    {
  1756      const char kSetCC[kTcpCaNameMax] = "reno";
  1757      ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC,
  1758                             strlen(kSetCC)),
  1759                  SyscallSucceedsWithValue(0));
  1760  
  1761      char got_cc[kTcpCaNameMax];
  1762      memset(got_cc, '1', sizeof(got_cc));
  1763      socklen_t optlen = sizeof(got_cc);
  1764      ASSERT_THAT(
  1765          getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
  1766          SyscallSucceedsWithValue(0));
  1767      // We ignore optlen here as the linux kernel sets optlen to the lower of the
  1768      // size of the buffer passed in or kTcpCaNameMax and not the length of the
  1769      // congestion control algorithm's actual name.
  1770      EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kTcpCaNameMax)));
  1771    }
  1772    {
  1773      const char kSetCC[kTcpCaNameMax] = "cubic";
  1774      ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC,
  1775                             strlen(kSetCC)),
  1776                  SyscallSucceedsWithValue(0));
  1777  
  1778      char got_cc[kTcpCaNameMax];
  1779      memset(got_cc, '1', sizeof(got_cc));
  1780      socklen_t optlen = sizeof(got_cc);
  1781      ASSERT_THAT(
  1782          getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
  1783          SyscallSucceedsWithValue(0));
  1784      // We ignore optlen here as the linux kernel sets optlen to the lower of the
  1785      // size of the buffer passed in or kTcpCaNameMax and not the length of the
  1786      // congestion control algorithm's actual name.
  1787      EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kTcpCaNameMax)));
  1788    }
  1789  }
  1790  
  1791  // This test verifies that a getsockopt(...TCP_CONGESTION) behaviour is
  1792  // consistent between linux and gvisor when the passed in buffer is smaller than
  1793  // kTcpCaNameMax.
  1794  TEST_P(SimpleTcpSocketTest, SetGetTCPCongestionShortReadBuffer) {
  1795    FileDescriptor s =
  1796        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1797    {
  1798      // Verify that getsockopt/setsockopt work with buffers smaller than
  1799      // kTcpCaNameMax.
  1800      const char kSetCC[] = "cubic";
  1801      ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC,
  1802                             strlen(kSetCC)),
  1803                  SyscallSucceedsWithValue(0));
  1804  
  1805      char got_cc[sizeof(kSetCC)];
  1806      socklen_t optlen = sizeof(got_cc);
  1807      ASSERT_THAT(
  1808          getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
  1809          SyscallSucceedsWithValue(0));
  1810      EXPECT_EQ(sizeof(got_cc), optlen);
  1811      EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(got_cc)));
  1812    }
  1813  }
  1814  
  1815  // This test verifies that a getsockopt(...TCP_CONGESTION) behaviour is
  1816  // consistent between linux and gvisor when the passed in buffer is larger than
  1817  // kTcpCaNameMax.
  1818  TEST_P(SimpleTcpSocketTest, SetGetTCPCongestionLargeReadBuffer) {
  1819    // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
  1820    const int kTcpCaNameMax = 16;
  1821  
  1822    FileDescriptor s =
  1823        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1824    {
  1825      // Verify that getsockopt works with buffers larger than
  1826      // kTcpCaNameMax.
  1827      const char kSetCC[] = "cubic";
  1828      ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC,
  1829                             strlen(kSetCC)),
  1830                  SyscallSucceedsWithValue(0));
  1831  
  1832      char got_cc[kTcpCaNameMax + 5];
  1833      socklen_t optlen = sizeof(got_cc);
  1834      ASSERT_THAT(
  1835          getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
  1836          SyscallSucceedsWithValue(0));
  1837      // Linux copies the minimum of kTcpCaNameMax or the length of the passed in
  1838      // buffer and sets optlen to the number of bytes actually copied
  1839      // irrespective of the actual length of the congestion control name.
  1840      EXPECT_EQ(kTcpCaNameMax, optlen);
  1841      EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kSetCC)));
  1842    }
  1843  }
  1844  
  1845  // Test that setting an unsupported congestion control algorithm fails for an
  1846  // unconnected TCP socket.
  1847  TEST_P(SimpleTcpSocketTest, SetCongestionControlFailsForUnsupported) {
  1848    // This is Linux's net/tcp.h TCP_CA_NAME_MAX.
  1849    const int kTcpCaNameMax = 16;
  1850  
  1851    FileDescriptor s =
  1852        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1853    char old_cc[kTcpCaNameMax];
  1854    socklen_t optlen = sizeof(old_cc);
  1855    ASSERT_THAT(
  1856        getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &old_cc, &optlen),
  1857        SyscallSucceedsWithValue(0));
  1858  
  1859    const char kSetCC[] = "invalid_ca_kSetCC";
  1860    ASSERT_THAT(
  1861        setsockopt(s.get(), SOL_TCP, TCP_CONGESTION, &kSetCC, strlen(kSetCC)),
  1862        SyscallFailsWithErrno(ENOENT));
  1863  
  1864    char got_cc[kTcpCaNameMax];
  1865    ASSERT_THAT(
  1866        getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen),
  1867        SyscallSucceedsWithValue(0));
  1868    // We ignore optlen here as the linux kernel sets optlen to the lower of the
  1869    // size of the buffer passed in or kTcpCaNameMax and not the length of the
  1870    // congestion control algorithm's actual name.
  1871    EXPECT_EQ(0, memcmp(got_cc, old_cc, sizeof(kTcpCaNameMax)));
  1872  }
  1873  
  1874  TEST_P(SimpleTcpSocketTest, MaxSegDefault) {
  1875    FileDescriptor s =
  1876        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1877  
  1878    constexpr int kDefaultMSS = 536;
  1879    int tcp_max_seg;
  1880    socklen_t optlen = sizeof(tcp_max_seg);
  1881    ASSERT_THAT(
  1882        getsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg, &optlen),
  1883        SyscallSucceedsWithValue(0));
  1884  
  1885    EXPECT_EQ(kDefaultMSS, tcp_max_seg);
  1886    EXPECT_EQ(sizeof(tcp_max_seg), optlen);
  1887  }
  1888  
  1889  TEST_P(SimpleTcpSocketTest, SetMaxSeg) {
  1890    FileDescriptor s =
  1891        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1892  
  1893    constexpr int kDefaultMSS = 536;
  1894    constexpr int kTCPMaxSeg = 1024;
  1895    ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &kTCPMaxSeg,
  1896                           sizeof(kTCPMaxSeg)),
  1897                SyscallSucceedsWithValue(0));
  1898  
  1899    int optval;
  1900    socklen_t optlen = sizeof(optval);
  1901    ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &optval, &optlen),
  1902                SyscallSucceedsWithValue(0));
  1903    ASSERT_EQ(optlen, sizeof(optval));
  1904  
  1905    // In older Linux versions, user_mss value was never actually returned. Linux
  1906    // would always return the default MSS value for an unconnected socket and
  1907    // always return the actual current MSS for a connected one. However, the
  1908    // behavior changed since 34dfde4ad87b ("tcp: Return user_mss for TCP_MAXSEG
  1909    // in CLOSE/LISTEN state if user_mss set"). With this change, user_mss is
  1910    // returned if set for unconnected sockets. So allow both.
  1911    EXPECT_THAT(optval, AnyOf(kDefaultMSS, kTCPMaxSeg));
  1912  }
  1913  
  1914  TEST_P(SimpleTcpSocketTest, SetMaxSegFailsForInvalidMSSValues) {
  1915    FileDescriptor s =
  1916        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1917  
  1918    {
  1919      constexpr int tcp_max_seg = 10;
  1920      ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg,
  1921                             sizeof(tcp_max_seg)),
  1922                  SyscallFailsWithErrno(EINVAL));
  1923    }
  1924    {
  1925      constexpr int tcp_max_seg = 75000;
  1926      ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg,
  1927                             sizeof(tcp_max_seg)),
  1928                  SyscallFailsWithErrno(EINVAL));
  1929    }
  1930  }
  1931  
  1932  TEST_P(SimpleTcpSocketTest, SetTCPUserTimeout) {
  1933    FileDescriptor s =
  1934        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1935  
  1936    {
  1937      constexpr int kTCPUserTimeout = -1;
  1938      EXPECT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT,
  1939                             &kTCPUserTimeout, sizeof(kTCPUserTimeout)),
  1940                  SyscallFailsWithErrno(EINVAL));
  1941    }
  1942  
  1943    // kTCPUserTimeout is in milliseconds.
  1944    constexpr int kTCPUserTimeout = 100;
  1945    ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT,
  1946                           &kTCPUserTimeout, sizeof(kTCPUserTimeout)),
  1947                SyscallSucceedsWithValue(0));
  1948    int get = -1;
  1949    socklen_t get_len = sizeof(get);
  1950    ASSERT_THAT(
  1951        getsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, &get, &get_len),
  1952        SyscallSucceedsWithValue(0));
  1953    EXPECT_EQ(get_len, sizeof(get));
  1954    EXPECT_EQ(get, kTCPUserTimeout);
  1955  }
  1956  
  1957  TEST_P(SimpleTcpSocketTest, SetTCPDeferAcceptNeg) {
  1958    FileDescriptor s =
  1959        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1960  
  1961    // -ve TCP_DEFER_ACCEPT is same as setting it to zero.
  1962    constexpr int kNeg = -1;
  1963    EXPECT_THAT(
  1964        setsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &kNeg, sizeof(kNeg)),
  1965        SyscallSucceeds());
  1966    int get = -1;
  1967    socklen_t get_len = sizeof(get);
  1968    ASSERT_THAT(
  1969        getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len),
  1970        SyscallSucceedsWithValue(0));
  1971    EXPECT_EQ(get_len, sizeof(get));
  1972    EXPECT_EQ(get, 0);
  1973  }
  1974  
  1975  TEST_P(SimpleTcpSocketTest, GetTCPDeferAcceptDefault) {
  1976    FileDescriptor s =
  1977        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1978  
  1979    int get = -1;
  1980    socklen_t get_len = sizeof(get);
  1981    ASSERT_THAT(
  1982        getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len),
  1983        SyscallSucceedsWithValue(0));
  1984    EXPECT_EQ(get_len, sizeof(get));
  1985    EXPECT_EQ(get, 0);
  1986  }
  1987  
  1988  TEST_P(SimpleTcpSocketTest, SetTCPDeferAcceptGreaterThanZero) {
  1989    FileDescriptor s =
  1990        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  1991    // kTCPDeferAccept is in seconds.
  1992    // NOTE: linux translates seconds to # of retries and back from
  1993    //   #of retries to seconds. Which means only certain values
  1994    //   translate back exactly. That's why we use 3 here, a value of
  1995    //   5 will result in us getting back 7 instead of 5 in the
  1996    //   getsockopt.
  1997    constexpr int kTCPDeferAccept = 3;
  1998    ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT,
  1999                           &kTCPDeferAccept, sizeof(kTCPDeferAccept)),
  2000                SyscallSucceeds());
  2001    int get = -1;
  2002    socklen_t get_len = sizeof(get);
  2003    ASSERT_THAT(
  2004        getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len),
  2005        SyscallSucceeds());
  2006    EXPECT_EQ(get_len, sizeof(get));
  2007    EXPECT_EQ(get, kTCPDeferAccept);
  2008  }
  2009  
  2010  TEST_P(SimpleTcpSocketTest, RecvOnClosedSocket) {
  2011    auto s =
  2012        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  2013    char buf[1];
  2014    EXPECT_THAT(recv(s.get(), buf, 0, 0), SyscallFailsWithErrno(ENOTCONN));
  2015    EXPECT_THAT(recv(s.get(), buf, sizeof(buf), 0),
  2016                SyscallFailsWithErrno(ENOTCONN));
  2017  }
  2018  
  2019  TEST_P(SimpleTcpSocketTest, TCPConnectSoRcvBufRace) {
  2020    auto s = ASSERT_NO_ERRNO_AND_VALUE(
  2021        Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
  2022    sockaddr_storage addr =
  2023        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  2024    socklen_t addrlen = sizeof(addr);
  2025    auto reservation = ReserveLocalPort(GetParam(), addr, addrlen);
  2026    ASSERT_NE(GetPort(addr).value(), 0);
  2027  
  2028    RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen);
  2029    int buf_sz = 1 << 18;
  2030    EXPECT_THAT(
  2031        setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &buf_sz, sizeof(buf_sz)),
  2032        SyscallSucceedsWithValue(0));
  2033  }
  2034  
  2035  TEST_P(SimpleTcpSocketTest, SetTCPSynCntLessThanOne) {
  2036    FileDescriptor s =
  2037        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  2038  
  2039    int get = -1;
  2040    socklen_t get_len = sizeof(get);
  2041    ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
  2042                SyscallSucceedsWithValue(0));
  2043    EXPECT_EQ(get_len, sizeof(get));
  2044    int default_syn_cnt = get;
  2045  
  2046    {
  2047      // TCP_SYNCNT less than 1 should be rejected with an EINVAL.
  2048      constexpr int kZero = 0;
  2049      EXPECT_THAT(
  2050          setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kZero, sizeof(kZero)),
  2051          SyscallFailsWithErrno(EINVAL));
  2052  
  2053      // TCP_SYNCNT less than 1 should be rejected with an EINVAL.
  2054      constexpr int kNeg = -1;
  2055      EXPECT_THAT(
  2056          setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kNeg, sizeof(kNeg)),
  2057          SyscallFailsWithErrno(EINVAL));
  2058  
  2059      int get = -1;
  2060      socklen_t get_len = sizeof(get);
  2061  
  2062      ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
  2063                  SyscallSucceedsWithValue(0));
  2064      EXPECT_EQ(get_len, sizeof(get));
  2065      EXPECT_EQ(default_syn_cnt, get);
  2066    }
  2067  }
  2068  
  2069  TEST_P(SimpleTcpSocketTest, GetTCPSynCntDefault) {
  2070    FileDescriptor s =
  2071        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  2072  
  2073    int get = -1;
  2074    socklen_t get_len = sizeof(get);
  2075    constexpr int kDefaultSynCnt = 6;
  2076  
  2077    ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
  2078                SyscallSucceedsWithValue(0));
  2079    EXPECT_EQ(get_len, sizeof(get));
  2080    EXPECT_EQ(get, kDefaultSynCnt);
  2081  }
  2082  
  2083  TEST_P(SimpleTcpSocketTest, SetTCPSynCntGreaterThanOne) {
  2084    FileDescriptor s =
  2085        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  2086    constexpr int kTCPSynCnt = 20;
  2087    ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kTCPSynCnt,
  2088                           sizeof(kTCPSynCnt)),
  2089                SyscallSucceeds());
  2090  
  2091    int get = -1;
  2092    socklen_t get_len = sizeof(get);
  2093    ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
  2094                SyscallSucceeds());
  2095    EXPECT_EQ(get_len, sizeof(get));
  2096    EXPECT_EQ(get, kTCPSynCnt);
  2097  }
  2098  
  2099  TEST_P(SimpleTcpSocketTest, SetTCPSynCntAboveMax) {
  2100    FileDescriptor s =
  2101        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  2102    int get = -1;
  2103    socklen_t get_len = sizeof(get);
  2104    ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
  2105                SyscallSucceedsWithValue(0));
  2106    EXPECT_EQ(get_len, sizeof(get));
  2107    int default_syn_cnt = get;
  2108    {
  2109      constexpr int kTCPSynCnt = 256;
  2110      ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kTCPSynCnt,
  2111                             sizeof(kTCPSynCnt)),
  2112                  SyscallFailsWithErrno(EINVAL));
  2113  
  2114      int get = -1;
  2115      socklen_t get_len = sizeof(get);
  2116      ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len),
  2117                  SyscallSucceeds());
  2118      EXPECT_EQ(get_len, sizeof(get));
  2119      EXPECT_EQ(get, default_syn_cnt);
  2120    }
  2121  }
  2122  
  2123  TEST_P(SimpleTcpSocketTest, SetTCPWindowClampBelowMinRcvBuf) {
  2124    FileDescriptor s =
  2125        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  2126  
  2127    // Discover minimum receive buf by setting a really low value
  2128    // for the receive buffer.
  2129    constexpr int kZero = 0;
  2130    EXPECT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &kZero, sizeof(kZero)),
  2131                SyscallSucceeds());
  2132  
  2133    // Now retrieve the minimum value for SO_RCVBUF as the set above should
  2134    // have caused SO_RCVBUF for the socket to be set to the minimum.
  2135    int get = -1;
  2136    socklen_t get_len = sizeof(get);
  2137    ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &get, &get_len),
  2138                SyscallSucceedsWithValue(0));
  2139    EXPECT_EQ(get_len, sizeof(get));
  2140    int min_so_rcvbuf = get;
  2141  
  2142    {
  2143      // TCP_WINDOW_CLAMP less than min_so_rcvbuf/2 should be set to
  2144      // min_so_rcvbuf/2.
  2145      int below_half_min_rcvbuf = min_so_rcvbuf / 2 - 1;
  2146      EXPECT_THAT(
  2147          setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP,
  2148                     &below_half_min_rcvbuf, sizeof(below_half_min_rcvbuf)),
  2149          SyscallSucceeds());
  2150  
  2151      int get = -1;
  2152      socklen_t get_len = sizeof(get);
  2153  
  2154      ASSERT_THAT(
  2155          getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len),
  2156          SyscallSucceedsWithValue(0));
  2157      EXPECT_EQ(get_len, sizeof(get));
  2158      EXPECT_EQ(min_so_rcvbuf / 2, get);
  2159    }
  2160  }
  2161  
  2162  TEST_P(SimpleTcpSocketTest, SetTCPWindowClampZeroClosedSocket) {
  2163    FileDescriptor s =
  2164        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  2165    constexpr int kZero = 0;
  2166    ASSERT_THAT(
  2167        setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &kZero, sizeof(kZero)),
  2168        SyscallSucceeds());
  2169  
  2170    int get = -1;
  2171    socklen_t get_len = sizeof(get);
  2172    ASSERT_THAT(
  2173        getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len),
  2174        SyscallSucceeds());
  2175    EXPECT_EQ(get_len, sizeof(get));
  2176    EXPECT_EQ(get, kZero);
  2177  }
  2178  
  2179  TEST_P(SimpleTcpSocketTest, SetTCPWindowClampAboveHalfMinRcvBuf) {
  2180    FileDescriptor s =
  2181        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  2182  
  2183    // Discover minimum receive buf by setting a really low value
  2184    // for the receive buffer.
  2185    constexpr int kZero = 0;
  2186    EXPECT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &kZero, sizeof(kZero)),
  2187                SyscallSucceeds());
  2188  
  2189    // Now retrieve the minimum value for SO_RCVBUF as the set above should
  2190    // have caused SO_RCVBUF for the socket to be set to the minimum.
  2191    int get = -1;
  2192    socklen_t get_len = sizeof(get);
  2193    ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &get, &get_len),
  2194                SyscallSucceedsWithValue(0));
  2195    EXPECT_EQ(get_len, sizeof(get));
  2196    int min_so_rcvbuf = get;
  2197  
  2198    {
  2199      int above_half_min_rcv_buf = min_so_rcvbuf / 2 + 1;
  2200      EXPECT_THAT(
  2201          setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP,
  2202                     &above_half_min_rcv_buf, sizeof(above_half_min_rcv_buf)),
  2203          SyscallSucceeds());
  2204  
  2205      int get = -1;
  2206      socklen_t get_len = sizeof(get);
  2207  
  2208      ASSERT_THAT(
  2209          getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len),
  2210          SyscallSucceedsWithValue(0));
  2211      EXPECT_EQ(get_len, sizeof(get));
  2212      EXPECT_EQ(above_half_min_rcv_buf, get);
  2213    }
  2214  }
  2215  
  2216  #ifdef __linux__
  2217  
  2218  // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER.
  2219  // gVisor currently silently ignores attaching a filter.
  2220  TEST_P(SimpleTcpSocketTest, SetSocketAttachDetachFilter) {
  2221    FileDescriptor s =
  2222        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  2223    // Program generated using sudo tcpdump -i lo tcp and port 1234 -dd
  2224    struct sock_filter code[] = {
  2225        {0x28, 0, 0, 0x0000000c},  {0x15, 0, 6, 0x000086dd},
  2226        {0x30, 0, 0, 0x00000014},  {0x15, 0, 15, 0x00000006},
  2227        {0x28, 0, 0, 0x00000036},  {0x15, 12, 0, 0x000004d2},
  2228        {0x28, 0, 0, 0x00000038},  {0x15, 10, 11, 0x000004d2},
  2229        {0x15, 0, 10, 0x00000800}, {0x30, 0, 0, 0x00000017},
  2230        {0x15, 0, 8, 0x00000006},  {0x28, 0, 0, 0x00000014},
  2231        {0x45, 6, 0, 0x00001fff},  {0xb1, 0, 0, 0x0000000e},
  2232        {0x48, 0, 0, 0x0000000e},  {0x15, 2, 0, 0x000004d2},
  2233        {0x48, 0, 0, 0x00000010},  {0x15, 0, 1, 0x000004d2},
  2234        {0x6, 0, 0, 0x00040000},   {0x6, 0, 0, 0x00000000},
  2235    };
  2236    struct sock_fprog bpf = {
  2237        .len = ABSL_ARRAYSIZE(code),
  2238        .filter = code,
  2239    };
  2240    ASSERT_THAT(
  2241        setsockopt(s.get(), SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)),
  2242        SyscallSucceeds());
  2243  
  2244    constexpr int val = 0;
  2245    ASSERT_THAT(
  2246        setsockopt(s.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)),
  2247        SyscallSucceeds());
  2248  }
  2249  
  2250  #endif  // __linux__
  2251  
  2252  TEST_P(SimpleTcpSocketTest, SetSocketDetachFilterNoInstalledFilter) {
  2253    // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER.
  2254    SKIP_IF(IsRunningOnGvisor());
  2255    FileDescriptor s =
  2256        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  2257    constexpr int val = 0;
  2258    ASSERT_THAT(
  2259        setsockopt(s.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)),
  2260        SyscallFailsWithErrno(ENOENT));
  2261  }
  2262  
  2263  TEST_P(SimpleTcpSocketTest, GetSocketDetachFilter) {
  2264    FileDescriptor s =
  2265        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  2266  
  2267    int val = 0;
  2268    socklen_t val_len = sizeof(val);
  2269    ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, &val_len),
  2270                SyscallFailsWithErrno(ENOPROTOOPT));
  2271  }
  2272  
  2273  TEST_P(SimpleTcpSocketTest, CloseNonConnectedLingerOption) {
  2274    FileDescriptor s =
  2275        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  2276  
  2277    constexpr int kLingerTimeout = 10;  // Seconds.
  2278  
  2279    // Set the SO_LINGER option.
  2280    struct linger sl = {
  2281        .l_onoff = 1,
  2282        .l_linger = kLingerTimeout,
  2283    };
  2284    ASSERT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)),
  2285                SyscallSucceeds());
  2286  
  2287    struct pollfd poll_fd = {
  2288        .fd = s.get(),
  2289        .events = POLLHUP,
  2290    };
  2291    constexpr int kPollTimeoutMs = 0;
  2292    ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs),
  2293                SyscallSucceedsWithValue(1));
  2294  
  2295    auto const start_time = absl::Now();
  2296    EXPECT_THAT(close(s.release()), SyscallSucceeds());
  2297    auto const end_time = absl::Now();
  2298  
  2299    // Close() should not linger and return immediately.
  2300    ASSERT_LT((end_time - start_time), absl::Seconds(kLingerTimeout));
  2301  }
  2302  
  2303  // Tests that SO_ACCEPTCONN returns non zero value for listening sockets.
  2304  TEST_P(TcpSocketTest, GetSocketAcceptConnListener) {
  2305    int got = -1;
  2306    socklen_t length = sizeof(got);
  2307    ASSERT_THAT(
  2308        getsockopt(listener_.get(), SOL_SOCKET, SO_ACCEPTCONN, &got, &length),
  2309        SyscallSucceeds());
  2310    ASSERT_EQ(length, sizeof(got));
  2311    EXPECT_EQ(got, 1);
  2312  }
  2313  
  2314  // Tests that SO_ACCEPTCONN returns zero value for not listening sockets.
  2315  TEST_P(TcpSocketTest, GetSocketAcceptConnNonListener) {
  2316    int got = -1;
  2317    socklen_t length = sizeof(got);
  2318    ASSERT_THAT(
  2319        getsockopt(connected_.get(), SOL_SOCKET, SO_ACCEPTCONN, &got, &length),
  2320        SyscallSucceeds());
  2321    ASSERT_EQ(length, sizeof(got));
  2322    EXPECT_EQ(got, 0);
  2323  
  2324    ASSERT_THAT(
  2325        getsockopt(accepted_.get(), SOL_SOCKET, SO_ACCEPTCONN, &got, &length),
  2326        SyscallSucceeds());
  2327    ASSERT_EQ(length, sizeof(got));
  2328    EXPECT_EQ(got, 0);
  2329  }
  2330  
  2331  TEST_P(TcpSocketTest, SetPMTUD) {
  2332    // IP_PMTUDISC_WANT should be default.
  2333    int got = -1;
  2334    socklen_t length = sizeof(got);
  2335    ASSERT_THAT(
  2336        getsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &got, &length),
  2337        SyscallSucceeds());
  2338    EXPECT_EQ(got, IP_PMTUDISC_WANT);
  2339  
  2340    int set = IP_PMTUDISC_DO;
  2341    ASSERT_THAT(
  2342        setsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &set, length),
  2343        SyscallSucceeds());
  2344    ASSERT_THAT(
  2345        getsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &got, &length),
  2346        SyscallSucceeds());
  2347    EXPECT_EQ(got, IP_PMTUDISC_DO);
  2348    set = IP_PMTUDISC_DONT;
  2349    ASSERT_THAT(
  2350        setsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &set, length),
  2351        SyscallSucceeds());
  2352    ASSERT_THAT(
  2353        getsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &got, &length),
  2354        SyscallSucceeds());
  2355    EXPECT_EQ(got, IP_PMTUDISC_DONT);
  2356  
  2357    // IP_PMTUDISC_PROBE is not supported by gVisor.
  2358    set = IP_PMTUDISC_PROBE;
  2359    if (IsRunningOnGvisor() && !IsRunningWithHostinet()) {
  2360      ASSERT_THAT(
  2361          setsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &set, length),
  2362          SyscallFailsWithErrno(ENOTSUP));
  2363    } else {
  2364      ASSERT_THAT(
  2365          setsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &set, length),
  2366          SyscallSucceeds());
  2367      ASSERT_THAT(
  2368          getsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &got, &length),
  2369          SyscallSucceeds());
  2370      EXPECT_EQ(got, IP_PMTUDISC_PROBE);
  2371    }
  2372  }
  2373  
  2374  TEST_P(SimpleTcpSocketTest, GetSocketAcceptConnWithShutdown) {
  2375    // TODO(b/171345701): Fix the TCP state for listening socket on shutdown.
  2376    SKIP_IF(IsRunningOnGvisor());
  2377  
  2378    FileDescriptor s =
  2379        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  2380  
  2381    // Initialize address to the loopback one.
  2382    sockaddr_storage addr =
  2383        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  2384    socklen_t addrlen = sizeof(addr);
  2385  
  2386    // Bind to some port then start listening.
  2387    ASSERT_THAT(bind(s.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds());
  2388  
  2389    ASSERT_THAT(listen(s.get(), SOMAXCONN), SyscallSucceeds());
  2390  
  2391    int got = -1;
  2392    socklen_t length = sizeof(got);
  2393    ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ACCEPTCONN, &got, &length),
  2394                SyscallSucceeds());
  2395    ASSERT_EQ(length, sizeof(got));
  2396    EXPECT_EQ(got, 1);
  2397  
  2398    EXPECT_THAT(shutdown(s.get(), SHUT_RD), SyscallSucceeds());
  2399    ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ACCEPTCONN, &got, &length),
  2400                SyscallSucceeds());
  2401    ASSERT_EQ(length, sizeof(got));
  2402    EXPECT_EQ(got, 0);
  2403  }
  2404  
  2405  void ShutdownConnectingSocket(int domain, int shutdown_mode) {
  2406    FileDescriptor bound_s =
  2407        ASSERT_NO_ERRNO_AND_VALUE(Socket(domain, SOCK_STREAM, IPPROTO_TCP));
  2408  
  2409    sockaddr_storage bound_addr =
  2410        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(domain));
  2411    socklen_t bound_addrlen = sizeof(bound_addr);
  2412  
  2413    ASSERT_THAT(bind(bound_s.get(), AsSockAddr(&bound_addr), bound_addrlen),
  2414                SyscallSucceeds());
  2415  
  2416    // Start listening. Use a zero backlog to only allow one connection in the
  2417    // accept queue.
  2418    ASSERT_THAT(listen(bound_s.get(), 0), SyscallSucceeds());
  2419  
  2420    // Get the addresses the socket is bound to because the port is chosen by the
  2421    // stack.
  2422    ASSERT_THAT(
  2423        getsockname(bound_s.get(), AsSockAddr(&bound_addr), &bound_addrlen),
  2424        SyscallSucceeds());
  2425  
  2426    // Establish a connection. But do not accept it. That way, subsequent
  2427    // connections will not get a SYN-ACK because the queue is full.
  2428    FileDescriptor connected_s =
  2429        ASSERT_NO_ERRNO_AND_VALUE(Socket(domain, SOCK_STREAM, IPPROTO_TCP));
  2430    ASSERT_THAT(
  2431        RetryEINTR(connect)(connected_s.get(),
  2432                            reinterpret_cast<const struct sockaddr*>(&bound_addr),
  2433                            bound_addrlen),
  2434        SyscallSucceeds());
  2435  
  2436    FileDescriptor connecting_s = ASSERT_NO_ERRNO_AND_VALUE(
  2437        Socket(domain, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
  2438    ASSERT_THAT(
  2439        RetryEINTR(connect)(connecting_s.get(),
  2440                            reinterpret_cast<const struct sockaddr*>(&bound_addr),
  2441                            bound_addrlen),
  2442        SyscallFailsWithErrno(EINPROGRESS));
  2443  
  2444    // Now the test: when a connecting socket is shutdown, the socket should enter
  2445    // an error state.
  2446    EXPECT_THAT(shutdown(connecting_s.get(), shutdown_mode), SyscallSucceeds());
  2447  
  2448    // We don't need to specify any events to get POLLHUP or POLLERR because these
  2449    // are always tracked.
  2450    struct pollfd poll_fd = {
  2451        .fd = connecting_s.get(),
  2452    };
  2453  
  2454    EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 0), SyscallSucceedsWithValue(1));
  2455    EXPECT_EQ(poll_fd.revents, POLLHUP | POLLERR);
  2456  }
  2457  
  2458  TEST_P(SimpleTcpSocketTest, ShutdownReadConnectingSocket) {
  2459    // TODO(b/171436815): Re-enable when S/R is fixed.
  2460    const DisableSave disable_save;
  2461    // TODO(b/175409607): Fix this test for hostinet.
  2462    SKIP_IF(IsRunningWithHostinet());
  2463    ShutdownConnectingSocket(GetParam(), SHUT_RD);
  2464  }
  2465  
  2466  TEST_P(SimpleTcpSocketTest, ShutdownWriteConnectingSocket) {
  2467    // TODO(b/171436815): Re-enable when S/R is fixed.
  2468    const DisableSave disable_save;
  2469    // TODO(b/175409607): Fix this test for hostinet.
  2470    SKIP_IF(IsRunningWithHostinet());
  2471    ShutdownConnectingSocket(GetParam(), SHUT_WR);
  2472  }
  2473  
  2474  TEST_P(SimpleTcpSocketTest, ShutdownReadWriteConnectingSocket) {
  2475    // TODO(b/171436815): Re-enable when S/R is fixed.
  2476    const DisableSave disable_save;
  2477    // TODO(b/175409607): Fix this test for hostinet.
  2478    SKIP_IF(IsRunningWithHostinet());
  2479    ShutdownConnectingSocket(GetParam(), SHUT_RDWR);
  2480  }
  2481  
  2482  // Tests that connecting to an unspecified address results in ECONNREFUSED.
  2483  TEST_P(SimpleTcpSocketTest, ConnectUnspecifiedAddress) {
  2484    sockaddr_storage addr;
  2485    socklen_t addrlen = sizeof(addr);
  2486    memset(&addr, 0, addrlen);
  2487    addr.ss_family = GetParam();
  2488    auto do_connect = [&addr, addrlen]() {
  2489      FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE(
  2490          Socket(addr.ss_family, SOCK_STREAM, IPPROTO_TCP));
  2491      ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen),
  2492                  SyscallFailsWithErrno(ECONNREFUSED));
  2493    };
  2494    do_connect();
  2495    // Test the v4 mapped address as well.
  2496    if (GetParam() == AF_INET6) {
  2497      auto sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr);
  2498      sin6->sin6_addr.s6_addr[10] = sin6->sin6_addr.s6_addr[11] = 0xff;
  2499      do_connect();
  2500    }
  2501  }
  2502  
  2503  TEST_P(SimpleTcpSocketTest, OnlyAcknowledgeBacklogConnections) {
  2504    // TODO(b/171436815): Re-enable when S/R is fixed.
  2505    const DisableSave disable_save;
  2506    // TODO(b/175409607): Fix this test for hostinet.
  2507    SKIP_IF(IsRunningWithHostinet());
  2508  
  2509    // At some point, there was a bug in gVisor where a connection could be
  2510    // SYN-ACK'd by the server even if the accept queue was already full. This was
  2511    // possible because once the listener would process an ACK, it would move the
  2512    // new connection in the accept queue asynchronously. It created an
  2513    // opportunity where the listener could process another SYN before completing
  2514    // the delivery that would have filled the accept queue.
  2515    //
  2516    // This test checks that there is no such race on loopback. On other
  2517    // interfaces, where delivery is not synchronous, it is possible for more
  2518    // clients to be in the ESTABLISHED state than there are slots in the accept
  2519    // queue.
  2520  
  2521    std::array<std::optional<ScopedThread>, 100> threads;
  2522    for (auto& thread : threads) {
  2523      thread.emplace([]() {
  2524        FileDescriptor bound_s = ASSERT_NO_ERRNO_AND_VALUE(
  2525            Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  2526  
  2527        sockaddr_storage bound_addr =
  2528            ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  2529        socklen_t bound_addrlen = sizeof(bound_addr);
  2530  
  2531        ASSERT_THAT(bind(bound_s.get(), AsSockAddr(&bound_addr), bound_addrlen),
  2532                    SyscallSucceeds());
  2533  
  2534        // Start listening. Use a zero backlog to only allow one connection in the
  2535        // accept queue.
  2536        ASSERT_THAT(listen(bound_s.get(), 0), SyscallSucceeds());
  2537  
  2538        // Get the addresses the socket is bound to because the port is chosen by
  2539        // the stack.
  2540        ASSERT_THAT(
  2541            getsockname(bound_s.get(), AsSockAddr(&bound_addr), &bound_addrlen),
  2542            SyscallSucceeds());
  2543  
  2544        // Establish a connection, but do not accept it.
  2545        FileDescriptor connected_s = ASSERT_NO_ERRNO_AND_VALUE(
  2546            Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  2547        ASSERT_THAT(RetryEINTR(connect)(
  2548                        connected_s.get(),
  2549                        reinterpret_cast<const struct sockaddr*>(&bound_addr),
  2550                        bound_addrlen),
  2551                    SyscallSucceeds());
  2552  
  2553        // Immediately attempt to establish another connection. Use non blocking
  2554        // socket because this is expected to timeout.
  2555        FileDescriptor connecting_s = ASSERT_NO_ERRNO_AND_VALUE(
  2556            Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
  2557        ASSERT_THAT(RetryEINTR(connect)(
  2558                        connecting_s.get(),
  2559                        reinterpret_cast<const struct sockaddr*>(&bound_addr),
  2560                        bound_addrlen),
  2561                    SyscallFailsWithErrno(EINPROGRESS));
  2562  
  2563        struct pollfd poll_fd = {
  2564            .fd = connecting_s.get(),
  2565            .events = POLLOUT,
  2566        };
  2567        EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10),
  2568                    SyscallSucceedsWithValue(0));
  2569      });
  2570    }
  2571  }
  2572  
  2573  TEST_P(SimpleTcpSocketTest, SynRcvdOnListenerShutdown) {
  2574    // TODO(b/171436815): Re-enable when S/R is fixed.
  2575    const DisableSave disable_save;
  2576    FileDescriptor bound_s =
  2577        ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP));
  2578  
  2579    sockaddr_storage bound_addr =
  2580        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  2581    socklen_t bound_addrlen = sizeof(bound_addr);
  2582  
  2583    ASSERT_THAT(bind(bound_s.get(), AsSockAddr(&bound_addr), bound_addrlen),
  2584                SyscallSucceeds());
  2585  
  2586    // Get the addresses the socket is bound to because the port is chosen by the
  2587    // stack.
  2588    ASSERT_THAT(
  2589        getsockname(bound_s.get(), AsSockAddr(&bound_addr), &bound_addrlen),
  2590        SyscallSucceeds());
  2591  
  2592    // kBacklog connections are permitted to be in the SYNRCVD state. Select the
  2593    // largest reasonable value; we want to create a situation where at least some
  2594    // of the connections are still in SYNRCVD when we shut down the listener.
  2595    constexpr int kBacklog = 256;
  2596    ASSERT_THAT(listen(bound_s.get(), kBacklog), SyscallSucceeds());
  2597  
  2598    std::array<std::thread, kBacklog + 1> threads;
  2599    for (auto& thread : threads) {
  2600      FileDescriptor connecting_s = ASSERT_NO_ERRNO_AND_VALUE(
  2601          Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP));
  2602      ASSERT_THAT(RetryEINTR(connect)(
  2603                      connecting_s.get(),
  2604                      reinterpret_cast<const struct sockaddr*>(&bound_addr),
  2605                      bound_addrlen),
  2606                  SyscallFailsWithErrno(EINPROGRESS));
  2607      thread = std::thread([connecting_s = std::move(connecting_s)]() {
  2608        struct pollfd poll_fd = {
  2609            .fd = connecting_s.get(),
  2610        };
  2611        poll_fd.events = std::numeric_limits<decltype(poll_fd.events)>::max();
  2612        ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, 1000),
  2613                    SyscallSucceedsWithValue(1));
  2614  
  2615        int err;
  2616        socklen_t optlen = sizeof(err);
  2617        ASSERT_THAT(
  2618            getsockopt(connecting_s.get(), SOL_SOCKET, SO_ERROR, &err, &optlen),
  2619            SyscallSucceeds());
  2620        ASSERT_EQ(optlen, sizeof(err));
  2621  
  2622        if (err == 0) {
  2623          EXPECT_EQ(poll_fd.revents, POLLOUT
  2624          // TODO(https://fxbug.dev/42152810): Remove when POLLWRNORM is correctly
  2625          // asserted in Fuchsia.
  2626  #if !defined(__Fuchsia__)
  2627                                         | POLLWRNORM
  2628  #endif
  2629          );
  2630        } else {
  2631          EXPECT_THAT(err, ::testing::AnyOf(::testing::Eq(ECONNRESET),
  2632                                            ::testing::Eq(ECONNREFUSED)))
  2633              << strerror(err);
  2634  
  2635          const int revents = poll_fd.revents;
  2636  
  2637          // It's possible the error arrived *after* poll returned. Fetch the
  2638          // signals again - this time with a zero timeout.
  2639          EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 0),
  2640                      SyscallSucceedsWithValue(1));
  2641  
  2642          EXPECT_EQ(poll_fd.revents,
  2643          // TODO(https://fxbug.dev/42156248): Remove when other signals are
  2644          // asserted together with POLLERR in Fuchsia.
  2645  #if defined(__Fuchsia__)
  2646                    POLLOUT
  2647  #else
  2648                    []() {
  2649                      const int expected_revents = POLLIN | POLLOUT | POLLHUP |
  2650                                                   POLLRDNORM | POLLWRNORM |
  2651                                                   POLLRDHUP;
  2652                      // TODO(gvisor.dev/issue/6666): POLLERR is still present
  2653                      // after getsockopt(..., SO_ERROR, ...) call (unless
  2654                      // hostinet is used).
  2655                      if (IsRunningOnGvisor() && !IsRunningWithHostinet()) {
  2656                        return expected_revents | POLLPRI | POLLERR;
  2657                      }
  2658                      return expected_revents;
  2659                    }()
  2660  #endif
  2661          );
  2662  
  2663          EXPECT_THAT(
  2664              // TODO(gvisor.dev/issue/6666): on Linux, POLLERR goes away
  2665              // after the getsockopt(..., SO_ERROR, ...) call, but not on
  2666              // gVisor (unless hostinet is used).
  2667              revents,
  2668              ::testing::AnyOf(
  2669                  // If the error arrived after poll returned.
  2670                  ::testing::Eq(POLLOUT | POLLWRNORM),
  2671                  ::testing::Eq([expected_revents = poll_fd.revents]() -> int {
  2672                    if (IsRunningOnGvisor() && !IsRunningWithHostinet()) {
  2673                      return expected_revents;
  2674                    }
  2675                    return expected_revents | POLLERR;
  2676                  }())));
  2677        }
  2678      });
  2679    }
  2680  
  2681    EXPECT_THAT(shutdown(bound_s.get(), SHUT_RD), SyscallSucceeds());
  2682  
  2683    for (auto& thread : threads) {
  2684      thread.join();
  2685    }
  2686  }
  2687  
  2688  // Fuchsia doesn't have epoll.
  2689  #ifdef __linux__
  2690  
  2691  // Ensure that we can S/R when epoll is waiting on a listening socket.
  2692  // Regression test for b/280313827.
  2693  TEST_P(SimpleTcpSocketTest, EpollListeningSocket) {
  2694    // Create the listening socket.
  2695    int fd;
  2696    ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, 0),
  2697                SyscallSucceeds());
  2698    FileDescriptor sockfd(fd);
  2699  
  2700    // Bind to some port.
  2701    sockaddr_storage addr =
  2702        ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam()));
  2703    socklen_t addrlen = sizeof(addr);
  2704    ASSERT_THAT(bind(sockfd.get(), AsSockAddr(&addr), addrlen),
  2705                SyscallSucceeds());
  2706  
  2707    // Listen and accept with the expectation that accept fails.
  2708    ASSERT_THAT(listen(sockfd.get(), 2), SyscallSucceeds());
  2709    ASSERT_THAT(accept(sockfd.get(), nullptr, nullptr),
  2710                SyscallFailsWithErrno(EAGAIN));
  2711  
  2712    // Start a thread that waits a bit, then connects to the listening socket.
  2713    ScopedThread save_and_connect_thread([&]() {
  2714      // Give epoll a chance to start blocking.
  2715      absl::SleepFor(absl::Seconds(1));
  2716  
  2717      // Save while epoll is blocking.
  2718      MaybeSave();
  2719  
  2720      // Get the listener's address and connect to it.
  2721      int fd;
  2722      ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, 0), SyscallSucceeds());
  2723      FileDescriptor connfd(fd);
  2724      ASSERT_THAT(getsockname(sockfd.get(), AsSockAddr(&addr), &addrlen),
  2725                  SyscallSucceeds());
  2726      ASSERT_THAT(RetryEINTR(connect)(connfd.get(), AsSockAddr(&addr), addrlen),
  2727                  SyscallSucceeds());
  2728    });
  2729  
  2730    // Epoll on sockfd.
  2731    ASSERT_THAT(fd = epoll_create(1), SyscallSucceeds());
  2732    FileDescriptor epollfd(fd);
  2733    struct epoll_event event = {};
  2734    event.events = EPOLLIN;
  2735    ASSERT_THAT(epoll_ctl(epollfd.get(), EPOLL_CTL_ADD, sockfd.get(), &event),
  2736                SyscallSucceeds());
  2737  
  2738    struct epoll_event results = {};
  2739    ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), &results, 1, 60000),
  2740                SyscallSucceeds());
  2741  
  2742    save_and_connect_thread.Join();
  2743  }
  2744  
  2745  TEST_P(SimpleTcpSocketTest, SetTCPCorkOff) {
  2746    int fd;
  2747    ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP),
  2748                SyscallSucceeds());
  2749  
  2750    ASSERT_THAT(
  2751        setsockopt(fd, IPPROTO_TCP, TCP_CORK, &kSockOptOff, sizeof(kSockOptOff)),
  2752        SyscallSucceeds());
  2753  }
  2754  #endif  // __linux__
  2755  
// Instantiates every SimpleTcpSocketTest case once per address family value:
// AF_INET and AF_INET6. Tests read the value through GetParam().
INSTANTIATE_TEST_SUITE_P(AllInetTests, SimpleTcpSocketTest,
                         ::testing::Values(AF_INET, AF_INET6));
  2758  
  2759  }  // namespace
  2760  
  2761  }  // namespace testing
  2762  }  // namespace gvisor