gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/test/syscalls/linux/tcp_socket.cc (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include <fcntl.h> 16 17 #ifdef __linux__ 18 #include <linux/filter.h> 19 #include <sys/epoll.h> 20 #endif // __linux__ 21 #include <errno.h> 22 #include <netinet/in.h> 23 #include <netinet/tcp.h> 24 #include <poll.h> 25 #include <sys/ioctl.h> 26 #include <sys/socket.h> 27 #include <unistd.h> 28 29 #include <limits> 30 #include <vector> 31 32 #include "gmock/gmock.h" 33 #include "gtest/gtest.h" 34 #include "absl/status/statusor.h" 35 #include "absl/time/clock.h" 36 #include "absl/time/time.h" 37 #include "test/util/file_descriptor.h" 38 #include "test/util/posix_error.h" 39 #include "test/util/socket_util.h" 40 #include "test/util/test_util.h" 41 #include "test/util/thread_util.h" 42 43 using ::testing::AnyOf; 44 45 namespace gvisor { 46 namespace testing { 47 48 namespace { 49 50 constexpr int kTimeoutMillis = 10000; 51 52 PosixErrorOr<sockaddr_storage> InetLoopbackAddrZeroPort(int family) { 53 struct sockaddr_storage addr; 54 memset(&addr, 0, sizeof(addr)); 55 addr.ss_family = family; 56 switch (family) { 57 case AF_INET: { 58 auto& addr_in = reinterpret_cast<struct sockaddr_in&>(addr); 59 addr_in.sin_addr.s_addr = htonl(INADDR_LOOPBACK); 60 break; 61 } 62 case AF_INET6: { 63 auto& addr_in6 = reinterpret_cast<struct sockaddr_in6&>(addr); 64 addr_in6.sin6_addr = in6addr_loopback; 65 break; 66 } 67 default: 68 return PosixError(EINVAL, 69 absl::StrCat("unknown socket family: ", family)); 70 } 71 return addr; 72 } 73 74 // Gets the port number from the address, assuming it is an IPv4 or IPv6 socket 75 // address. 76 absl::StatusOr<uint16_t> GetPort(const sockaddr_storage& addr) { 77 switch (addr.ss_family) { 78 case AF_INET: 79 return reinterpret_cast<const struct sockaddr_in&>(addr).sin_port; 80 case AF_INET6: 81 return reinterpret_cast<const struct sockaddr_in6&>(addr).sin6_port; 82 default: 83 return absl::InvalidArgumentError("not an IPv4 or IPv6 address"); 84 } 85 } 86 87 // Allocates a file descriptor that is bound to a local port but not listening. 88 // Sets `addr` and `addrlen` to the bound address. 89 PosixErrorOr<FileDescriptor> ReserveLocalPort(int family, 90 sockaddr_storage& addr, 91 socklen_t& addrlen) { 92 // Reserve a port by binding to it but not listening. 93 ASSIGN_OR_RETURN_ERRNO(FileDescriptor reserving, 94 Socket(family, SOCK_STREAM, IPPROTO_TCP)); 95 if (int err = bind(reserving.get(), AsSockAddr(&addr), addrlen); err != 0) { 96 return PosixError(err, "bind failed"); 97 } 98 // Get the address with the reserved port because the port is chosen by the 99 // stack. 100 if (int err = getsockname(reserving.get(), AsSockAddr(&addr), &addrlen); 101 err != 0) { 102 return PosixError(err, "getsockname failed"); 103 } 104 return reserving; 105 } 106 107 static void FillSocketBuffers(int sender, int receiver) { 108 // Set the FD to O_NONBLOCK. 109 int opts; 110 int orig_opts; 111 ASSERT_THAT(opts = fcntl(sender, F_GETFL), SyscallSucceeds()); 112 orig_opts = opts; 113 opts |= O_NONBLOCK; 114 ASSERT_THAT(fcntl(sender, F_SETFL, opts), SyscallSucceeds()); 115 116 // Set TCP_NODELAY, which will cause linux to fill the receive buffer from the 117 // send buffer as quickly as possibly. This way we can fill up both buffers 118 // faster. 119 constexpr int tcp_nodelay_flag = 1; 120 ASSERT_THAT(setsockopt(sender, IPPROTO_TCP, TCP_NODELAY, &tcp_nodelay_flag, 121 sizeof(tcp_nodelay_flag)), 122 SyscallSucceeds()); 123 124 // Set a 256KB send/receive buffer. 125 int buf_sz = 1 << 18; 126 EXPECT_THAT( 127 setsockopt(receiver, SOL_SOCKET, SO_RCVBUF, &buf_sz, sizeof(buf_sz)), 128 SyscallSucceedsWithValue(0)); 129 EXPECT_THAT( 130 setsockopt(sender, SOL_SOCKET, SO_SNDBUF, &buf_sz, sizeof(buf_sz)), 131 SyscallSucceedsWithValue(0)); 132 133 // Create a large buffer that will be used for sending. 134 std::vector<char> buf(buf_sz << 2); 135 136 // Write until we receive an error. 137 while (RetryEINTR(send)(sender, buf.data(), buf.size(), 0) != -1) { 138 // Sleep to give linux a chance to move data from the send buffer to the 139 // receive buffer. 140 absl::SleepFor(absl::Milliseconds(100)); // 100ms. 141 } 142 // The last error should have been EWOULDBLOCK. 143 ASSERT_EQ(errno, EWOULDBLOCK); 144 145 // Restore the fcntl opts 146 ASSERT_THAT(fcntl(sender, F_SETFL, orig_opts), SyscallSucceeds()); 147 } 148 149 // Fixture for tests parameterized by the address family to use (AF_INET and 150 // AF_INET6) when creating sockets. 151 class TcpSocketTest : public ::testing::TestWithParam<int> { 152 protected: 153 // Creates three sockets that will be used by test cases -- a listener, one 154 // that connects, and the accepted one. 155 void SetUp() override; 156 157 // Listening socket. 158 FileDescriptor listener_; 159 160 // Socket connected via connect(). 161 FileDescriptor connected_; 162 163 // Socket connected via accept(). 164 FileDescriptor accepted_; 165 166 // Initial size of the send buffer. 167 int sendbuf_size_ = -1; 168 }; 169 170 void TcpSocketTest::SetUp() { 171 listener_ = 172 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 173 174 connected_ = 175 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 176 177 // Initialize address to the loopback one. 178 sockaddr_storage addr = 179 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 180 socklen_t addrlen = sizeof(addr); 181 182 // Bind to some port then start listening. 183 ASSERT_THAT(bind(listener_.get(), AsSockAddr(&addr), addrlen), 184 SyscallSucceeds()); 185 186 ASSERT_THAT(listen(listener_.get(), SOMAXCONN), SyscallSucceeds()); 187 188 // Get the address we're listening on, then connect to it. We need to do this 189 // because we're allowing the stack to pick a port for us. 190 ASSERT_THAT(getsockname(listener_.get(), AsSockAddr(&addr), &addrlen), 191 SyscallSucceeds()); 192 193 ASSERT_THAT(RetryEINTR(connect)(connected_.get(), AsSockAddr(&addr), addrlen), 194 SyscallSucceeds()); 195 196 // Get the initial send buffer size. 197 socklen_t optlen = sizeof(sendbuf_size_); 198 ASSERT_THAT(getsockopt(connected_.get(), SOL_SOCKET, SO_SNDBUF, 199 &sendbuf_size_, &optlen), 200 SyscallSucceeds()); 201 202 // Accept the connection. 203 accepted_ = 204 ASSERT_NO_ERRNO_AND_VALUE(Accept(listener_.get(), nullptr, nullptr)); 205 } 206 207 TEST_P(TcpSocketTest, ConnectedAcceptedPeerAndLocalAreReciprocals) { 208 struct FdAndAddrs { 209 int fd; 210 sockaddr_storage peer; 211 socklen_t peer_len = sizeof(peer); 212 sockaddr_storage name; 213 socklen_t name_len = sizeof(name); 214 }; 215 216 FdAndAddrs connected{.fd = connected_.get()}, accepted{.fd = accepted_.get()}; 217 218 for (FdAndAddrs* fd_and_addrs : {&connected, &accepted}) { 219 ASSERT_THAT(getpeername(fd_and_addrs->fd, AsSockAddr(&fd_and_addrs->peer), 220 &fd_and_addrs->peer_len), 221 SyscallSucceeds()); 222 ASSERT_NE(fd_and_addrs->peer_len, 0); 223 ASSERT_THAT(getsockname(fd_and_addrs->fd, AsSockAddr(&fd_and_addrs->name), 224 &fd_and_addrs->name_len), 225 SyscallSucceeds()); 226 ASSERT_NE(fd_and_addrs->name_len, 0); 227 } 228 229 ASSERT_EQ(connected.peer_len, accepted.name_len); 230 EXPECT_EQ(memcmp(&connected.peer, &accepted.name, connected.peer_len), 0); 231 232 ASSERT_EQ(connected.name_len, accepted.peer_len); 233 EXPECT_EQ(memcmp(&connected.name, &accepted.peer, connected.name_len), 0); 234 } 235 236 TEST_P(TcpSocketTest, ConnectOnEstablishedConnection) { 237 sockaddr_storage addr; 238 socklen_t addrlen = sizeof(addr); 239 ASSERT_THAT(getpeername(connected_.get(), AsSockAddr(&addr), &addrlen), 240 SyscallSucceeds()); 241 242 ASSERT_THAT(RetryEINTR(connect)( 243 connected_.get(), 244 reinterpret_cast<const struct sockaddr*>(&addr), addrlen), 245 SyscallFailsWithErrno(EISCONN)); 246 247 ASSERT_THAT(RetryEINTR(connect)( 248 accepted_.get(), 249 reinterpret_cast<const struct sockaddr*>(&addr), addrlen), 250 SyscallFailsWithErrno(EISCONN)); 251 } 252 253 TEST_P(TcpSocketTest, ShutdownWriteInTimeWait) { 254 EXPECT_THAT(shutdown(accepted_.get(), SHUT_WR), SyscallSucceeds()); 255 EXPECT_THAT(shutdown(connected_.get(), SHUT_RDWR), SyscallSucceeds()); 256 absl::SleepFor(absl::Seconds(1)); // Wait to enter TIME_WAIT. 257 EXPECT_THAT(shutdown(accepted_.get(), SHUT_WR), 258 SyscallFailsWithErrno(ENOTCONN)); 259 } 260 261 TEST_P(TcpSocketTest, ShutdownWriteInFinWait1) { 262 EXPECT_THAT(shutdown(accepted_.get(), SHUT_WR), SyscallSucceeds()); 263 EXPECT_THAT(shutdown(accepted_.get(), SHUT_WR), SyscallSucceeds()); 264 absl::SleepFor(absl::Seconds(1)); // Wait to enter FIN-WAIT2. 265 EXPECT_THAT(shutdown(accepted_.get(), SHUT_WR), SyscallSucceeds()); 266 } 267 268 TEST_P(TcpSocketTest, DataCoalesced) { 269 char buf[10]; 270 271 // Write in two steps. 272 ASSERT_THAT(RetryEINTR(write)(connected_.get(), buf, sizeof(buf) / 2), 273 SyscallSucceedsWithValue(sizeof(buf) / 2)); 274 ASSERT_THAT(RetryEINTR(write)(connected_.get(), buf, sizeof(buf) / 2), 275 SyscallSucceedsWithValue(sizeof(buf) / 2)); 276 277 // Allow stack to process both packets. 278 absl::SleepFor(absl::Seconds(1)); 279 280 // Read in one shot. 281 EXPECT_THAT(RetryEINTR(recv)(accepted_.get(), buf, sizeof(buf), 0), 282 SyscallSucceedsWithValue(sizeof(buf))); 283 } 284 285 TEST_P(TcpSocketTest, SenderAddressIgnored) { 286 char buf[3]; 287 ASSERT_THAT(RetryEINTR(write)(connected_.get(), buf, sizeof(buf)), 288 SyscallSucceedsWithValue(sizeof(buf))); 289 290 struct sockaddr_storage addr; 291 socklen_t addrlen = sizeof(addr); 292 memset(&addr, 0, sizeof(addr)); 293 294 ASSERT_THAT(RetryEINTR(recvfrom)(accepted_.get(), buf, sizeof(buf), 0, 295 AsSockAddr(&addr), &addrlen), 296 SyscallSucceedsWithValue(3)); 297 298 // Check that addr remains zeroed-out. 299 const char* ptr = reinterpret_cast<char*>(&addr); 300 for (size_t i = 0; i < sizeof(addr); i++) { 301 EXPECT_EQ(ptr[i], 0); 302 } 303 } 304 305 TEST_P(TcpSocketTest, SenderAddressIgnoredOnPeek) { 306 char buf[3]; 307 ASSERT_THAT(RetryEINTR(write)(connected_.get(), buf, sizeof(buf)), 308 SyscallSucceedsWithValue(sizeof(buf))); 309 310 struct sockaddr_storage addr; 311 socklen_t addrlen = sizeof(addr); 312 memset(&addr, 0, sizeof(addr)); 313 314 ASSERT_THAT(RetryEINTR(recvfrom)(accepted_.get(), buf, sizeof(buf), MSG_PEEK, 315 AsSockAddr(&addr), &addrlen), 316 SyscallSucceedsWithValue(3)); 317 318 // Check that addr remains zeroed-out. 319 const char* ptr = reinterpret_cast<char*>(&addr); 320 for (size_t i = 0; i < sizeof(addr); i++) { 321 EXPECT_EQ(ptr[i], 0); 322 } 323 } 324 325 TEST_P(TcpSocketTest, SendtoAddressIgnored) { 326 struct sockaddr_storage addr; 327 memset(&addr, 0, sizeof(addr)); 328 addr.ss_family = GetParam(); // FIXME(b/63803955) 329 330 char data = '\0'; 331 EXPECT_THAT(RetryEINTR(sendto)(connected_.get(), &data, sizeof(data), 0, 332 AsSockAddr(&addr), sizeof(addr)), 333 SyscallSucceedsWithValue(1)); 334 } 335 336 TEST_P(TcpSocketTest, WritevZeroIovec) { 337 // 2 bytes just to be safe and have vecs[1] not point to something random 338 // (even though length is 0). 339 char buf[2]; 340 char recv_buf[1]; 341 342 // Construct a vec where the final vector is of length 0. 343 iovec vecs[2] = {}; 344 vecs[0].iov_base = buf; 345 vecs[0].iov_len = 1; 346 vecs[1].iov_base = buf + 1; 347 vecs[1].iov_len = 0; 348 349 EXPECT_THAT(RetryEINTR(writev)(connected_.get(), vecs, 2), 350 SyscallSucceedsWithValue(1)); 351 352 EXPECT_THAT(RetryEINTR(recv)(accepted_.get(), recv_buf, 1, 0), 353 SyscallSucceedsWithValue(1)); 354 EXPECT_EQ(memcmp(recv_buf, buf, 1), 0); 355 } 356 357 TEST_P(TcpSocketTest, ZeroWriteAllowed) { 358 char buf[3]; 359 // Send a zero length packet. 360 ASSERT_THAT(RetryEINTR(write)(connected_.get(), buf, 0), 361 SyscallSucceedsWithValue(0)); 362 // Verify that there is no packet available. 363 EXPECT_THAT(RetryEINTR(recv)(accepted_.get(), buf, sizeof(buf), MSG_DONTWAIT), 364 SyscallFailsWithErrno(EAGAIN)); 365 } 366 367 // Test that a non-blocking write with a buffer that is larger than the send 368 // buffer size will not actually write the whole thing at once. Regression test 369 // for b/64438887. 370 TEST_P(TcpSocketTest, NonblockingLargeWrite) { 371 // Set the FD to O_NONBLOCK. 372 int opts; 373 ASSERT_THAT(opts = fcntl(connected_.get(), F_GETFL), SyscallSucceeds()); 374 opts |= O_NONBLOCK; 375 ASSERT_THAT(fcntl(connected_.get(), F_SETFL, opts), SyscallSucceeds()); 376 377 // Allocate a buffer three times the size of the send buffer. We do this with 378 // a vector to avoid allocating on the stack. 379 int size = 3 * sendbuf_size_; 380 std::vector<char> buf(size); 381 382 // Try to write the whole thing. 383 int n; 384 ASSERT_THAT(n = RetryEINTR(write)(connected_.get(), buf.data(), size), 385 SyscallSucceeds()); 386 387 // We should have written something, but not the whole thing. 388 EXPECT_GT(n, 0); 389 EXPECT_LT(n, size); 390 } 391 392 // Test that a blocking write with a buffer that is larger than the send buffer 393 // will block until the entire buffer is sent. 394 TEST_P(TcpSocketTest, BlockingLargeWrite) { 395 // Allocate a buffer three times the size of the send buffer on the heap. We 396 // do this as a vector to avoid allocating on the stack. 397 int size = 3 * sendbuf_size_; 398 std::vector<char> writebuf(size); 399 400 // Start reading the response in a loop. 401 int read_bytes = 0; 402 ScopedThread t([this, &read_bytes]() { 403 // Avoid interrupting the blocking write in main thread. 404 const DisableSave disable_save; 405 406 // Take ownership of the FD so that we close it on failure. This will 407 // unblock the blocking write below. 408 FileDescriptor fd(std::move(accepted_)); 409 410 char readbuf[2500] = {}; 411 int n = -1; 412 while (n != 0) { 413 ASSERT_THAT(n = RetryEINTR(read)(fd.get(), &readbuf, sizeof(readbuf)), 414 SyscallSucceeds()); 415 read_bytes += n; 416 } 417 }); 418 419 // Try to write the whole thing. 420 int n; 421 ASSERT_THAT(n = WriteFd(connected_.get(), writebuf.data(), size), 422 SyscallSucceeds()); 423 424 // We should have written the whole thing. 425 EXPECT_EQ(n, size); 426 EXPECT_THAT(close(connected_.release()), SyscallSucceedsWithValue(0)); 427 t.Join(); 428 429 // We should have read the whole thing. 430 EXPECT_EQ(read_bytes, size); 431 } 432 433 // Test that a send with MSG_DONTWAIT flag and buffer that larger than the send 434 // buffer size will not write the whole thing. 435 TEST_P(TcpSocketTest, LargeSendDontWait) { 436 // Allocate a buffer three times the size of the send buffer. We do this on 437 // with a vector to avoid allocating on the stack. 438 int size = 3 * sendbuf_size_; 439 std::vector<char> buf(size); 440 441 // Try to write the whole thing with MSG_DONTWAIT flag, which can 442 // return a partial write. 443 int n; 444 ASSERT_THAT( 445 n = RetryEINTR(send)(connected_.get(), buf.data(), size, MSG_DONTWAIT), 446 SyscallSucceeds()); 447 448 // We should have written something, but not the whole thing. 449 EXPECT_GT(n, 0); 450 EXPECT_LT(n, size); 451 } 452 453 // Test that a send on a non-blocking socket with a buffer that larger than the 454 // send buffer will not write the whole thing at once. 455 TEST_P(TcpSocketTest, NonblockingLargeSend) { 456 // Set the FD to O_NONBLOCK. 457 int opts; 458 ASSERT_THAT(opts = fcntl(connected_.get(), F_GETFL), SyscallSucceeds()); 459 opts |= O_NONBLOCK; 460 ASSERT_THAT(fcntl(connected_.get(), F_SETFL, opts), SyscallSucceeds()); 461 462 // Allocate a buffer three times the size of the send buffer. We do this on 463 // with a vector to avoid allocating on the stack. 464 int size = 3 * sendbuf_size_; 465 std::vector<char> buf(size); 466 467 // Try to write the whole thing. 468 int n; 469 ASSERT_THAT(n = RetryEINTR(send)(connected_.get(), buf.data(), size, 0), 470 SyscallSucceeds()); 471 472 // We should have written something, but not the whole thing. 473 EXPECT_GT(n, 0); 474 EXPECT_LT(n, size); 475 } 476 477 // Same test as above, but calls send instead of write. 478 TEST_P(TcpSocketTest, BlockingLargeSend) { 479 // Allocate a buffer three times the size of the send buffer. We do this on 480 // with a vector to avoid allocating on the stack. 481 int size = 3 * sendbuf_size_; 482 std::vector<char> writebuf(size); 483 484 // Start reading the response in a loop. 485 int read_bytes = 0; 486 ScopedThread t([this, &read_bytes]() { 487 // Avoid interrupting the blocking write in main thread. 488 const DisableSave disable_save; 489 490 // Take ownership of the FD so that we close it on failure. This will 491 // unblock the blocking write below. 492 FileDescriptor fd(std::move(accepted_)); 493 494 char readbuf[2500] = {}; 495 int n = -1; 496 while (n != 0) { 497 ASSERT_THAT(n = RetryEINTR(read)(fd.get(), &readbuf, sizeof(readbuf)), 498 SyscallSucceeds()); 499 read_bytes += n; 500 } 501 }); 502 503 // Try to send the whole thing. 504 int n; 505 ASSERT_THAT(n = SendFd(connected_.get(), writebuf.data(), size, 0), 506 SyscallSucceeds()); 507 508 // We should have written the whole thing. 509 EXPECT_EQ(n, size); 510 EXPECT_THAT(close(connected_.release()), SyscallSucceedsWithValue(0)); 511 t.Join(); 512 513 // We should have read the whole thing. 514 EXPECT_EQ(read_bytes, size); 515 } 516 517 // Test that polling on a socket with a full send buffer will block. 518 TEST_P(TcpSocketTest, PollWithFullBufferBlocks) { 519 FillSocketBuffers(connected_.get(), accepted_.get()); 520 // Now polling on the FD with a timeout should return 0 corresponding to no 521 // FDs ready. 522 struct pollfd poll_fd = {connected_.get(), POLLOUT, 0}; 523 EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10), SyscallSucceedsWithValue(0)); 524 } 525 526 TEST_P(TcpSocketTest, ClosedWriteBlockingSocket) { 527 FillSocketBuffers(connected_.get(), accepted_.get()); 528 constexpr int timeout = 10; 529 struct timeval tv = {.tv_sec = timeout, .tv_usec = 0}; 530 EXPECT_THAT( 531 setsockopt(connected_.get(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)), 532 SyscallSucceeds()); 533 534 struct timespec begin; 535 struct timespec end; 536 const DisableSave disable_save; // Timing-related. 537 EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &begin), SyscallSucceeds()); 538 539 ScopedThread send_thread([this]() { 540 char send_byte; 541 // Expect the send() to be blocked until receive timeout. 542 ASSERT_THAT( 543 RetryEINTR(send)(connected_.get(), &send_byte, sizeof(send_byte), 0), 544 SyscallFailsWithErrno(EAGAIN)); 545 }); 546 547 // Wait for the thread to be blocked on write. 548 absl::SleepFor(absl::Milliseconds(250)); 549 // Socket close does not have any effect on a blocked write. 550 ASSERT_THAT(close(connected_.release()), SyscallSucceeds()); 551 552 send_thread.Join(); 553 554 EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &end), SyscallSucceeds()); 555 // Check the lower bound on the timeout. Checking for an upper bound is 556 // fragile because Linux can overrun the timeout due to scheduling delays. 557 EXPECT_GT(ms_elapsed(begin, end), timeout * 1000 - 1); 558 } 559 560 TEST_P(TcpSocketTest, ClosedReadBlockingSocket) { 561 constexpr int timeout = 10; 562 struct timeval tv = {.tv_sec = timeout, .tv_usec = 0}; 563 EXPECT_THAT( 564 setsockopt(connected_.get(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), 565 SyscallSucceeds()); 566 567 struct timespec begin; 568 struct timespec end; 569 const DisableSave disable_save; // Timing-related. 570 EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &begin), SyscallSucceeds()); 571 572 ScopedThread read_thread([this]() { 573 char read_byte; 574 // Expect the read() to be blocked until receive timeout. 575 ASSERT_THAT(read(connected_.get(), &read_byte, sizeof(read_byte)), 576 SyscallFailsWithErrno(EAGAIN)); 577 }); 578 579 // Wait for the thread to be blocked on read. 580 absl::SleepFor(absl::Milliseconds(250)); 581 // Socket close does not have any effect on a blocked read. 582 ASSERT_THAT(close(connected_.release()), SyscallSucceeds()); 583 584 read_thread.Join(); 585 586 EXPECT_THAT(clock_gettime(CLOCK_MONOTONIC, &end), SyscallSucceeds()); 587 // Check the lower bound on the timeout. Checking for an upper bound is 588 // fragile because Linux can overrun the timeout due to scheduling delays. 589 EXPECT_GT(ms_elapsed(begin, end), timeout * 1000 - 1); 590 } 591 592 TEST_P(TcpSocketTest, MsgTrunc) { 593 char sent_data[512]; 594 RandomizeBuffer(sent_data, sizeof(sent_data)); 595 ASSERT_THAT( 596 RetryEINTR(send)(connected_.get(), sent_data, sizeof(sent_data), 0), 597 SyscallSucceedsWithValue(sizeof(sent_data))); 598 char received_data[sizeof(sent_data)] = {}; 599 ASSERT_THAT(RetryEINTR(recv)(accepted_.get(), received_data, 600 sizeof(received_data) / 2, MSG_TRUNC), 601 SyscallSucceedsWithValue(sizeof(sent_data) / 2)); 602 603 // Check that we didn't get anything. 604 char zeros[sizeof(received_data)] = {}; 605 EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data))); 606 } 607 608 // MSG_CTRUNC is a return flag but linux allows it to be set on input flags 609 // without returning an error. 610 TEST_P(TcpSocketTest, MsgTruncWithCtrunc) { 611 char sent_data[512]; 612 RandomizeBuffer(sent_data, sizeof(sent_data)); 613 ASSERT_THAT( 614 RetryEINTR(send)(connected_.get(), sent_data, sizeof(sent_data), 0), 615 SyscallSucceedsWithValue(sizeof(sent_data))); 616 char received_data[sizeof(sent_data)] = {}; 617 ASSERT_THAT( 618 RetryEINTR(recv)(accepted_.get(), received_data, 619 sizeof(received_data) / 2, MSG_TRUNC | MSG_CTRUNC), 620 SyscallSucceedsWithValue(sizeof(sent_data) / 2)); 621 622 // Check that we didn't get anything. 623 char zeros[sizeof(received_data)] = {}; 624 EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data))); 625 } 626 627 // This test will verify that MSG_CTRUNC doesn't do anything when specified 628 // on input. 629 TEST_P(TcpSocketTest, MsgTruncWithCtruncOnly) { 630 char sent_data[512]; 631 RandomizeBuffer(sent_data, sizeof(sent_data)); 632 ASSERT_THAT( 633 RetryEINTR(send)(connected_.get(), sent_data, sizeof(sent_data), 0), 634 SyscallSucceedsWithValue(sizeof(sent_data))); 635 char received_data[sizeof(sent_data)] = {}; 636 ASSERT_THAT(RetryEINTR(recv)(accepted_.get(), received_data, 637 sizeof(received_data) / 2, MSG_CTRUNC), 638 SyscallSucceedsWithValue(sizeof(sent_data) / 2)); 639 640 // Since MSG_CTRUNC here had no affect, it should not behave like MSG_TRUNC. 641 EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data) / 2)); 642 } 643 644 TEST_P(TcpSocketTest, MsgTruncLargeSize) { 645 char sent_data[512]; 646 RandomizeBuffer(sent_data, sizeof(sent_data)); 647 ASSERT_THAT( 648 RetryEINTR(send)(connected_.get(), sent_data, sizeof(sent_data), 0), 649 SyscallSucceedsWithValue(sizeof(sent_data))); 650 char received_data[sizeof(sent_data) * 2] = {}; 651 ASSERT_THAT(RetryEINTR(recv)(accepted_.get(), received_data, 652 sizeof(received_data), MSG_TRUNC), 653 SyscallSucceedsWithValue(sizeof(sent_data))); 654 655 // Check that we didn't get anything. 656 char zeros[sizeof(received_data)] = {}; 657 EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data))); 658 } 659 660 TEST_P(TcpSocketTest, MsgTruncPeek) { 661 char sent_data[512]; 662 RandomizeBuffer(sent_data, sizeof(sent_data)); 663 ASSERT_THAT( 664 RetryEINTR(send)(connected_.get(), sent_data, sizeof(sent_data), 0), 665 SyscallSucceedsWithValue(sizeof(sent_data))); 666 char received_data[sizeof(sent_data)] = {}; 667 ASSERT_THAT(RetryEINTR(recv)(accepted_.get(), received_data, 668 sizeof(received_data) / 2, MSG_TRUNC | MSG_PEEK), 669 SyscallSucceedsWithValue(sizeof(sent_data) / 2)); 670 671 // Check that we didn't get anything. 672 char zeros[sizeof(received_data)] = {}; 673 EXPECT_EQ(0, memcmp(zeros, received_data, sizeof(received_data))); 674 675 // Check that we can still get all of the data. 676 ASSERT_THAT(RetryEINTR(recv)(accepted_.get(), received_data, 677 sizeof(received_data), 0), 678 SyscallSucceedsWithValue(sizeof(sent_data))); 679 EXPECT_EQ(0, memcmp(sent_data, received_data, sizeof(sent_data))); 680 } 681 682 TEST_P(TcpSocketTest, NoDelayDefault) { 683 int get = -1; 684 socklen_t get_len = sizeof(get); 685 EXPECT_THAT( 686 getsockopt(connected_.get(), IPPROTO_TCP, TCP_NODELAY, &get, &get_len), 687 SyscallSucceedsWithValue(0)); 688 EXPECT_EQ(get_len, sizeof(get)); 689 EXPECT_EQ(get, kSockOptOff); 690 } 691 692 TEST_P(TcpSocketTest, SetNoDelay) { 693 ASSERT_THAT(setsockopt(connected_.get(), IPPROTO_TCP, TCP_NODELAY, 694 &kSockOptOn, sizeof(kSockOptOn)), 695 SyscallSucceeds()); 696 697 int get = -1; 698 socklen_t get_len = sizeof(get); 699 EXPECT_THAT( 700 getsockopt(connected_.get(), IPPROTO_TCP, TCP_NODELAY, &get, &get_len), 701 SyscallSucceedsWithValue(0)); 702 EXPECT_EQ(get_len, sizeof(get)); 703 EXPECT_EQ(get, kSockOptOn); 704 705 ASSERT_THAT(setsockopt(connected_.get(), IPPROTO_TCP, TCP_NODELAY, 706 &kSockOptOff, sizeof(kSockOptOff)), 707 SyscallSucceeds()); 708 709 EXPECT_THAT( 710 getsockopt(connected_.get(), IPPROTO_TCP, TCP_NODELAY, &get, &get_len), 711 SyscallSucceedsWithValue(0)); 712 EXPECT_EQ(get_len, sizeof(get)); 713 EXPECT_EQ(get, kSockOptOff); 714 } 715 716 #ifndef TCP_INQ 717 #define TCP_INQ 36 718 #endif 719 720 TEST_P(TcpSocketTest, TcpInqSetSockOpt) { 721 char buf[1024]; 722 ASSERT_THAT(RetryEINTR(write)(connected_.get(), buf, sizeof(buf)), 723 SyscallSucceedsWithValue(sizeof(buf))); 724 725 // TCP_INQ is disabled by default. 726 int val = -1; 727 socklen_t slen = sizeof(val); 728 EXPECT_THAT(getsockopt(accepted_.get(), SOL_TCP, TCP_INQ, &val, &slen), 729 SyscallSucceedsWithValue(0)); 730 ASSERT_EQ(val, 0); 731 732 // Try to set TCP_INQ. 733 val = 1; 734 EXPECT_THAT(setsockopt(accepted_.get(), SOL_TCP, TCP_INQ, &val, sizeof(val)), 735 SyscallSucceedsWithValue(0)); 736 val = -1; 737 slen = sizeof(val); 738 EXPECT_THAT(getsockopt(accepted_.get(), SOL_TCP, TCP_INQ, &val, &slen), 739 SyscallSucceedsWithValue(0)); 740 ASSERT_EQ(val, 1); 741 742 // Try to unset TCP_INQ. 743 val = 0; 744 EXPECT_THAT(setsockopt(accepted_.get(), SOL_TCP, TCP_INQ, &val, sizeof(val)), 745 SyscallSucceedsWithValue(0)); 746 val = -1; 747 slen = sizeof(val); 748 EXPECT_THAT(getsockopt(accepted_.get(), SOL_TCP, TCP_INQ, &val, &slen), 749 SyscallSucceedsWithValue(0)); 750 ASSERT_EQ(val, 0); 751 } 752 753 TEST_P(TcpSocketTest, TcpInq) { 754 char buf[1024]; 755 // Write more than one TCP segment. 756 int size = sizeof(buf); 757 int kChunk = sizeof(buf) / 4; 758 for (int i = 0; i < size; i += kChunk) { 759 ASSERT_THAT(RetryEINTR(write)(connected_.get(), buf, kChunk), 760 SyscallSucceedsWithValue(kChunk)); 761 } 762 763 int val = 1; 764 kChunk = sizeof(buf) / 2; 765 EXPECT_THAT(setsockopt(accepted_.get(), SOL_TCP, TCP_INQ, &val, sizeof(val)), 766 SyscallSucceedsWithValue(0)); 767 768 // Wait when all data will be in the received queue. 769 while (true) { 770 ASSERT_THAT(ioctl(accepted_.get(), TIOCINQ, &size), SyscallSucceeds()); 771 if (size == sizeof(buf)) { 772 break; 773 } 774 absl::SleepFor(absl::Milliseconds(10)); 775 } 776 777 struct msghdr msg = {}; 778 std::vector<char> control(CMSG_SPACE(sizeof(int))); 779 size = sizeof(buf); 780 struct iovec iov; 781 while (size != 0) { 782 msg.msg_control = &control[0]; 783 msg.msg_controllen = control.size(); 784 785 iov.iov_base = buf; 786 iov.iov_len = kChunk; 787 msg.msg_iov = &iov; 788 msg.msg_iovlen = 1; 789 ASSERT_THAT(RetryEINTR(recvmsg)(accepted_.get(), &msg, 0), 790 SyscallSucceedsWithValue(kChunk)); 791 size -= kChunk; 792 793 struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); 794 ASSERT_NE(cmsg, nullptr); 795 ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int))); 796 ASSERT_EQ(cmsg->cmsg_level, SOL_TCP); 797 ASSERT_EQ(cmsg->cmsg_type, TCP_INQ); 798 799 int inq = 0; 800 memcpy(&inq, CMSG_DATA(cmsg), sizeof(int)); 801 ASSERT_EQ(inq, size); 802 } 803 } 804 805 TEST_P(TcpSocketTest, Tiocinq) { 806 char buf[1024]; 807 int size = sizeof(buf); 808 ASSERT_THAT(RetryEINTR(write)(connected_.get(), buf, size), 809 SyscallSucceedsWithValue(size)); 810 811 uint32_t seed = time(nullptr); 812 const size_t max_chunk = size / 10; 813 while (size > 0) { 814 size_t chunk = (rand_r(&seed) % max_chunk) + 1; 815 ssize_t read = 816 RetryEINTR(recvfrom)(accepted_.get(), buf, chunk, 0, nullptr, nullptr); 817 ASSERT_THAT(read, SyscallSucceeds()); 818 size -= read; 819 820 // The remaining data should end up in the receive queue. 821 constexpr absl::Duration kSleepFor = absl::Milliseconds(10); 822 int inq = 0; 823 for (const auto start = absl::Now(); 824 absl::Now() <= start + absl::Milliseconds(kTimeoutMillis);) { 825 ASSERT_THAT(ioctl(accepted_.get(), TIOCINQ, &inq), SyscallSucceeds()); 826 if (size == inq) { 827 break; 828 } 829 absl::SleepFor(kSleepFor); 830 } 831 832 ASSERT_EQ(inq, size); 833 } 834 } 835 836 TEST_P(TcpSocketTest, TcpSCMPriority) { 837 char buf[1024]; 838 ASSERT_THAT(RetryEINTR(write)(connected_.get(), buf, sizeof(buf)), 839 SyscallSucceedsWithValue(sizeof(buf))); 840 841 int val = 1; 842 EXPECT_THAT(setsockopt(accepted_.get(), SOL_TCP, TCP_INQ, &val, sizeof(val)), 843 SyscallSucceedsWithValue(0)); 844 EXPECT_THAT( 845 setsockopt(accepted_.get(), SOL_SOCKET, SO_TIMESTAMP, &val, sizeof(val)), 846 SyscallSucceedsWithValue(0)); 847 848 struct msghdr msg = {}; 849 std::vector<char> control( 850 CMSG_SPACE(sizeof(struct timeval) + CMSG_SPACE(sizeof(int)))); 851 struct iovec iov; 852 msg.msg_control = &control[0]; 853 msg.msg_controllen = control.size(); 854 855 iov.iov_base = buf; 856 iov.iov_len = sizeof(buf); 857 msg.msg_iov = &iov; 858 msg.msg_iovlen = 1; 859 ASSERT_THAT(RetryEINTR(recvmsg)(accepted_.get(), &msg, 0), 860 SyscallSucceedsWithValue(sizeof(buf))); 861 862 struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg); 863 ASSERT_NE(cmsg, nullptr); 864 // TODO(b/78348848): SO_TIMESTAMP isn't implemented for TCP sockets. 865 if (!IsRunningOnGvisor() || cmsg->cmsg_level == SOL_SOCKET) { 866 ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET); 867 ASSERT_EQ(cmsg->cmsg_type, SO_TIMESTAMP); 868 ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(struct timeval))); 869 870 cmsg = CMSG_NXTHDR(&msg, cmsg); 871 ASSERT_NE(cmsg, nullptr); 872 } 873 ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(int))); 874 ASSERT_EQ(cmsg->cmsg_level, SOL_TCP); 875 ASSERT_EQ(cmsg->cmsg_type, TCP_INQ); 876 877 int inq = 0; 878 memcpy(&inq, CMSG_DATA(cmsg), sizeof(int)); 879 ASSERT_EQ(inq, 0); 880 881 cmsg = CMSG_NXTHDR(&msg, cmsg); 882 ASSERT_EQ(cmsg, nullptr); 883 } 884 885 TEST_P(TcpSocketTest, TimeWaitPollHUP) { 886 shutdown(connected_.get(), SHUT_RDWR); 887 ScopedThread t([&]() { 888 constexpr int16_t want_events = POLLHUP; 889 struct pollfd pfd = { 890 .fd = connected_.get(), 891 .events = want_events, 892 }; 893 ASSERT_THAT(poll(&pfd, 1, kTimeoutMillis), SyscallSucceedsWithValue(1)); 894 }); 895 shutdown(accepted_.get(), SHUT_RDWR); 896 t.Join(); 897 // At this point first_fd should be in TIME-WAIT and polling for POLLHUP 898 // should return with 1 FD. 899 constexpr int16_t want_events = POLLHUP; 900 struct pollfd pfd = { 901 .fd = connected_.get(), 902 .events = want_events, 903 }; 904 ASSERT_THAT(poll(&pfd, 1, kTimeoutMillis), SyscallSucceedsWithValue(1)); 905 } 906 907 // Tests that send will return EWOULDBLOCK initially with large buffer and will 908 // succeed after the send buffer size is increased. 909 TEST_P(TcpSocketTest, SendUnblocksOnSendBufferIncrease) { 910 // Set the FD to O_NONBLOCK. 911 int opts; 912 ASSERT_THAT(opts = fcntl(connected_.get(), F_GETFL), SyscallSucceeds()); 913 opts |= O_NONBLOCK; 914 ASSERT_THAT(fcntl(connected_.get(), F_SETFL, opts), SyscallSucceeds()); 915 916 // Get maximum buffer size by trying to set it to a large value. 917 constexpr int kSndBufSz = 0xffffffff; 918 ASSERT_THAT(setsockopt(connected_.get(), SOL_SOCKET, SO_SNDBUF, &kSndBufSz, 919 sizeof(kSndBufSz)), 920 SyscallSucceeds()); 921 922 int max_buffer_sz = 0; 923 socklen_t max_len = sizeof(max_buffer_sz); 924 ASSERT_THAT(getsockopt(connected_.get(), SOL_SOCKET, SO_SNDBUF, 925 &max_buffer_sz, &max_len), 926 SyscallSucceeds()); 927 928 int buffer_sz = max_buffer_sz >> 2; 929 EXPECT_THAT(setsockopt(connected_.get(), SOL_SOCKET, SO_SNDBUF, &buffer_sz, 930 sizeof(buffer_sz)), 931 SyscallSucceedsWithValue(0)); 932 933 // Create a large buffer that will be used for sending. 934 std::vector<char> buffer(max_buffer_sz); 935 936 // Write until we receive an error. 937 while (RetryEINTR(send)(connected_.get(), buffer.data(), buffer.size(), 0) != 938 -1) { 939 // Sleep to give linux a chance to move data from the send buffer to the 940 // receive buffer. 941 absl::SleepFor(absl::Milliseconds(10)); // 10ms. 942 } 943 944 // The last error should have been EWOULDBLOCK. 945 ASSERT_EQ(errno, EWOULDBLOCK); 946 947 ScopedThread send_thread([this]() { 948 int flags = 0; 949 ASSERT_THAT(flags = fcntl(connected_.get(), F_GETFL), SyscallSucceeds()); 950 EXPECT_THAT(fcntl(connected_.get(), F_SETFL, flags & ~O_NONBLOCK), 951 SyscallSucceeds()); 952 953 // Expect the send() to succeed. 954 char buffer; 955 ASSERT_THAT(RetryEINTR(send)(connected_.get(), &buffer, sizeof(buffer), 0), 956 SyscallSucceeds()); 957 }); 958 959 // Set SO_SNDBUF to maximum buffer size allowed. 960 buffer_sz = max_buffer_sz >> 1; 961 EXPECT_THAT(setsockopt(connected_.get(), SOL_SOCKET, SO_SNDBUF, &buffer_sz, 962 sizeof(buffer_sz)), 963 SyscallSucceedsWithValue(0)); 964 965 send_thread.Join(); 966 } 967 968 INSTANTIATE_TEST_SUITE_P(AllInetTests, TcpSocketTest, 969 ::testing::Values(AF_INET, AF_INET6)); 970 971 // Fixture for tests parameterized by address family that don't want the fixture 972 // to do things. 973 using SimpleTcpSocketTest = ::testing::TestWithParam<int>; 974 975 TEST_P(SimpleTcpSocketTest, SendUnconnected) { 976 int fd; 977 ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP), 978 SyscallSucceeds()); 979 FileDescriptor sock_fd(fd); 980 981 char data = '\0'; 982 EXPECT_THAT(RetryEINTR(send)(fd, &data, sizeof(data), 0), 983 SyscallFailsWithErrno(EPIPE)); 984 } 985 986 TEST_P(SimpleTcpSocketTest, SendtoWithoutAddressUnconnected) { 987 int fd; 988 ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP), 989 SyscallSucceeds()); 990 FileDescriptor sock_fd(fd); 991 992 char data = '\0'; 993 EXPECT_THAT(RetryEINTR(sendto)(fd, &data, sizeof(data), 0, nullptr, 0), 994 SyscallFailsWithErrno(EPIPE)); 995 } 996 997 TEST_P(SimpleTcpSocketTest, SendtoWithAddressUnconnected) { 998 int fd; 999 ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP), 1000 SyscallSucceeds()); 1001 FileDescriptor sock_fd(fd); 1002 1003 sockaddr_storage addr = 1004 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 1005 char data = '\0'; 1006 EXPECT_THAT(RetryEINTR(sendto)(fd, &data, sizeof(data), 0, AsSockAddr(&addr), 1007 sizeof(addr)), 1008 SyscallFailsWithErrno(EPIPE)); 1009 } 1010 1011 TEST_P(SimpleTcpSocketTest, GetPeerNameUnconnected) { 1012 int fd; 1013 ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP), 1014 SyscallSucceeds()); 1015 FileDescriptor sock_fd(fd); 1016 1017 sockaddr_storage addr; 1018 socklen_t addrlen = sizeof(addr); 1019 EXPECT_THAT(getpeername(fd, AsSockAddr(&addr), &addrlen), 1020 SyscallFailsWithErrno(ENOTCONN)); 1021 } 1022 1023 TEST_P(SimpleTcpSocketTest, GetSockNameUnbound) { 1024 int fd; 1025 ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP), 1026 SyscallSucceeds()); 1027 FileDescriptor sock_fd(fd); 1028 1029 sockaddr_storage addr; 1030 // Ensure that any 0s we read later have been explicitly set by getsockname. 1031 memset(&addr, -1, sizeof(addr)); 1032 socklen_t addrlen = sizeof(addr); 1033 EXPECT_THAT(getsockname(fd, AsSockAddr(&addr), &addrlen), SyscallSucceeds()); 1034 switch (GetParam()) { 1035 case AF_INET: { 1036 ASSERT_EQ(addrlen, sizeof(sockaddr_in)); 1037 auto sock_addr_in = reinterpret_cast<const sockaddr_in*>(&addr); 1038 ASSERT_EQ(sock_addr_in->sin_addr.s_addr, 0); 1039 ASSERT_EQ(sock_addr_in->sin_port, 0); 1040 break; 1041 } 1042 case AF_INET6: { 1043 ASSERT_EQ(addrlen, sizeof(sockaddr_in6)); 1044 auto sock_addr_in6 = reinterpret_cast<const sockaddr_in6*>(&addr); 1045 ASSERT_TRUE(IN6_IS_ADDR_UNSPECIFIED(&sock_addr_in6->sin6_addr)); 1046 ASSERT_EQ(sock_addr_in6->sin6_port, 0); 1047 break; 1048 } 1049 default: { 1050 ADD_FAILURE() << "unreachable"; 1051 break; 1052 } 1053 } 1054 } 1055 1056 TEST_P(TcpSocketTest, FullBuffer) { 1057 // Set both FDs to be blocking. 1058 int flags = 0; 1059 ASSERT_THAT(flags = fcntl(connected_.get(), F_GETFL), SyscallSucceeds()); 1060 EXPECT_THAT(fcntl(connected_.get(), F_SETFL, flags & ~O_NONBLOCK), 1061 SyscallSucceeds()); 1062 flags = 0; 1063 ASSERT_THAT(flags = fcntl(accepted_.get(), F_GETFL), SyscallSucceeds()); 1064 EXPECT_THAT(fcntl(accepted_.get(), F_SETFL, flags & ~O_NONBLOCK), 1065 SyscallSucceeds()); 1066 1067 // 2500 was chosen as a small value that can be set on Linux. 1068 int set_snd = 2500; 1069 EXPECT_THAT(setsockopt(connected_.get(), SOL_SOCKET, SO_SNDBUF, &set_snd, 1070 sizeof(set_snd)), 1071 SyscallSucceedsWithValue(0)); 1072 int get_snd = -1; 1073 socklen_t get_snd_len = sizeof(get_snd); 1074 EXPECT_THAT(getsockopt(connected_.get(), SOL_SOCKET, SO_SNDBUF, &get_snd, 1075 &get_snd_len), 1076 SyscallSucceedsWithValue(0)); 1077 EXPECT_EQ(get_snd_len, sizeof(get_snd)); 1078 EXPECT_GT(get_snd, 0); 1079 1080 // 2500 was chosen as a small value that can be set on Linux and gVisor. 1081 int set_rcv = 2500; 1082 EXPECT_THAT(setsockopt(accepted_.get(), SOL_SOCKET, SO_RCVBUF, &set_rcv, 1083 sizeof(set_rcv)), 1084 SyscallSucceedsWithValue(0)); 1085 int get_rcv = -1; 1086 socklen_t get_rcv_len = sizeof(get_rcv); 1087 EXPECT_THAT(getsockopt(accepted_.get(), SOL_SOCKET, SO_RCVBUF, &get_rcv, 1088 &get_rcv_len), 1089 SyscallSucceedsWithValue(0)); 1090 EXPECT_EQ(get_rcv_len, sizeof(get_rcv)); 1091 EXPECT_GE(get_rcv, 2500); 1092 1093 // Quick sanity test. 1094 EXPECT_LT(get_snd + get_rcv, 2500 * IOV_MAX); 1095 1096 char data[2500] = {}; 1097 std::vector<struct iovec> iovecs; 1098 for (int i = 0; i < IOV_MAX; i++) { 1099 struct iovec iov = {}; 1100 iov.iov_base = data; 1101 iov.iov_len = sizeof(data); 1102 iovecs.push_back(iov); 1103 } 1104 ScopedThread t([this, &iovecs]() { 1105 int result = -1; 1106 EXPECT_THAT(result = RetryEINTR(writev)(connected_.get(), iovecs.data(), 1107 iovecs.size()), 1108 SyscallSucceeds()); 1109 EXPECT_GT(result, 1); 1110 EXPECT_LT(result, sizeof(data) * iovecs.size()); 1111 }); 1112 1113 char recv = 0; 1114 EXPECT_THAT(RetryEINTR(read)(accepted_.get(), &recv, 1), 1115 SyscallSucceedsWithValue(1)); 1116 EXPECT_THAT(close(accepted_.release()), SyscallSucceedsWithValue(0)); 1117 } 1118 1119 TEST_P(TcpSocketTest, PollAfterShutdown) { 1120 ScopedThread client_thread([this]() { 1121 EXPECT_THAT(shutdown(connected_.get(), SHUT_WR), 1122 SyscallSucceedsWithValue(0)); 1123 struct pollfd poll_fd = {connected_.get(), POLLIN | POLLERR | POLLHUP, 0}; 1124 EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMillis), 1125 SyscallSucceedsWithValue(1)); 1126 }); 1127 1128 EXPECT_THAT(shutdown(accepted_.get(), SHUT_WR), SyscallSucceedsWithValue(0)); 1129 struct pollfd poll_fd = {accepted_.get(), POLLIN | POLLERR | POLLHUP, 0}; 1130 EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMillis), 1131 SyscallSucceedsWithValue(1)); 1132 } 1133 1134 TEST_P(SimpleTcpSocketTest, PollAroundAccept) { 1135 const FileDescriptor listener = 1136 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1137 sockaddr_storage addr = 1138 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 1139 socklen_t addrlen = sizeof(addr); 1140 1141 // Bind to some port. 1142 ASSERT_THAT(bind(listener.get(), AsSockAddr(&addr), addrlen), 1143 SyscallSucceeds()); 1144 ASSERT_THAT(listen(listener.get(), SOMAXCONN), SyscallSucceeds()); 1145 1146 // Get the address we're bound to. We need to do this because we're allowing 1147 // the stack to pick a port for us. 1148 ASSERT_THAT(getsockname(listener.get(), AsSockAddr(&addr), &addrlen), 1149 SyscallSucceeds()); 1150 switch (GetParam()) { 1151 case AF_INET: 1152 ASSERT_EQ(addrlen, sizeof(sockaddr_in)); 1153 break; 1154 case AF_INET6: 1155 ASSERT_EQ(addrlen, sizeof(sockaddr_in6)); 1156 break; 1157 } 1158 1159 // Before the listener socket receives a connection, it should not be eligible 1160 // for reading. 1161 struct pollfd poll_fd = {listener.get(), POLLIN, 0}; 1162 EXPECT_THAT(RetryEINTR(poll)(&poll_fd, /* nfds */ 1, /* timeout */ 0), 1163 SyscallSucceedsWithValue(0)); 1164 1165 FileDescriptor connector = 1166 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1167 ASSERT_THAT(RetryEINTR(connect)(connector.get(), AsSockAddr(&addr), addrlen), 1168 SyscallSucceeds()); 1169 1170 // Now that a connection is pending, the listener is ready for a read. 1171 ASSERT_THAT( 1172 RetryEINTR(poll)(&poll_fd, /* nfds */ 1, /* infinite timeout */ -1), 1173 SyscallSucceedsWithValue(1)); 1174 1175 // Accept the connection. This should make the listener no longer ready for a 1176 // read. 1177 const FileDescriptor accepted = 1178 ASSERT_NO_ERRNO_AND_VALUE(Accept(listener.get(), nullptr, nullptr)); 1179 EXPECT_THAT(RetryEINTR(poll)(&poll_fd, /* nfds*/ 1, /* timeout */ 0), 1180 SyscallSucceedsWithValue(0)); 1181 } 1182 1183 TEST_P(SimpleTcpSocketTest, NonBlockingConnectRetry) { 1184 const FileDescriptor listener = 1185 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1186 1187 // Initialize address to the loopback one. 1188 sockaddr_storage addr = 1189 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 1190 socklen_t addrlen = sizeof(addr); 1191 1192 // Bind to some port but don't listen yet. 1193 ASSERT_THAT(bind(listener.get(), AsSockAddr(&addr), addrlen), 1194 SyscallSucceeds()); 1195 1196 // Get the address we're bound to, then connect to it. We need to do this 1197 // because we're allowing the stack to pick a port for us. 1198 ASSERT_THAT(getsockname(listener.get(), AsSockAddr(&addr), &addrlen), 1199 SyscallSucceeds()); 1200 1201 FileDescriptor connector = 1202 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1203 1204 // Verify that connect fails. 1205 ASSERT_THAT(RetryEINTR(connect)(connector.get(), AsSockAddr(&addr), addrlen), 1206 SyscallFailsWithErrno(ECONNREFUSED)); 1207 1208 // Now start listening 1209 ASSERT_THAT(listen(listener.get(), SOMAXCONN), SyscallSucceeds()); 1210 1211 // TODO(gvisor.dev/issue/3828): Issuing connect() again on a socket that 1212 // failed first connect should succeed. 1213 if (IsRunningOnGvisor()) { 1214 ASSERT_THAT( 1215 RetryEINTR(connect)(connector.get(), AsSockAddr(&addr), addrlen), 1216 SyscallFailsWithErrno(ECONNABORTED)); 1217 return; 1218 } 1219 1220 // Verify that connect now succeeds. 1221 ASSERT_THAT(RetryEINTR(connect)(connector.get(), AsSockAddr(&addr), addrlen), 1222 SyscallSucceeds()); 1223 1224 // Accept the connection. 1225 const FileDescriptor accepted = 1226 ASSERT_NO_ERRNO_AND_VALUE(Accept(listener.get(), nullptr, nullptr)); 1227 } 1228 1229 // nonBlockingConnectNoListener returns a socket on which a connect that is 1230 // expected to fail has been issued. The address to which the connect is issued 1231 // is written to `addr` and `addrlen`. 1232 PosixErrorOr<FileDescriptor> nonBlockingConnectNoListener( 1233 const int family, sockaddr_storage& addr, socklen_t& addrlen) { 1234 // We will first create a socket and bind to ensure we bind a port but will 1235 // not call listen on this socket. 1236 // Then we will create a new socket that will connect to the port bound by 1237 // the first socket and that shoud fail. 1238 constexpr int sock_type = SOCK_STREAM | SOCK_NONBLOCK; 1239 int b_sock; 1240 RETURN_ERROR_IF_SYSCALL_FAIL(b_sock = socket(family, sock_type, IPPROTO_TCP)); 1241 FileDescriptor b(b_sock); 1242 EXPECT_THAT(bind(b.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds()); 1243 1244 // Get the address bound by the listening socket. 1245 EXPECT_THAT(getsockname(b.get(), AsSockAddr(&addr), &addrlen), 1246 SyscallSucceeds()); 1247 1248 // Now create another socket and issue a connect on this one. This connect 1249 // should fail as there is no listener. 1250 int c_sock; 1251 RETURN_ERROR_IF_SYSCALL_FAIL(c_sock = socket(family, sock_type, IPPROTO_TCP)); 1252 FileDescriptor s(c_sock); 1253 1254 // Now connect to the bound address and this should fail as nothing 1255 // is listening on the bound address. 1256 EXPECT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen), 1257 SyscallFailsWithErrno(EINPROGRESS)); 1258 1259 // Wait for the connect to fail. 1260 struct pollfd poll_fd = {s.get(), POLLERR, 0}; 1261 EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMillis), 1262 SyscallSucceedsWithValue(1)); 1263 return std::move(s); 1264 } 1265 1266 TEST_P(SimpleTcpSocketTest, NonBlockingConnectNoListener) { 1267 sockaddr_storage addr = 1268 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 1269 socklen_t addrlen = sizeof(addr); 1270 1271 const FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( 1272 nonBlockingConnectNoListener(GetParam(), addr, addrlen)); 1273 ASSERT_NE(GetPort(addr).value(), 0); 1274 1275 int err; 1276 socklen_t optlen = sizeof(err); 1277 ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ERROR, &err, &optlen), 1278 SyscallSucceeds()); 1279 ASSERT_EQ(optlen, sizeof(err)); 1280 EXPECT_EQ(err, ECONNREFUSED); 1281 1282 unsigned char c; 1283 ASSERT_THAT(read(s.get(), &c, sizeof(c)), SyscallSucceedsWithValue(0)); 1284 int opts; 1285 EXPECT_THAT(opts = fcntl(s.get(), F_GETFL), SyscallSucceeds()); 1286 opts &= ~O_NONBLOCK; 1287 EXPECT_THAT(fcntl(s.get(), F_SETFL, opts), SyscallSucceeds()); 1288 // Try connecting again. 1289 ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen), 1290 SyscallFailsWithErrno(ECONNABORTED)); 1291 } 1292 1293 TEST_P(SimpleTcpSocketTest, ListenConnectParallel) { 1294 // TODO(b/171436815): Re-enable when S/R is fixed. 1295 const DisableSave disable_save; 1296 int family = GetParam(); 1297 sockaddr_storage addr = 1298 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 1299 socklen_t addrlen = sizeof(addr); 1300 constexpr int sock_type = SOCK_STREAM; 1301 1302 FileDescriptor l = 1303 ASSERT_NO_ERRNO_AND_VALUE(Socket(family, sock_type, IPPROTO_TCP)); 1304 EXPECT_THAT(bind(l.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds()); 1305 1306 // Get the address bound by the listening socket. 1307 EXPECT_THAT(getsockname(l.get(), AsSockAddr(&addr), &addrlen), 1308 SyscallSucceeds()); 1309 1310 constexpr int num_threads = 100; 1311 ScopedThread t([&l]() { 1312 absl::SleepFor(absl::Microseconds(1000)); 1313 EXPECT_THAT(listen(l.get(), num_threads), SyscallSucceeds()); 1314 }); 1315 1316 // Initiate connects in a separate thread. 1317 std::vector<std::unique_ptr<ScopedThread>> threads; 1318 threads.reserve(num_threads); 1319 for (int i = 0; i < num_threads; i++) { 1320 threads.push_back( 1321 std::make_unique<ScopedThread>([&addr, &addrlen, family]() { 1322 const FileDescriptor c = ASSERT_NO_ERRNO_AND_VALUE( 1323 Socket(family, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); 1324 1325 // Now connect to the bound address and this should fail as nothing 1326 // is listening on the bound address. 1327 EXPECT_THAT(RetryEINTR(connect)(c.get(), AsSockAddr(&addr), addrlen), 1328 SyscallFailsWithErrno(EINPROGRESS)); 1329 // Wait for the connect to fail or succeed as it can race with the 1330 // socket listening. 1331 struct pollfd poll_fd = {c.get(), POLLERR | POLLOUT, 0}; 1332 EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 1000), 1333 SyscallSucceedsWithValue(1)); 1334 })); 1335 } 1336 } 1337 1338 TEST_P(SimpleTcpSocketTest, NonBlockingConnectNoListenerRead) { 1339 sockaddr_storage addr = 1340 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 1341 socklen_t addrlen = sizeof(addr); 1342 1343 const FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( 1344 nonBlockingConnectNoListener(GetParam(), addr, addrlen)); 1345 ASSERT_NE(GetPort(addr).value(), 0); 1346 1347 unsigned char c; 1348 ASSERT_THAT(read(s.get(), &c, 1), SyscallFailsWithErrno(ECONNREFUSED)); 1349 ASSERT_THAT(read(s.get(), &c, 1), SyscallSucceedsWithValue(0)); 1350 ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen), 1351 SyscallFailsWithErrno(ECONNABORTED)); 1352 } 1353 1354 TEST_P(SimpleTcpSocketTest, NonBlockingConnectNoListenerPeek) { 1355 sockaddr_storage addr = 1356 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 1357 socklen_t addrlen = sizeof(addr); 1358 1359 const FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( 1360 nonBlockingConnectNoListener(GetParam(), addr, addrlen)); 1361 ASSERT_NE(GetPort(addr).value(), 0); 1362 1363 unsigned char c; 1364 ASSERT_THAT(recv(s.get(), &c, 1, MSG_PEEK), 1365 SyscallFailsWithErrno(ECONNREFUSED)); 1366 ASSERT_THAT(recv(s.get(), &c, 1, MSG_PEEK), SyscallSucceedsWithValue(0)); 1367 ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen), 1368 SyscallFailsWithErrno(ECONNABORTED)); 1369 } 1370 1371 TEST_P(SimpleTcpSocketTest, SelfConnectSendRecv) { 1372 // Initialize address to the loopback one. 1373 sockaddr_storage addr = 1374 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 1375 socklen_t addrlen = sizeof(addr); 1376 1377 const FileDescriptor s = 1378 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1379 1380 ASSERT_THAT((bind)(s.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds()); 1381 // Get the bound port. 1382 ASSERT_THAT(getsockname(s.get(), AsSockAddr(&addr), &addrlen), 1383 SyscallSucceeds()); 1384 ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen), 1385 SyscallSucceeds()); 1386 1387 constexpr int kBufSz = 1 << 20; // 1 MiB 1388 std::vector<char> writebuf(kBufSz); 1389 1390 // Start reading the response in a loop. 1391 int read_bytes = 0; 1392 ScopedThread t([&s, &read_bytes]() { 1393 // Too many syscalls. 1394 const DisableSave disable_save; 1395 1396 char readbuf[2500] = {}; 1397 int n = -1; 1398 while (n != 0) { 1399 ASSERT_THAT(n = RetryEINTR(read)(s.get(), &readbuf, sizeof(readbuf)), 1400 SyscallSucceeds()); 1401 read_bytes += n; 1402 } 1403 }); 1404 1405 // Try to send the whole thing. 1406 int n; 1407 ASSERT_THAT(n = SendFd(s.get(), writebuf.data(), kBufSz, 0), 1408 SyscallSucceeds()); 1409 1410 // We should have written the whole thing. 1411 EXPECT_EQ(n, kBufSz); 1412 EXPECT_THAT(shutdown(s.get(), SHUT_WR), SyscallSucceedsWithValue(0)); 1413 t.Join(); 1414 1415 // We should have read the whole thing. 1416 EXPECT_EQ(read_bytes, kBufSz); 1417 } 1418 1419 TEST_P(SimpleTcpSocketTest, SelfConnectSend) { 1420 // Initialize address to the loopback one. 1421 sockaddr_storage addr = 1422 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 1423 socklen_t addrlen = sizeof(addr); 1424 1425 const FileDescriptor s = 1426 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1427 1428 constexpr int max_seg = 256; 1429 ASSERT_THAT( 1430 setsockopt(s.get(), SOL_TCP, TCP_MAXSEG, &max_seg, sizeof(max_seg)), 1431 SyscallSucceeds()); 1432 1433 ASSERT_THAT(bind(s.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds()); 1434 // Get the bound port. 1435 ASSERT_THAT(getsockname(s.get(), AsSockAddr(&addr), &addrlen), 1436 SyscallSucceeds()); 1437 ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen), 1438 SyscallSucceeds()); 1439 1440 // Ensure the write buffer is large enough not to block on a single write. 1441 size_t write_size = 128 << 10; // 128 KiB. 1442 EXPECT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_SNDBUF, &write_size, 1443 sizeof(write_size)), 1444 SyscallSucceedsWithValue(0)); 1445 1446 std::vector<char> writebuf(write_size); 1447 1448 // Try to send the whole thing. 1449 int n; 1450 ASSERT_THAT(n = SendFd(s.get(), writebuf.data(), writebuf.size(), 0), 1451 SyscallSucceeds()); 1452 1453 // We should have written the whole thing. 1454 EXPECT_EQ(n, writebuf.size()); 1455 EXPECT_THAT(shutdown(s.get(), SHUT_WR), SyscallSucceedsWithValue(0)); 1456 } 1457 1458 TEST_P(SimpleTcpSocketTest, SelfConnectSendShutdownWrite) { 1459 // Initialize address to the loopback one. 1460 sockaddr_storage addr = 1461 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 1462 socklen_t addrlen = sizeof(addr); 1463 1464 const FileDescriptor s = 1465 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1466 1467 ASSERT_THAT(bind(s.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds()); 1468 // Get the bound port. 1469 ASSERT_THAT(getsockname(s.get(), AsSockAddr(&addr), &addrlen), 1470 SyscallSucceeds()); 1471 ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen), 1472 SyscallSucceeds()); 1473 1474 // Write enough data to fill send and receive buffers. 1475 size_t write_size = 24 << 20; // 24 MiB. 1476 std::vector<char> writebuf(write_size); 1477 1478 ScopedThread t([&s]() { 1479 absl::SleepFor(absl::Milliseconds(250)); 1480 ASSERT_THAT(shutdown(s.get(), SHUT_WR), SyscallSucceeds()); 1481 }); 1482 1483 // Try to send the whole thing. 1484 int n; 1485 ASSERT_THAT(n = SendFd(s.get(), writebuf.data(), writebuf.size(), 0), 1486 SyscallFailsWithErrno(EPIPE)); 1487 } 1488 1489 TEST_P(SimpleTcpSocketTest, SelfConnectRecvShutdownRead) { 1490 // Initialize address to the loopback one. 1491 sockaddr_storage addr = 1492 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 1493 socklen_t addrlen = sizeof(addr); 1494 1495 const FileDescriptor s = 1496 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1497 1498 ASSERT_THAT(bind(s.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds()); 1499 // Get the bound port. 1500 ASSERT_THAT(getsockname(s.get(), AsSockAddr(&addr), &addrlen), 1501 SyscallSucceeds()); 1502 ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen), 1503 SyscallSucceeds()); 1504 1505 ScopedThread t([&s]() { 1506 absl::SleepFor(absl::Milliseconds(250)); 1507 ASSERT_THAT(shutdown(s.get(), SHUT_RD), SyscallSucceeds()); 1508 }); 1509 1510 char buf[1]; 1511 EXPECT_THAT(recv(s.get(), buf, 0, 0), SyscallSucceedsWithValue(0)); 1512 } 1513 1514 void NonBlockingConnect(int family, int16_t pollMask) { 1515 const FileDescriptor listener = 1516 ASSERT_NO_ERRNO_AND_VALUE(Socket(family, SOCK_STREAM, IPPROTO_TCP)); 1517 1518 // Initialize address to the loopback one. 1519 sockaddr_storage addr = 1520 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(family)); 1521 socklen_t addrlen = sizeof(addr); 1522 1523 // Bind to some port then start listening. 1524 ASSERT_THAT(bind(listener.get(), AsSockAddr(&addr), addrlen), 1525 SyscallSucceeds()); 1526 1527 ASSERT_THAT(listen(listener.get(), SOMAXCONN), SyscallSucceeds()); 1528 1529 FileDescriptor s = 1530 ASSERT_NO_ERRNO_AND_VALUE(Socket(family, SOCK_STREAM, IPPROTO_TCP)); 1531 1532 // Set the FD to O_NONBLOCK. 1533 int opts; 1534 ASSERT_THAT(opts = fcntl(s.get(), F_GETFL), SyscallSucceeds()); 1535 opts |= O_NONBLOCK; 1536 ASSERT_THAT(fcntl(s.get(), F_SETFL, opts), SyscallSucceeds()); 1537 1538 ASSERT_THAT(getsockname(listener.get(), AsSockAddr(&addr), &addrlen), 1539 SyscallSucceeds()); 1540 1541 ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen), 1542 SyscallFailsWithErrno(EINPROGRESS)); 1543 1544 int t; 1545 ASSERT_THAT(t = RetryEINTR(accept)(listener.get(), nullptr, nullptr), 1546 SyscallSucceeds()); 1547 1548 struct pollfd poll_fd = {s.get(), pollMask, 0}; 1549 EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMillis), 1550 SyscallSucceedsWithValue(1)); 1551 1552 int err; 1553 socklen_t optlen = sizeof(err); 1554 ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ERROR, &err, &optlen), 1555 SyscallSucceeds()); 1556 ASSERT_EQ(optlen, sizeof(err)); 1557 1558 EXPECT_EQ(err, 0); 1559 1560 EXPECT_THAT(close(t), SyscallSucceeds()); 1561 } 1562 1563 TEST_P(SimpleTcpSocketTest, NonBlockingConnect_PollOut) { 1564 NonBlockingConnect(GetParam(), POLLOUT); 1565 } 1566 1567 TEST_P(SimpleTcpSocketTest, NonBlockingConnect_PollWrNorm) { 1568 NonBlockingConnect(GetParam(), POLLWRNORM); 1569 } 1570 1571 TEST_P(SimpleTcpSocketTest, NonBlockingConnect_PollWrNorm_PollOut) { 1572 NonBlockingConnect(GetParam(), POLLWRNORM | POLLOUT); 1573 } 1574 1575 TEST_P(SimpleTcpSocketTest, NonBlockingConnectRemoteClose) { 1576 const FileDescriptor listener = 1577 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1578 1579 // Initialize address to the loopback one. 1580 sockaddr_storage addr = 1581 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 1582 socklen_t addrlen = sizeof(addr); 1583 1584 // Bind to some port then start listening. 1585 ASSERT_THAT(bind(listener.get(), AsSockAddr(&addr), addrlen), 1586 SyscallSucceeds()); 1587 1588 ASSERT_THAT(listen(listener.get(), SOMAXCONN), SyscallSucceeds()); 1589 1590 FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( 1591 Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); 1592 1593 ASSERT_THAT(getsockname(listener.get(), AsSockAddr(&addr), &addrlen), 1594 SyscallSucceeds()); 1595 1596 ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen), 1597 SyscallFailsWithErrno(EINPROGRESS)); 1598 1599 int t; 1600 ASSERT_THAT(t = RetryEINTR(accept)(listener.get(), nullptr, nullptr), 1601 SyscallSucceeds()); 1602 1603 EXPECT_THAT(close(t), SyscallSucceeds()); 1604 1605 // Now polling on the FD with a timeout should return 0 corresponding to no 1606 // FDs ready. 1607 struct pollfd poll_fd = {s.get(), POLLOUT, 0}; 1608 EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMillis), 1609 SyscallSucceedsWithValue(1)); 1610 1611 ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen), 1612 SyscallSucceeds()); 1613 1614 ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen), 1615 SyscallFailsWithErrno(EISCONN)); 1616 } 1617 1618 // Test that we get an ECONNREFUSED with a blocking socket when no one is 1619 // listening on the other end. 1620 TEST_P(SimpleTcpSocketTest, BlockingConnectRefused) { 1621 FileDescriptor s = 1622 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1623 1624 // Initialize address to the loopback one. 1625 sockaddr_storage addr = 1626 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 1627 socklen_t addrlen = sizeof(addr); 1628 1629 auto reservation = ReserveLocalPort(GetParam(), addr, addrlen); 1630 ASSERT_NE(GetPort(addr).value(), 0); 1631 1632 ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen), 1633 SyscallFailsWithErrno(ECONNREFUSED)); 1634 1635 // Avoiding triggering save in destructor of s. 1636 EXPECT_THAT(close(s.release()), SyscallSucceeds()); 1637 } 1638 1639 // Test that connecting to a non-listening port and thus receiving a RST is 1640 // handled appropriately by the socket - the port that the socket was bound to 1641 // is released and the expected error is returned. 1642 TEST_P(SimpleTcpSocketTest, CleanupOnConnectionRefused) { 1643 // Create a socket that is known to not be listening. As is it bound but not 1644 // listening, when another socket connects to the port, it will refuse.. 1645 FileDescriptor bound_s = 1646 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1647 1648 sockaddr_storage bound_addr = 1649 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 1650 socklen_t bound_addrlen = sizeof(bound_addr); 1651 1652 ASSERT_THAT(bind(bound_s.get(), AsSockAddr(&bound_addr), bound_addrlen), 1653 SyscallSucceeds()); 1654 1655 // Get the addresses the socket is bound to because the port is chosen by the 1656 // stack. 1657 ASSERT_THAT( 1658 getsockname(bound_s.get(), AsSockAddr(&bound_addr), &bound_addrlen), 1659 SyscallSucceeds()); 1660 1661 // Create, initialize, and bind the socket that is used to test connecting to 1662 // the non-listening port. 1663 FileDescriptor client_s = 1664 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1665 // Initialize client address to the loopback one. 1666 sockaddr_storage client_addr = 1667 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 1668 socklen_t client_addrlen = sizeof(client_addr); 1669 1670 ASSERT_THAT(bind(client_s.get(), AsSockAddr(&client_addr), client_addrlen), 1671 SyscallSucceeds()); 1672 1673 ASSERT_THAT( 1674 getsockname(client_s.get(), AsSockAddr(&client_addr), &client_addrlen), 1675 SyscallSucceeds()); 1676 1677 // Now the test: connect to the bound but not listening socket with the 1678 // client socket. The bound socket should return a RST and cause the client 1679 // socket to return an error and clean itself up immediately. 1680 // The error being ECONNREFUSED diverges with RFC 793, page 37, but does what 1681 // Linux does. 1682 ASSERT_THAT( 1683 RetryEINTR(connect)(client_s.get(), 1684 reinterpret_cast<const struct sockaddr*>(&bound_addr), 1685 bound_addrlen), 1686 SyscallFailsWithErrno(ECONNREFUSED)); 1687 1688 FileDescriptor new_s = 1689 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1690 1691 // Test binding to the address from the client socket. This should be okay 1692 // if it was dropped correctly. 1693 ASSERT_THAT(bind(new_s.get(), AsSockAddr(&client_addr), client_addrlen), 1694 SyscallSucceeds()); 1695 1696 // Attempt #2, with the new socket and reused addr our connect should fail in 1697 // the same way as before, not with an EADDRINUSE. 1698 // 1699 // TODO(gvisor.dev/issue/3828): 2nd connect on a socket which failed connect 1700 // first time should succeed. 1701 // gVisor never issues the second connect and returns ECONNABORTED instead. 1702 // Linux actually sends a SYN again and gets a RST and correctly returns 1703 // ECONNREFUSED. 1704 if (IsRunningOnGvisor()) { 1705 ASSERT_THAT(RetryEINTR(connect)( 1706 client_s.get(), 1707 reinterpret_cast<const struct sockaddr*>(&bound_addr), 1708 bound_addrlen), 1709 SyscallFailsWithErrno(ECONNABORTED)); 1710 return; 1711 } 1712 ASSERT_THAT( 1713 RetryEINTR(connect)(client_s.get(), 1714 reinterpret_cast<const struct sockaddr*>(&bound_addr), 1715 bound_addrlen), 1716 SyscallFailsWithErrno(ECONNREFUSED)); 1717 } 1718 1719 // Test that we get an ECONNREFUSED with a nonblocking socket. 1720 TEST_P(SimpleTcpSocketTest, NonBlockingConnectRefused) { 1721 FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( 1722 Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); 1723 1724 // Initialize address to the loopback one. 1725 sockaddr_storage addr = 1726 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 1727 socklen_t addrlen = sizeof(addr); 1728 auto reservation = ReserveLocalPort(GetParam(), addr, addrlen); 1729 ASSERT_NE(GetPort(addr).value(), 0); 1730 1731 ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen), 1732 SyscallFailsWithErrno(EINPROGRESS)); 1733 1734 // We don't need to specify any events to get POLLHUP or POLLERR as these 1735 // are added before the poll. 1736 struct pollfd poll_fd = {s.get(), /*events=*/0, 0}; 1737 EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, kTimeoutMillis), 1738 SyscallSucceedsWithValue(1)); 1739 1740 // The ECONNREFUSED should cause us to be woken up with POLLHUP. 1741 EXPECT_NE(poll_fd.revents & (POLLHUP | POLLERR), 0); 1742 1743 // Avoiding triggering save in destructor of s. 1744 EXPECT_THAT(close(s.release()), SyscallSucceeds()); 1745 } 1746 1747 // Test that setting a supported congestion control algorithm succeeds for an 1748 // unconnected TCP socket 1749 TEST_P(SimpleTcpSocketTest, SetCongestionControlSucceedsForSupported) { 1750 // This is Linux's net/tcp.h TCP_CA_NAME_MAX. 1751 const int kTcpCaNameMax = 16; 1752 1753 FileDescriptor s = 1754 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1755 { 1756 const char kSetCC[kTcpCaNameMax] = "reno"; 1757 ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC, 1758 strlen(kSetCC)), 1759 SyscallSucceedsWithValue(0)); 1760 1761 char got_cc[kTcpCaNameMax]; 1762 memset(got_cc, '1', sizeof(got_cc)); 1763 socklen_t optlen = sizeof(got_cc); 1764 ASSERT_THAT( 1765 getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen), 1766 SyscallSucceedsWithValue(0)); 1767 // We ignore optlen here as the linux kernel sets optlen to the lower of the 1768 // size of the buffer passed in or kTcpCaNameMax and not the length of the 1769 // congestion control algorithm's actual name. 1770 EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kTcpCaNameMax))); 1771 } 1772 { 1773 const char kSetCC[kTcpCaNameMax] = "cubic"; 1774 ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC, 1775 strlen(kSetCC)), 1776 SyscallSucceedsWithValue(0)); 1777 1778 char got_cc[kTcpCaNameMax]; 1779 memset(got_cc, '1', sizeof(got_cc)); 1780 socklen_t optlen = sizeof(got_cc); 1781 ASSERT_THAT( 1782 getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen), 1783 SyscallSucceedsWithValue(0)); 1784 // We ignore optlen here as the linux kernel sets optlen to the lower of the 1785 // size of the buffer passed in or kTcpCaNameMax and not the length of the 1786 // congestion control algorithm's actual name. 1787 EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kTcpCaNameMax))); 1788 } 1789 } 1790 1791 // This test verifies that a getsockopt(...TCP_CONGESTION) behaviour is 1792 // consistent between linux and gvisor when the passed in buffer is smaller than 1793 // kTcpCaNameMax. 1794 TEST_P(SimpleTcpSocketTest, SetGetTCPCongestionShortReadBuffer) { 1795 FileDescriptor s = 1796 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1797 { 1798 // Verify that getsockopt/setsockopt work with buffers smaller than 1799 // kTcpCaNameMax. 1800 const char kSetCC[] = "cubic"; 1801 ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC, 1802 strlen(kSetCC)), 1803 SyscallSucceedsWithValue(0)); 1804 1805 char got_cc[sizeof(kSetCC)]; 1806 socklen_t optlen = sizeof(got_cc); 1807 ASSERT_THAT( 1808 getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen), 1809 SyscallSucceedsWithValue(0)); 1810 EXPECT_EQ(sizeof(got_cc), optlen); 1811 EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(got_cc))); 1812 } 1813 } 1814 1815 // This test verifies that a getsockopt(...TCP_CONGESTION) behaviour is 1816 // consistent between linux and gvisor when the passed in buffer is larger than 1817 // kTcpCaNameMax. 1818 TEST_P(SimpleTcpSocketTest, SetGetTCPCongestionLargeReadBuffer) { 1819 // This is Linux's net/tcp.h TCP_CA_NAME_MAX. 1820 const int kTcpCaNameMax = 16; 1821 1822 FileDescriptor s = 1823 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1824 { 1825 // Verify that getsockopt works with buffers larger than 1826 // kTcpCaNameMax. 1827 const char kSetCC[] = "cubic"; 1828 ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &kSetCC, 1829 strlen(kSetCC)), 1830 SyscallSucceedsWithValue(0)); 1831 1832 char got_cc[kTcpCaNameMax + 5]; 1833 socklen_t optlen = sizeof(got_cc); 1834 ASSERT_THAT( 1835 getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen), 1836 SyscallSucceedsWithValue(0)); 1837 // Linux copies the minimum of kTcpCaNameMax or the length of the passed in 1838 // buffer and sets optlen to the number of bytes actually copied 1839 // irrespective of the actual length of the congestion control name. 1840 EXPECT_EQ(kTcpCaNameMax, optlen); 1841 EXPECT_EQ(0, memcmp(got_cc, kSetCC, sizeof(kSetCC))); 1842 } 1843 } 1844 1845 // Test that setting an unsupported congestion control algorithm fails for an 1846 // unconnected TCP socket. 1847 TEST_P(SimpleTcpSocketTest, SetCongestionControlFailsForUnsupported) { 1848 // This is Linux's net/tcp.h TCP_CA_NAME_MAX. 1849 const int kTcpCaNameMax = 16; 1850 1851 FileDescriptor s = 1852 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1853 char old_cc[kTcpCaNameMax]; 1854 socklen_t optlen = sizeof(old_cc); 1855 ASSERT_THAT( 1856 getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &old_cc, &optlen), 1857 SyscallSucceedsWithValue(0)); 1858 1859 const char kSetCC[] = "invalid_ca_kSetCC"; 1860 ASSERT_THAT( 1861 setsockopt(s.get(), SOL_TCP, TCP_CONGESTION, &kSetCC, strlen(kSetCC)), 1862 SyscallFailsWithErrno(ENOENT)); 1863 1864 char got_cc[kTcpCaNameMax]; 1865 ASSERT_THAT( 1866 getsockopt(s.get(), IPPROTO_TCP, TCP_CONGESTION, &got_cc, &optlen), 1867 SyscallSucceedsWithValue(0)); 1868 // We ignore optlen here as the linux kernel sets optlen to the lower of the 1869 // size of the buffer passed in or kTcpCaNameMax and not the length of the 1870 // congestion control algorithm's actual name. 1871 EXPECT_EQ(0, memcmp(got_cc, old_cc, sizeof(kTcpCaNameMax))); 1872 } 1873 1874 TEST_P(SimpleTcpSocketTest, MaxSegDefault) { 1875 FileDescriptor s = 1876 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1877 1878 constexpr int kDefaultMSS = 536; 1879 int tcp_max_seg; 1880 socklen_t optlen = sizeof(tcp_max_seg); 1881 ASSERT_THAT( 1882 getsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg, &optlen), 1883 SyscallSucceedsWithValue(0)); 1884 1885 EXPECT_EQ(kDefaultMSS, tcp_max_seg); 1886 EXPECT_EQ(sizeof(tcp_max_seg), optlen); 1887 } 1888 1889 TEST_P(SimpleTcpSocketTest, SetMaxSeg) { 1890 FileDescriptor s = 1891 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1892 1893 constexpr int kDefaultMSS = 536; 1894 constexpr int kTCPMaxSeg = 1024; 1895 ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &kTCPMaxSeg, 1896 sizeof(kTCPMaxSeg)), 1897 SyscallSucceedsWithValue(0)); 1898 1899 int optval; 1900 socklen_t optlen = sizeof(optval); 1901 ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &optval, &optlen), 1902 SyscallSucceedsWithValue(0)); 1903 ASSERT_EQ(optlen, sizeof(optval)); 1904 1905 // In older Linux versions, user_mss value was never actually returned. Linux 1906 // would always return the default MSS value for an unconnected socket and 1907 // always return the actual current MSS for a connected one. However, the 1908 // behavior changed since 34dfde4ad87b ("tcp: Return user_mss for TCP_MAXSEG 1909 // in CLOSE/LISTEN state if user_mss set"). With this change, user_mss is 1910 // returned if set for unconnected sockets. So allow both. 1911 EXPECT_THAT(optval, AnyOf(kDefaultMSS, kTCPMaxSeg)); 1912 } 1913 1914 TEST_P(SimpleTcpSocketTest, SetMaxSegFailsForInvalidMSSValues) { 1915 FileDescriptor s = 1916 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1917 1918 { 1919 constexpr int tcp_max_seg = 10; 1920 ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg, 1921 sizeof(tcp_max_seg)), 1922 SyscallFailsWithErrno(EINVAL)); 1923 } 1924 { 1925 constexpr int tcp_max_seg = 75000; 1926 ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_MAXSEG, &tcp_max_seg, 1927 sizeof(tcp_max_seg)), 1928 SyscallFailsWithErrno(EINVAL)); 1929 } 1930 } 1931 1932 TEST_P(SimpleTcpSocketTest, SetTCPUserTimeout) { 1933 FileDescriptor s = 1934 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1935 1936 { 1937 constexpr int kTCPUserTimeout = -1; 1938 EXPECT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, 1939 &kTCPUserTimeout, sizeof(kTCPUserTimeout)), 1940 SyscallFailsWithErrno(EINVAL)); 1941 } 1942 1943 // kTCPUserTimeout is in milliseconds. 1944 constexpr int kTCPUserTimeout = 100; 1945 ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, 1946 &kTCPUserTimeout, sizeof(kTCPUserTimeout)), 1947 SyscallSucceedsWithValue(0)); 1948 int get = -1; 1949 socklen_t get_len = sizeof(get); 1950 ASSERT_THAT( 1951 getsockopt(s.get(), IPPROTO_TCP, TCP_USER_TIMEOUT, &get, &get_len), 1952 SyscallSucceedsWithValue(0)); 1953 EXPECT_EQ(get_len, sizeof(get)); 1954 EXPECT_EQ(get, kTCPUserTimeout); 1955 } 1956 1957 TEST_P(SimpleTcpSocketTest, SetTCPDeferAcceptNeg) { 1958 FileDescriptor s = 1959 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1960 1961 // -ve TCP_DEFER_ACCEPT is same as setting it to zero. 1962 constexpr int kNeg = -1; 1963 EXPECT_THAT( 1964 setsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &kNeg, sizeof(kNeg)), 1965 SyscallSucceeds()); 1966 int get = -1; 1967 socklen_t get_len = sizeof(get); 1968 ASSERT_THAT( 1969 getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len), 1970 SyscallSucceedsWithValue(0)); 1971 EXPECT_EQ(get_len, sizeof(get)); 1972 EXPECT_EQ(get, 0); 1973 } 1974 1975 TEST_P(SimpleTcpSocketTest, GetTCPDeferAcceptDefault) { 1976 FileDescriptor s = 1977 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1978 1979 int get = -1; 1980 socklen_t get_len = sizeof(get); 1981 ASSERT_THAT( 1982 getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len), 1983 SyscallSucceedsWithValue(0)); 1984 EXPECT_EQ(get_len, sizeof(get)); 1985 EXPECT_EQ(get, 0); 1986 } 1987 1988 TEST_P(SimpleTcpSocketTest, SetTCPDeferAcceptGreaterThanZero) { 1989 FileDescriptor s = 1990 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 1991 // kTCPDeferAccept is in seconds. 1992 // NOTE: linux translates seconds to # of retries and back from 1993 // #of retries to seconds. Which means only certain values 1994 // translate back exactly. That's why we use 3 here, a value of 1995 // 5 will result in us getting back 7 instead of 5 in the 1996 // getsockopt. 1997 constexpr int kTCPDeferAccept = 3; 1998 ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, 1999 &kTCPDeferAccept, sizeof(kTCPDeferAccept)), 2000 SyscallSucceeds()); 2001 int get = -1; 2002 socklen_t get_len = sizeof(get); 2003 ASSERT_THAT( 2004 getsockopt(s.get(), IPPROTO_TCP, TCP_DEFER_ACCEPT, &get, &get_len), 2005 SyscallSucceeds()); 2006 EXPECT_EQ(get_len, sizeof(get)); 2007 EXPECT_EQ(get, kTCPDeferAccept); 2008 } 2009 2010 TEST_P(SimpleTcpSocketTest, RecvOnClosedSocket) { 2011 auto s = 2012 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 2013 char buf[1]; 2014 EXPECT_THAT(recv(s.get(), buf, 0, 0), SyscallFailsWithErrno(ENOTCONN)); 2015 EXPECT_THAT(recv(s.get(), buf, sizeof(buf), 0), 2016 SyscallFailsWithErrno(ENOTCONN)); 2017 } 2018 2019 TEST_P(SimpleTcpSocketTest, TCPConnectSoRcvBufRace) { 2020 auto s = ASSERT_NO_ERRNO_AND_VALUE( 2021 Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); 2022 sockaddr_storage addr = 2023 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 2024 socklen_t addrlen = sizeof(addr); 2025 auto reservation = ReserveLocalPort(GetParam(), addr, addrlen); 2026 ASSERT_NE(GetPort(addr).value(), 0); 2027 2028 RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen); 2029 int buf_sz = 1 << 18; 2030 EXPECT_THAT( 2031 setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &buf_sz, sizeof(buf_sz)), 2032 SyscallSucceedsWithValue(0)); 2033 } 2034 2035 TEST_P(SimpleTcpSocketTest, SetTCPSynCntLessThanOne) { 2036 FileDescriptor s = 2037 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 2038 2039 int get = -1; 2040 socklen_t get_len = sizeof(get); 2041 ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len), 2042 SyscallSucceedsWithValue(0)); 2043 EXPECT_EQ(get_len, sizeof(get)); 2044 int default_syn_cnt = get; 2045 2046 { 2047 // TCP_SYNCNT less than 1 should be rejected with an EINVAL. 2048 constexpr int kZero = 0; 2049 EXPECT_THAT( 2050 setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kZero, sizeof(kZero)), 2051 SyscallFailsWithErrno(EINVAL)); 2052 2053 // TCP_SYNCNT less than 1 should be rejected with an EINVAL. 2054 constexpr int kNeg = -1; 2055 EXPECT_THAT( 2056 setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kNeg, sizeof(kNeg)), 2057 SyscallFailsWithErrno(EINVAL)); 2058 2059 int get = -1; 2060 socklen_t get_len = sizeof(get); 2061 2062 ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len), 2063 SyscallSucceedsWithValue(0)); 2064 EXPECT_EQ(get_len, sizeof(get)); 2065 EXPECT_EQ(default_syn_cnt, get); 2066 } 2067 } 2068 2069 TEST_P(SimpleTcpSocketTest, GetTCPSynCntDefault) { 2070 FileDescriptor s = 2071 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 2072 2073 int get = -1; 2074 socklen_t get_len = sizeof(get); 2075 constexpr int kDefaultSynCnt = 6; 2076 2077 ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len), 2078 SyscallSucceedsWithValue(0)); 2079 EXPECT_EQ(get_len, sizeof(get)); 2080 EXPECT_EQ(get, kDefaultSynCnt); 2081 } 2082 2083 TEST_P(SimpleTcpSocketTest, SetTCPSynCntGreaterThanOne) { 2084 FileDescriptor s = 2085 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 2086 constexpr int kTCPSynCnt = 20; 2087 ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kTCPSynCnt, 2088 sizeof(kTCPSynCnt)), 2089 SyscallSucceeds()); 2090 2091 int get = -1; 2092 socklen_t get_len = sizeof(get); 2093 ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len), 2094 SyscallSucceeds()); 2095 EXPECT_EQ(get_len, sizeof(get)); 2096 EXPECT_EQ(get, kTCPSynCnt); 2097 } 2098 2099 TEST_P(SimpleTcpSocketTest, SetTCPSynCntAboveMax) { 2100 FileDescriptor s = 2101 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 2102 int get = -1; 2103 socklen_t get_len = sizeof(get); 2104 ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len), 2105 SyscallSucceedsWithValue(0)); 2106 EXPECT_EQ(get_len, sizeof(get)); 2107 int default_syn_cnt = get; 2108 { 2109 constexpr int kTCPSynCnt = 256; 2110 ASSERT_THAT(setsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &kTCPSynCnt, 2111 sizeof(kTCPSynCnt)), 2112 SyscallFailsWithErrno(EINVAL)); 2113 2114 int get = -1; 2115 socklen_t get_len = sizeof(get); 2116 ASSERT_THAT(getsockopt(s.get(), IPPROTO_TCP, TCP_SYNCNT, &get, &get_len), 2117 SyscallSucceeds()); 2118 EXPECT_EQ(get_len, sizeof(get)); 2119 EXPECT_EQ(get, default_syn_cnt); 2120 } 2121 } 2122 2123 TEST_P(SimpleTcpSocketTest, SetTCPWindowClampBelowMinRcvBuf) { 2124 FileDescriptor s = 2125 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 2126 2127 // Discover minimum receive buf by setting a really low value 2128 // for the receive buffer. 2129 constexpr int kZero = 0; 2130 EXPECT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &kZero, sizeof(kZero)), 2131 SyscallSucceeds()); 2132 2133 // Now retrieve the minimum value for SO_RCVBUF as the set above should 2134 // have caused SO_RCVBUF for the socket to be set to the minimum. 2135 int get = -1; 2136 socklen_t get_len = sizeof(get); 2137 ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &get, &get_len), 2138 SyscallSucceedsWithValue(0)); 2139 EXPECT_EQ(get_len, sizeof(get)); 2140 int min_so_rcvbuf = get; 2141 2142 { 2143 // TCP_WINDOW_CLAMP less than min_so_rcvbuf/2 should be set to 2144 // min_so_rcvbuf/2. 2145 int below_half_min_rcvbuf = min_so_rcvbuf / 2 - 1; 2146 EXPECT_THAT( 2147 setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, 2148 &below_half_min_rcvbuf, sizeof(below_half_min_rcvbuf)), 2149 SyscallSucceeds()); 2150 2151 int get = -1; 2152 socklen_t get_len = sizeof(get); 2153 2154 ASSERT_THAT( 2155 getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len), 2156 SyscallSucceedsWithValue(0)); 2157 EXPECT_EQ(get_len, sizeof(get)); 2158 EXPECT_EQ(min_so_rcvbuf / 2, get); 2159 } 2160 } 2161 2162 TEST_P(SimpleTcpSocketTest, SetTCPWindowClampZeroClosedSocket) { 2163 FileDescriptor s = 2164 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 2165 constexpr int kZero = 0; 2166 ASSERT_THAT( 2167 setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &kZero, sizeof(kZero)), 2168 SyscallSucceeds()); 2169 2170 int get = -1; 2171 socklen_t get_len = sizeof(get); 2172 ASSERT_THAT( 2173 getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len), 2174 SyscallSucceeds()); 2175 EXPECT_EQ(get_len, sizeof(get)); 2176 EXPECT_EQ(get, kZero); 2177 } 2178 2179 TEST_P(SimpleTcpSocketTest, SetTCPWindowClampAboveHalfMinRcvBuf) { 2180 FileDescriptor s = 2181 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 2182 2183 // Discover minimum receive buf by setting a really low value 2184 // for the receive buffer. 2185 constexpr int kZero = 0; 2186 EXPECT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &kZero, sizeof(kZero)), 2187 SyscallSucceeds()); 2188 2189 // Now retrieve the minimum value for SO_RCVBUF as the set above should 2190 // have caused SO_RCVBUF for the socket to be set to the minimum. 2191 int get = -1; 2192 socklen_t get_len = sizeof(get); 2193 ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_RCVBUF, &get, &get_len), 2194 SyscallSucceedsWithValue(0)); 2195 EXPECT_EQ(get_len, sizeof(get)); 2196 int min_so_rcvbuf = get; 2197 2198 { 2199 int above_half_min_rcv_buf = min_so_rcvbuf / 2 + 1; 2200 EXPECT_THAT( 2201 setsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, 2202 &above_half_min_rcv_buf, sizeof(above_half_min_rcv_buf)), 2203 SyscallSucceeds()); 2204 2205 int get = -1; 2206 socklen_t get_len = sizeof(get); 2207 2208 ASSERT_THAT( 2209 getsockopt(s.get(), IPPROTO_TCP, TCP_WINDOW_CLAMP, &get, &get_len), 2210 SyscallSucceedsWithValue(0)); 2211 EXPECT_EQ(get_len, sizeof(get)); 2212 EXPECT_EQ(above_half_min_rcv_buf, get); 2213 } 2214 } 2215 2216 #ifdef __linux__ 2217 2218 // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER. 2219 // gVisor currently silently ignores attaching a filter. 2220 TEST_P(SimpleTcpSocketTest, SetSocketAttachDetachFilter) { 2221 FileDescriptor s = 2222 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 2223 // Program generated using sudo tcpdump -i lo tcp and port 1234 -dd 2224 struct sock_filter code[] = { 2225 {0x28, 0, 0, 0x0000000c}, {0x15, 0, 6, 0x000086dd}, 2226 {0x30, 0, 0, 0x00000014}, {0x15, 0, 15, 0x00000006}, 2227 {0x28, 0, 0, 0x00000036}, {0x15, 12, 0, 0x000004d2}, 2228 {0x28, 0, 0, 0x00000038}, {0x15, 10, 11, 0x000004d2}, 2229 {0x15, 0, 10, 0x00000800}, {0x30, 0, 0, 0x00000017}, 2230 {0x15, 0, 8, 0x00000006}, {0x28, 0, 0, 0x00000014}, 2231 {0x45, 6, 0, 0x00001fff}, {0xb1, 0, 0, 0x0000000e}, 2232 {0x48, 0, 0, 0x0000000e}, {0x15, 2, 0, 0x000004d2}, 2233 {0x48, 0, 0, 0x00000010}, {0x15, 0, 1, 0x000004d2}, 2234 {0x6, 0, 0, 0x00040000}, {0x6, 0, 0, 0x00000000}, 2235 }; 2236 struct sock_fprog bpf = { 2237 .len = ABSL_ARRAYSIZE(code), 2238 .filter = code, 2239 }; 2240 ASSERT_THAT( 2241 setsockopt(s.get(), SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)), 2242 SyscallSucceeds()); 2243 2244 constexpr int val = 0; 2245 ASSERT_THAT( 2246 setsockopt(s.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)), 2247 SyscallSucceeds()); 2248 } 2249 2250 #endif // __linux__ 2251 2252 TEST_P(SimpleTcpSocketTest, SetSocketDetachFilterNoInstalledFilter) { 2253 // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER. 2254 SKIP_IF(IsRunningOnGvisor()); 2255 FileDescriptor s = 2256 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 2257 constexpr int val = 0; 2258 ASSERT_THAT( 2259 setsockopt(s.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)), 2260 SyscallFailsWithErrno(ENOENT)); 2261 } 2262 2263 TEST_P(SimpleTcpSocketTest, GetSocketDetachFilter) { 2264 FileDescriptor s = 2265 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 2266 2267 int val = 0; 2268 socklen_t val_len = sizeof(val); 2269 ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_DETACH_FILTER, &val, &val_len), 2270 SyscallFailsWithErrno(ENOPROTOOPT)); 2271 } 2272 2273 TEST_P(SimpleTcpSocketTest, CloseNonConnectedLingerOption) { 2274 FileDescriptor s = 2275 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 2276 2277 constexpr int kLingerTimeout = 10; // Seconds. 2278 2279 // Set the SO_LINGER option. 2280 struct linger sl = { 2281 .l_onoff = 1, 2282 .l_linger = kLingerTimeout, 2283 }; 2284 ASSERT_THAT(setsockopt(s.get(), SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)), 2285 SyscallSucceeds()); 2286 2287 struct pollfd poll_fd = { 2288 .fd = s.get(), 2289 .events = POLLHUP, 2290 }; 2291 constexpr int kPollTimeoutMs = 0; 2292 ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, kPollTimeoutMs), 2293 SyscallSucceedsWithValue(1)); 2294 2295 auto const start_time = absl::Now(); 2296 EXPECT_THAT(close(s.release()), SyscallSucceeds()); 2297 auto const end_time = absl::Now(); 2298 2299 // Close() should not linger and return immediately. 2300 ASSERT_LT((end_time - start_time), absl::Seconds(kLingerTimeout)); 2301 } 2302 2303 // Tests that SO_ACCEPTCONN returns non zero value for listening sockets. 2304 TEST_P(TcpSocketTest, GetSocketAcceptConnListener) { 2305 int got = -1; 2306 socklen_t length = sizeof(got); 2307 ASSERT_THAT( 2308 getsockopt(listener_.get(), SOL_SOCKET, SO_ACCEPTCONN, &got, &length), 2309 SyscallSucceeds()); 2310 ASSERT_EQ(length, sizeof(got)); 2311 EXPECT_EQ(got, 1); 2312 } 2313 2314 // Tests that SO_ACCEPTCONN returns zero value for not listening sockets. 2315 TEST_P(TcpSocketTest, GetSocketAcceptConnNonListener) { 2316 int got = -1; 2317 socklen_t length = sizeof(got); 2318 ASSERT_THAT( 2319 getsockopt(connected_.get(), SOL_SOCKET, SO_ACCEPTCONN, &got, &length), 2320 SyscallSucceeds()); 2321 ASSERT_EQ(length, sizeof(got)); 2322 EXPECT_EQ(got, 0); 2323 2324 ASSERT_THAT( 2325 getsockopt(accepted_.get(), SOL_SOCKET, SO_ACCEPTCONN, &got, &length), 2326 SyscallSucceeds()); 2327 ASSERT_EQ(length, sizeof(got)); 2328 EXPECT_EQ(got, 0); 2329 } 2330 2331 TEST_P(TcpSocketTest, SetPMTUD) { 2332 // IP_PMTUDISC_WANT should be default. 2333 int got = -1; 2334 socklen_t length = sizeof(got); 2335 ASSERT_THAT( 2336 getsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &got, &length), 2337 SyscallSucceeds()); 2338 EXPECT_EQ(got, IP_PMTUDISC_WANT); 2339 2340 int set = IP_PMTUDISC_DO; 2341 ASSERT_THAT( 2342 setsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &set, length), 2343 SyscallSucceeds()); 2344 ASSERT_THAT( 2345 getsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &got, &length), 2346 SyscallSucceeds()); 2347 EXPECT_EQ(got, IP_PMTUDISC_DO); 2348 set = IP_PMTUDISC_DONT; 2349 ASSERT_THAT( 2350 setsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &set, length), 2351 SyscallSucceeds()); 2352 ASSERT_THAT( 2353 getsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &got, &length), 2354 SyscallSucceeds()); 2355 EXPECT_EQ(got, IP_PMTUDISC_DONT); 2356 2357 // IP_PMTUDISC_PROBE is not supported by gVisor. 2358 set = IP_PMTUDISC_PROBE; 2359 if (IsRunningOnGvisor() && !IsRunningWithHostinet()) { 2360 ASSERT_THAT( 2361 setsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &set, length), 2362 SyscallFailsWithErrno(ENOTSUP)); 2363 } else { 2364 ASSERT_THAT( 2365 setsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &set, length), 2366 SyscallSucceeds()); 2367 ASSERT_THAT( 2368 getsockopt(accepted_.get(), SOL_IP, IP_MTU_DISCOVER, &got, &length), 2369 SyscallSucceeds()); 2370 EXPECT_EQ(got, IP_PMTUDISC_PROBE); 2371 } 2372 } 2373 2374 TEST_P(SimpleTcpSocketTest, GetSocketAcceptConnWithShutdown) { 2375 // TODO(b/171345701): Fix the TCP state for listening socket on shutdown. 2376 SKIP_IF(IsRunningOnGvisor()); 2377 2378 FileDescriptor s = 2379 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 2380 2381 // Initialize address to the loopback one. 2382 sockaddr_storage addr = 2383 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 2384 socklen_t addrlen = sizeof(addr); 2385 2386 // Bind to some port then start listening. 2387 ASSERT_THAT(bind(s.get(), AsSockAddr(&addr), addrlen), SyscallSucceeds()); 2388 2389 ASSERT_THAT(listen(s.get(), SOMAXCONN), SyscallSucceeds()); 2390 2391 int got = -1; 2392 socklen_t length = sizeof(got); 2393 ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ACCEPTCONN, &got, &length), 2394 SyscallSucceeds()); 2395 ASSERT_EQ(length, sizeof(got)); 2396 EXPECT_EQ(got, 1); 2397 2398 EXPECT_THAT(shutdown(s.get(), SHUT_RD), SyscallSucceeds()); 2399 ASSERT_THAT(getsockopt(s.get(), SOL_SOCKET, SO_ACCEPTCONN, &got, &length), 2400 SyscallSucceeds()); 2401 ASSERT_EQ(length, sizeof(got)); 2402 EXPECT_EQ(got, 0); 2403 } 2404 2405 void ShutdownConnectingSocket(int domain, int shutdown_mode) { 2406 FileDescriptor bound_s = 2407 ASSERT_NO_ERRNO_AND_VALUE(Socket(domain, SOCK_STREAM, IPPROTO_TCP)); 2408 2409 sockaddr_storage bound_addr = 2410 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(domain)); 2411 socklen_t bound_addrlen = sizeof(bound_addr); 2412 2413 ASSERT_THAT(bind(bound_s.get(), AsSockAddr(&bound_addr), bound_addrlen), 2414 SyscallSucceeds()); 2415 2416 // Start listening. Use a zero backlog to only allow one connection in the 2417 // accept queue. 2418 ASSERT_THAT(listen(bound_s.get(), 0), SyscallSucceeds()); 2419 2420 // Get the addresses the socket is bound to because the port is chosen by the 2421 // stack. 2422 ASSERT_THAT( 2423 getsockname(bound_s.get(), AsSockAddr(&bound_addr), &bound_addrlen), 2424 SyscallSucceeds()); 2425 2426 // Establish a connection. But do not accept it. That way, subsequent 2427 // connections will not get a SYN-ACK because the queue is full. 2428 FileDescriptor connected_s = 2429 ASSERT_NO_ERRNO_AND_VALUE(Socket(domain, SOCK_STREAM, IPPROTO_TCP)); 2430 ASSERT_THAT( 2431 RetryEINTR(connect)(connected_s.get(), 2432 reinterpret_cast<const struct sockaddr*>(&bound_addr), 2433 bound_addrlen), 2434 SyscallSucceeds()); 2435 2436 FileDescriptor connecting_s = ASSERT_NO_ERRNO_AND_VALUE( 2437 Socket(domain, SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); 2438 ASSERT_THAT( 2439 RetryEINTR(connect)(connecting_s.get(), 2440 reinterpret_cast<const struct sockaddr*>(&bound_addr), 2441 bound_addrlen), 2442 SyscallFailsWithErrno(EINPROGRESS)); 2443 2444 // Now the test: when a connecting socket is shutdown, the socket should enter 2445 // an error state. 2446 EXPECT_THAT(shutdown(connecting_s.get(), shutdown_mode), SyscallSucceeds()); 2447 2448 // We don't need to specify any events to get POLLHUP or POLLERR because these 2449 // are always tracked. 2450 struct pollfd poll_fd = { 2451 .fd = connecting_s.get(), 2452 }; 2453 2454 EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 0), SyscallSucceedsWithValue(1)); 2455 EXPECT_EQ(poll_fd.revents, POLLHUP | POLLERR); 2456 } 2457 2458 TEST_P(SimpleTcpSocketTest, ShutdownReadConnectingSocket) { 2459 // TODO(b/171436815): Re-enable when S/R is fixed. 2460 const DisableSave disable_save; 2461 // TODO(b/175409607): Fix this test for hostinet. 2462 SKIP_IF(IsRunningWithHostinet()); 2463 ShutdownConnectingSocket(GetParam(), SHUT_RD); 2464 } 2465 2466 TEST_P(SimpleTcpSocketTest, ShutdownWriteConnectingSocket) { 2467 // TODO(b/171436815): Re-enable when S/R is fixed. 2468 const DisableSave disable_save; 2469 // TODO(b/175409607): Fix this test for hostinet. 2470 SKIP_IF(IsRunningWithHostinet()); 2471 ShutdownConnectingSocket(GetParam(), SHUT_WR); 2472 } 2473 2474 TEST_P(SimpleTcpSocketTest, ShutdownReadWriteConnectingSocket) { 2475 // TODO(b/171436815): Re-enable when S/R is fixed. 2476 const DisableSave disable_save; 2477 // TODO(b/175409607): Fix this test for hostinet. 2478 SKIP_IF(IsRunningWithHostinet()); 2479 ShutdownConnectingSocket(GetParam(), SHUT_RDWR); 2480 } 2481 2482 // Tests that connecting to an unspecified address results in ECONNREFUSED. 2483 TEST_P(SimpleTcpSocketTest, ConnectUnspecifiedAddress) { 2484 sockaddr_storage addr; 2485 socklen_t addrlen = sizeof(addr); 2486 memset(&addr, 0, addrlen); 2487 addr.ss_family = GetParam(); 2488 auto do_connect = [&addr, addrlen]() { 2489 FileDescriptor s = ASSERT_NO_ERRNO_AND_VALUE( 2490 Socket(addr.ss_family, SOCK_STREAM, IPPROTO_TCP)); 2491 ASSERT_THAT(RetryEINTR(connect)(s.get(), AsSockAddr(&addr), addrlen), 2492 SyscallFailsWithErrno(ECONNREFUSED)); 2493 }; 2494 do_connect(); 2495 // Test the v4 mapped address as well. 2496 if (GetParam() == AF_INET6) { 2497 auto sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr); 2498 sin6->sin6_addr.s6_addr[10] = sin6->sin6_addr.s6_addr[11] = 0xff; 2499 do_connect(); 2500 } 2501 } 2502 2503 TEST_P(SimpleTcpSocketTest, OnlyAcknowledgeBacklogConnections) { 2504 // TODO(b/171436815): Re-enable when S/R is fixed. 2505 const DisableSave disable_save; 2506 // TODO(b/175409607): Fix this test for hostinet. 2507 SKIP_IF(IsRunningWithHostinet()); 2508 2509 // At some point, there was a bug in gVisor where a connection could be 2510 // SYN-ACK'd by the server even if the accept queue was already full. This was 2511 // possible because once the listener would process an ACK, it would move the 2512 // new connection in the accept queue asynchronously. It created an 2513 // opportunity where the listener could process another SYN before completing 2514 // the delivery that would have filled the accept queue. 2515 // 2516 // This test checks that there is no such race on loopback. On other 2517 // interfaces, where delivery is not synchronous, it is possible for more 2518 // clients to be in the ESTABLISHED state than there are slots in the accept 2519 // queue. 2520 2521 std::array<std::optional<ScopedThread>, 100> threads; 2522 for (auto& thread : threads) { 2523 thread.emplace([]() { 2524 FileDescriptor bound_s = ASSERT_NO_ERRNO_AND_VALUE( 2525 Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 2526 2527 sockaddr_storage bound_addr = 2528 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 2529 socklen_t bound_addrlen = sizeof(bound_addr); 2530 2531 ASSERT_THAT(bind(bound_s.get(), AsSockAddr(&bound_addr), bound_addrlen), 2532 SyscallSucceeds()); 2533 2534 // Start listening. Use a zero backlog to only allow one connection in the 2535 // accept queue. 2536 ASSERT_THAT(listen(bound_s.get(), 0), SyscallSucceeds()); 2537 2538 // Get the addresses the socket is bound to because the port is chosen by 2539 // the stack. 2540 ASSERT_THAT( 2541 getsockname(bound_s.get(), AsSockAddr(&bound_addr), &bound_addrlen), 2542 SyscallSucceeds()); 2543 2544 // Establish a connection, but do not accept it. 2545 FileDescriptor connected_s = ASSERT_NO_ERRNO_AND_VALUE( 2546 Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 2547 ASSERT_THAT(RetryEINTR(connect)( 2548 connected_s.get(), 2549 reinterpret_cast<const struct sockaddr*>(&bound_addr), 2550 bound_addrlen), 2551 SyscallSucceeds()); 2552 2553 // Immediately attempt to establish another connection. Use non blocking 2554 // socket because this is expected to timeout. 2555 FileDescriptor connecting_s = ASSERT_NO_ERRNO_AND_VALUE( 2556 Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); 2557 ASSERT_THAT(RetryEINTR(connect)( 2558 connecting_s.get(), 2559 reinterpret_cast<const struct sockaddr*>(&bound_addr), 2560 bound_addrlen), 2561 SyscallFailsWithErrno(EINPROGRESS)); 2562 2563 struct pollfd poll_fd = { 2564 .fd = connecting_s.get(), 2565 .events = POLLOUT, 2566 }; 2567 EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 10), 2568 SyscallSucceedsWithValue(0)); 2569 }); 2570 } 2571 } 2572 2573 TEST_P(SimpleTcpSocketTest, SynRcvdOnListenerShutdown) { 2574 // TODO(b/171436815): Re-enable when S/R is fixed. 2575 const DisableSave disable_save; 2576 FileDescriptor bound_s = 2577 ASSERT_NO_ERRNO_AND_VALUE(Socket(GetParam(), SOCK_STREAM, IPPROTO_TCP)); 2578 2579 sockaddr_storage bound_addr = 2580 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 2581 socklen_t bound_addrlen = sizeof(bound_addr); 2582 2583 ASSERT_THAT(bind(bound_s.get(), AsSockAddr(&bound_addr), bound_addrlen), 2584 SyscallSucceeds()); 2585 2586 // Get the addresses the socket is bound to because the port is chosen by the 2587 // stack. 2588 ASSERT_THAT( 2589 getsockname(bound_s.get(), AsSockAddr(&bound_addr), &bound_addrlen), 2590 SyscallSucceeds()); 2591 2592 // kBacklog connections are permitted to be in the SYNRCVD state. Select the 2593 // largest reasonable value; we want to create a situation where at least some 2594 // of the connections are still in SYNRCVD when we shut down the listener. 2595 constexpr int kBacklog = 256; 2596 ASSERT_THAT(listen(bound_s.get(), kBacklog), SyscallSucceeds()); 2597 2598 std::array<std::thread, kBacklog + 1> threads; 2599 for (auto& thread : threads) { 2600 FileDescriptor connecting_s = ASSERT_NO_ERRNO_AND_VALUE( 2601 Socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, IPPROTO_TCP)); 2602 ASSERT_THAT(RetryEINTR(connect)( 2603 connecting_s.get(), 2604 reinterpret_cast<const struct sockaddr*>(&bound_addr), 2605 bound_addrlen), 2606 SyscallFailsWithErrno(EINPROGRESS)); 2607 thread = std::thread([connecting_s = std::move(connecting_s)]() { 2608 struct pollfd poll_fd = { 2609 .fd = connecting_s.get(), 2610 }; 2611 poll_fd.events = std::numeric_limits<decltype(poll_fd.events)>::max(); 2612 ASSERT_THAT(RetryEINTR(poll)(&poll_fd, 1, 1000), 2613 SyscallSucceedsWithValue(1)); 2614 2615 int err; 2616 socklen_t optlen = sizeof(err); 2617 ASSERT_THAT( 2618 getsockopt(connecting_s.get(), SOL_SOCKET, SO_ERROR, &err, &optlen), 2619 SyscallSucceeds()); 2620 ASSERT_EQ(optlen, sizeof(err)); 2621 2622 if (err == 0) { 2623 EXPECT_EQ(poll_fd.revents, POLLOUT 2624 // TODO(https://fxbug.dev/42152810): Remove when POLLWRNORM is correctly 2625 // asserted in Fuchsia. 2626 #if !defined(__Fuchsia__) 2627 | POLLWRNORM 2628 #endif 2629 ); 2630 } else { 2631 EXPECT_THAT(err, ::testing::AnyOf(::testing::Eq(ECONNRESET), 2632 ::testing::Eq(ECONNREFUSED))) 2633 << strerror(err); 2634 2635 const int revents = poll_fd.revents; 2636 2637 // It's possible the error arrived *after* poll returned. Fetch the 2638 // signals again - this time with a zero timeout. 2639 EXPECT_THAT(RetryEINTR(poll)(&poll_fd, 1, 0), 2640 SyscallSucceedsWithValue(1)); 2641 2642 EXPECT_EQ(poll_fd.revents, 2643 // TODO(https://fxbug.dev/42156248): Remove when other signals are 2644 // asserted together with POLLERR in Fuchsia. 2645 #if defined(__Fuchsia__) 2646 POLLOUT 2647 #else 2648 []() { 2649 const int expected_revents = POLLIN | POLLOUT | POLLHUP | 2650 POLLRDNORM | POLLWRNORM | 2651 POLLRDHUP; 2652 // TODO(gvisor.dev/issue/6666): POLLERR is still present 2653 // after getsockopt(..., SO_ERROR, ...) call (unless 2654 // hostinet is used). 2655 if (IsRunningOnGvisor() && !IsRunningWithHostinet()) { 2656 return expected_revents | POLLPRI | POLLERR; 2657 } 2658 return expected_revents; 2659 }() 2660 #endif 2661 ); 2662 2663 EXPECT_THAT( 2664 // TODO(gvisor.dev/issue/6666): on Linux, POLLERR goes away 2665 // after the getsockopt(..., SO_ERROR, ...) call, but not on 2666 // gVisor (unless hostinet is used). 2667 revents, 2668 ::testing::AnyOf( 2669 // If the error arrived after poll returned. 2670 ::testing::Eq(POLLOUT | POLLWRNORM), 2671 ::testing::Eq([expected_revents = poll_fd.revents]() -> int { 2672 if (IsRunningOnGvisor() && !IsRunningWithHostinet()) { 2673 return expected_revents; 2674 } 2675 return expected_revents | POLLERR; 2676 }()))); 2677 } 2678 }); 2679 } 2680 2681 EXPECT_THAT(shutdown(bound_s.get(), SHUT_RD), SyscallSucceeds()); 2682 2683 for (auto& thread : threads) { 2684 thread.join(); 2685 } 2686 } 2687 2688 // Fuchsia doesn't have epoll. 2689 #ifdef __linux__ 2690 2691 // Ensure that we can S/R when epoll is waiting on a listening socket. 2692 // Regression test for b/280313827. 2693 TEST_P(SimpleTcpSocketTest, EpollListeningSocket) { 2694 // Create the listening socket. 2695 int fd; 2696 ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM | SOCK_NONBLOCK, 0), 2697 SyscallSucceeds()); 2698 FileDescriptor sockfd(fd); 2699 2700 // Bind to some port. 2701 sockaddr_storage addr = 2702 ASSERT_NO_ERRNO_AND_VALUE(InetLoopbackAddrZeroPort(GetParam())); 2703 socklen_t addrlen = sizeof(addr); 2704 ASSERT_THAT(bind(sockfd.get(), AsSockAddr(&addr), addrlen), 2705 SyscallSucceeds()); 2706 2707 // Listen and accept with the expectation that accept fails. 2708 ASSERT_THAT(listen(sockfd.get(), 2), SyscallSucceeds()); 2709 ASSERT_THAT(accept(sockfd.get(), nullptr, nullptr), 2710 SyscallFailsWithErrno(EAGAIN)); 2711 2712 // Start a thread that waits a bit, then connects to the listening socket. 2713 ScopedThread save_and_connect_thread([&]() { 2714 // Give epoll a chance to start blocking. 2715 absl::SleepFor(absl::Seconds(1)); 2716 2717 // Save while epoll is blocking. 2718 MaybeSave(); 2719 2720 // Get the listener's address and connect to it. 2721 int fd; 2722 ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, 0), SyscallSucceeds()); 2723 FileDescriptor connfd(fd); 2724 ASSERT_THAT(getsockname(sockfd.get(), AsSockAddr(&addr), &addrlen), 2725 SyscallSucceeds()); 2726 ASSERT_THAT(RetryEINTR(connect)(connfd.get(), AsSockAddr(&addr), addrlen), 2727 SyscallSucceeds()); 2728 }); 2729 2730 // Epoll on sockfd. 2731 ASSERT_THAT(fd = epoll_create(1), SyscallSucceeds()); 2732 FileDescriptor epollfd(fd); 2733 struct epoll_event event = {}; 2734 event.events = EPOLLIN; 2735 ASSERT_THAT(epoll_ctl(epollfd.get(), EPOLL_CTL_ADD, sockfd.get(), &event), 2736 SyscallSucceeds()); 2737 2738 struct epoll_event results = {}; 2739 ASSERT_THAT(RetryEINTR(epoll_wait)(epollfd.get(), &results, 1, 60000), 2740 SyscallSucceeds()); 2741 2742 save_and_connect_thread.Join(); 2743 } 2744 2745 TEST_P(SimpleTcpSocketTest, SetTCPCorkOff) { 2746 int fd; 2747 ASSERT_THAT(fd = socket(GetParam(), SOCK_STREAM, IPPROTO_TCP), 2748 SyscallSucceeds()); 2749 2750 ASSERT_THAT( 2751 setsockopt(fd, IPPROTO_TCP, TCP_CORK, &kSockOptOff, sizeof(kSockOptOff)), 2752 SyscallSucceeds()); 2753 } 2754 #endif // __linux__ 2755 2756 INSTANTIATE_TEST_SUITE_P(AllInetTests, SimpleTcpSocketTest, 2757 ::testing::Values(AF_INET, AF_INET6)); 2758 2759 } // namespace 2760 2761 } // namespace testing 2762 } // namespace gvisor