// github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/test/syscalls/linux/socket_test_util.cc
// Copyright 2018 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "test/syscalls/linux/socket_test_util.h"

#include <arpa/inet.h>
#include <netinet/in.h>
#include <poll.h>
#include <sys/socket.h>

#include <memory>

#include "gtest/gtest.h"
#include "absl/memory/memory.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_split.h"
#include "absl/time/clock.h"
#include "absl/types/optional.h"
#include "test/util/file_descriptor.h"
#include "test/util/posix_error.h"
#include "test/util/temp_path.h"
#include "test/util/thread_util.h"

namespace gvisor {
namespace testing {

// NOTE(review): throughout this file sockets are held as raw ints until they
// are handed to an FDSocketPair/AddrFDSocketPair/FileDescriptor. Assuming
// RETURN_ERROR_IF_SYSCALL_FAIL returns early on failure, any fd opened before
// the failing step is not closed on that path - confirm this is acceptable for
// test-only code.

// Returns a creator that builds a pair with a single socketpair(2) call using
// the given domain, type and protocol.
Creator<SocketPair> SyscallSocketPairCreator(int domain, int type,
                                             int protocol) {
  return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> {
    int pair[2];
    RETURN_ERROR_IF_SYSCALL_FAIL(socketpair(domain, type, protocol, pair));
    MaybeSave();  // Save on successful creation.
    return absl::make_unique<FDSocketPair>(pair[0], pair[1]);
  };
}

// Returns a creator that opens one socket with socket(2) and wraps the
// resulting fd in a FileDescriptor.
Creator<FileDescriptor> SyscallSocketCreator(int domain, int type,
                                             int protocol) {
  return [=]() -> PosixErrorOr<std::unique_ptr<FileDescriptor>> {
    int fd = 0;
    RETURN_ERROR_IF_SYSCALL_FAIL(fd = socket(domain, type, protocol));
    MaybeSave();  // Save on successful creation.
    return absl::make_unique<FileDescriptor>(fd);
  };
}

// Builds a sockaddr_un whose path comes from NewTempAbsPathInDir("/tmp").
//
// If `abstract` is true the first byte of the path is overwritten with 0 so
// the address lives in the abstract namespace. `domain` is stored in
// sun_family. Returns EINVAL if the generated path does not fit in sun_path.
PosixErrorOr<struct sockaddr_un> UniqueUnixAddr(bool abstract, int domain) {
  struct sockaddr_un addr = {};
  std::string path = NewTempAbsPathInDir("/tmp");
  // ">=" keeps at least one trailing zero byte from the zero-initialized
  // struct after the copied path.
  if (path.size() >= sizeof(addr.sun_path)) {
    return PosixError(EINVAL,
                      "Unable to generate a temp path of appropriate length");
  }

  if (abstract) {
    // Indicate that the path is in the abstract namespace.
    path[0] = 0;
  }
  memcpy(addr.sun_path, path.c_str(), path.length());
  addr.sun_family = domain;
  return addr;
}

// Returns a creator that makes a connected unix socket pair the long way:
// bind + listen on one socket, connect a second socket to it, accept, then
// close the listener and (for filesystem addresses) unlink the bound path.
//
// The resulting pair holds the accepted and connected fds together with the
// bound address and one extra, unused unique address.
Creator<SocketPair> AcceptBindSocketPairCreator(bool abstract, int domain,
                                                int type, int protocol) {
  return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
    ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un bind_addr,
                           UniqueUnixAddr(abstract, domain));
    ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un extra_addr,
                           UniqueUnixAddr(abstract, domain));

    int bound;
    RETURN_ERROR_IF_SYSCALL_FAIL(bound = socket(domain, type, protocol));
    MaybeSave();  // Successful socket creation.
    RETURN_ERROR_IF_SYSCALL_FAIL(
        bind(bound, AsSockAddr(&bind_addr), sizeof(bind_addr)));
    MaybeSave();  // Successful bind.
    RETURN_ERROR_IF_SYSCALL_FAIL(
        listen(bound, /* backlog = */ 5));  // NOLINT(bugprone-argument-comment)
    MaybeSave();  // Successful listen.

    int connected;
    RETURN_ERROR_IF_SYSCALL_FAIL(connected = socket(domain, type, protocol));
    MaybeSave();  // Successful socket creation.
    RETURN_ERROR_IF_SYSCALL_FAIL(
        connect(connected, AsSockAddr(&bind_addr), sizeof(bind_addr)));
    MaybeSave();  // Successful connect.

    // Only the SOCK_NONBLOCK/SOCK_CLOEXEC bits of `type` are forwarded to
    // accept4 as flags.
    int accepted;
    RETURN_ERROR_IF_SYSCALL_FAIL(
        accepted = accept4(bound, nullptr, nullptr,
                           type & (SOCK_NONBLOCK | SOCK_CLOEXEC)));
    MaybeSave();  // Successful accept.

    // Cleanup no longer needed resources.
    RETURN_ERROR_IF_SYSCALL_FAIL(close(bound));
    MaybeSave();  // Dropped original socket.

    // Only unlink if path is not in abstract namespace.
    if (bind_addr.sun_path[0] != 0) {
      RETURN_ERROR_IF_SYSCALL_FAIL(unlink(bind_addr.sun_path));
      MaybeSave();  // Unlinked path.
    }

    // accepted is before connected to destruct connected before accepted.
    // Destructors for nonstatic member objects are called in the reverse order
    // in which they appear in the class declaration.
    return absl::make_unique<AddrFDSocketPair>(accepted, connected, bind_addr,
                                               extra_addr);
  };
}

// AcceptBindSocketPairCreator using filesystem (non-abstract) addresses.
Creator<SocketPair> FilesystemAcceptBindSocketPairCreator(int domain, int type,
                                                          int protocol) {
  return AcceptBindSocketPairCreator(/* abstract= */ false, domain, type,
                                     protocol);
}

// AcceptBindSocketPairCreator using abstract-namespace addresses.
Creator<SocketPair> AbstractAcceptBindSocketPairCreator(int domain, int type,
                                                        int protocol) {
  return AcceptBindSocketPairCreator(/* abstract= */ true, domain, type,
                                     protocol);
}

// Returns a creator that binds two sockets to two unique unix addresses and
// connects each one to the other's address. Filesystem paths are unlinked once
// both connects have succeeded.
Creator<SocketPair> BidirectionalBindSocketPairCreator(bool abstract,
                                                       int domain, int type,
                                                       int protocol) {
  return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> {
    ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr1,
                           UniqueUnixAddr(abstract, domain));
    ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr2,
                           UniqueUnixAddr(abstract, domain));

    int sock1;
    RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol));
    MaybeSave();  // Successful socket creation.
    RETURN_ERROR_IF_SYSCALL_FAIL(
        bind(sock1, AsSockAddr(&addr1), sizeof(addr1)));
    MaybeSave();  // Successful bind.

    int sock2;
    RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol));
    MaybeSave();  // Successful socket creation.
    RETURN_ERROR_IF_SYSCALL_FAIL(
        bind(sock2, AsSockAddr(&addr2), sizeof(addr2)));
    MaybeSave();  // Successful bind.

    RETURN_ERROR_IF_SYSCALL_FAIL(
        connect(sock1, AsSockAddr(&addr2), sizeof(addr2)));
    MaybeSave();  // Successful connect.

    RETURN_ERROR_IF_SYSCALL_FAIL(
        connect(sock2, AsSockAddr(&addr1), sizeof(addr1)));
    MaybeSave();  // Successful connect.

    // Cleanup no longer needed resources.

    // Only unlink if path is not in abstract namespace.
    if (addr1.sun_path[0] != 0) {
      RETURN_ERROR_IF_SYSCALL_FAIL(unlink(addr1.sun_path));
      MaybeSave();  // Successful unlink.
    }

    // Only unlink if path is not in abstract namespace.
    if (addr2.sun_path[0] != 0) {
      RETURN_ERROR_IF_SYSCALL_FAIL(unlink(addr2.sun_path));
      MaybeSave();  // Successful unlink.
    }

    return absl::make_unique<FDSocketPair>(sock1, sock2);
  };
}

// BidirectionalBindSocketPairCreator using filesystem addresses.
Creator<SocketPair> FilesystemBidirectionalBindSocketPairCreator(int domain,
                                                                 int type,
                                                                 int protocol) {
  return BidirectionalBindSocketPairCreator(/* abstract= */ false, domain, type,
                                            protocol);
}

// BidirectionalBindSocketPairCreator using abstract-namespace addresses.
Creator<SocketPair> AbstractBidirectionalBindSocketPairCreator(int domain,
                                                               int type,
                                                               int protocol) {
  return BidirectionalBindSocketPairCreator(/* abstract= */ true, domain, type,
                                            protocol);
}

// Returns a creator that obtains both ends of a pair by connecting two
// sockets to the unix socket at "/socket", then makes two extra
// connect-and-close round trips (see the comment inside for why).
Creator<SocketPair> SocketpairGoferSocketPairCreator(int domain, int type,
                                                     int protocol) {
  return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> {
    struct sockaddr_un addr = {};
    constexpr char kSocketGoferPath[] = "/socket";
    // sizeof() includes the terminating NUL of the literal.
    memcpy(addr.sun_path, kSocketGoferPath, sizeof(kSocketGoferPath));
    addr.sun_family = domain;

    int sock1;
    RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol));
    MaybeSave();  // Successful socket creation.
    RETURN_ERROR_IF_SYSCALL_FAIL(
        connect(sock1, AsSockAddr(&addr), sizeof(addr)));
    MaybeSave();  // Successful connect.

    int sock2;
    RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol));
    MaybeSave();  // Successful socket creation.
    RETURN_ERROR_IF_SYSCALL_FAIL(
        connect(sock2, AsSockAddr(&addr), sizeof(addr)));
    MaybeSave();  // Successful connect.

    // Make and close another socketpair to ensure that the duped ends of the
    // first socketpair get closed.
    //
    // The problem is that there is no way to atomically send and close an FD.
    // The closest that we can do is send and then immediately close the FD,
    // which is what we do in the gofer. The gofer won't respond to another
    // request until the reply is sent and the FD is closed, so forcing the
    // gofer to handle another request will ensure that this has happened.
    for (int i = 0; i < 2; i++) {
      int sock;
      RETURN_ERROR_IF_SYSCALL_FAIL(sock = socket(domain, type, protocol));
      RETURN_ERROR_IF_SYSCALL_FAIL(
          connect(sock, AsSockAddr(&addr), sizeof(addr)));
      RETURN_ERROR_IF_SYSCALL_FAIL(close(sock));
    }

    return absl::make_unique<FDSocketPair>(sock1, sock2);
  };
}

// Returns a creator that obtains both ends of a pair by open(2)-ing "/socket"
// twice with O_RDWR | flags.
Creator<SocketPair> SocketpairGoferFileSocketPairCreator(int flags) {
  return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> {
    constexpr char kSocketGoferPath[] = "/socket";

    int sock1;
    RETURN_ERROR_IF_SYSCALL_FAIL(sock1 =
                                     open(kSocketGoferPath, O_RDWR | flags));
    MaybeSave();  // Successful socket creation.

    int sock2;
    RETURN_ERROR_IF_SYSCALL_FAIL(sock2 =
                                     open(kSocketGoferPath, O_RDWR | flags));
    MaybeSave();  // Successful socket creation.

    return absl::make_unique<FDSocketPair>(sock1, sock2);
  };
}

// Returns a creator that opens two sockets and pairs them with two unique
// unix addresses without binding or connecting either socket.
Creator<SocketPair> UnboundSocketPairCreator(bool abstract, int domain,
                                             int type, int protocol) {
  return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
    ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr1,
                           UniqueUnixAddr(abstract, domain));
    ASSIGN_OR_RETURN_ERRNO(struct sockaddr_un addr2,
                           UniqueUnixAddr(abstract, domain));

    int sock1;
    RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol));
    MaybeSave();  // Successful socket creation.
    int sock2;
    RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol));
    MaybeSave();  // Successful socket creation.
    return absl::make_unique<AddrFDSocketPair>(sock1, sock2, addr1, addr2);
  };
}

// UnboundSocketPairCreator using filesystem addresses.
Creator<SocketPair> FilesystemUnboundSocketPairCreator(int domain, int type,
                                                       int protocol) {
  return UnboundSocketPairCreator(/* abstract= */ false, domain, type,
                                  protocol);
}

// UnboundSocketPairCreator using abstract-namespace addresses.
Creator<SocketPair> AbstractUnboundSocketPairCreator(int domain, int type,
                                                     int protocol) {
  return UnboundSocketPairCreator(/* abstract= */ true, domain, type, protocol);
}

// Fills `addr` with 127.0.0.1 and port 0. `dual_stack` is not used by this
// overload; it exists so both overloads can be called the same way.
void LocalhostAddr(struct sockaddr_in* addr, bool dual_stack) {
  addr->sin_family = AF_INET;
  addr->sin_port = htons(0);
  inet_pton(AF_INET, "127.0.0.1",
            reinterpret_cast<void*>(&addr->sin_addr.s_addr));
}

// Fills `addr` with port 0 and either the IPv4-mapped loopback address
// (::ffff:127.0.0.1) when `dual_stack` is true, or ::1 otherwise.
void LocalhostAddr(struct sockaddr_in6* addr, bool dual_stack) {
  addr->sin6_family = AF_INET6;
  addr->sin6_port = htons(0);
  if (dual_stack) {
    inet_pton(AF_INET6, "::ffff:127.0.0.1",
              reinterpret_cast<void*>(&addr->sin6_addr.s6_addr));
  } else {
    inet_pton(AF_INET6, "::1",
              reinterpret_cast<void*>(&addr->sin6_addr.s6_addr));
  }
  addr->sin6_scope_id = 0;
}

// Binds `fd` to the localhost address produced by LocalhostAddr (port 0) and
// returns the address reported by getsockname afterwards, i.e. including the
// port that was actually assigned. T is sockaddr_in or sockaddr_in6.
template <typename T>
PosixErrorOr<T> BindIP(int fd, bool dual_stack) {
  T addr = {};
  LocalhostAddr(&addr, dual_stack);
  RETURN_ERROR_IF_SYSCALL_FAIL(bind(fd, AsSockAddr(&addr), sizeof(addr)));
  socklen_t addrlen = sizeof(addr);
  RETURN_ERROR_IF_SYSCALL_FAIL(getsockname(fd, AsSockAddr(&addr), &addrlen));
  return addr;
}

// BindIP followed by listen() with a backlog of 5. Returns the bound address.
template <typename T>
PosixErrorOr<T> TCPBindAndListen(int fd, bool dual_stack) {
  ASSIGN_OR_RETURN_ERRNO(T addr, BindIP<T>(fd, dual_stack));
  RETURN_ERROR_IF_SYSCALL_FAIL(
      listen(fd, /* backlog = */ 5));  // NOLINT(bugprone-argument-comment)
  return addr;
}

// Connects `connected` to `bind_addr` and accepts the connection on the
// already-listening socket `bound`.
//
// Both steps tolerate non-blocking sockets: EINPROGRESS from connect and
// EAGAIN from accept4 are not treated as failures. The returned pair holds
// (connected, accepted) with `bind_addr` and a fresh localhost address.
template <typename T>
PosixErrorOr<std::unique_ptr<AddrFDSocketPair>>
CreateTCPConnectAcceptSocketPair(int bound, int connected, int type,
                                 bool dual_stack, T bind_addr) {
  // The macro argument evaluates to 0 when connect failed with EINPROGRESS and
  // to connect's own return value otherwise, so only other errors abort here.
  int connect_result = 0;
  RETURN_ERROR_IF_SYSCALL_FAIL(
      (connect_result = RetryEINTR(connect)(connected, AsSockAddr(&bind_addr),
                                            sizeof(bind_addr))) == -1 &&
              errno == EINPROGRESS
          ? 0
          : connect_result);
  MaybeSave();  // Successful connect.

  if (connect_result == -1) {
    // Connect is in progress: poll once (timeout 0, so this does not block)
    // and then read SO_ERROR to pick up any error recorded on the socket.
    struct pollfd connect_poll = {connected, POLLOUT | POLLERR | POLLHUP, 0};
    RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(poll)(&connect_poll, 1, 0));
    int error = 0;
    socklen_t errorlen = sizeof(error);
    RETURN_ERROR_IF_SYSCALL_FAIL(
        getsockopt(connected, SOL_SOCKET, SO_ERROR, &error, &errorlen));
    // Surface the socket error through errno so the macro reports it.
    errno = error;
    RETURN_ERROR_IF_SYSCALL_FAIL(
        /* connect */ error == 0 ? 0 : -1);
  }

  // Retry accept4 until it yields an fd. The poll timeout is 0, so when the
  // listener is non-blocking this loop spins rather than sleeps.
  int accepted = -1;
  struct pollfd accept_poll = {bound, POLLIN, 0};
  while (accepted == -1) {
    RETURN_ERROR_IF_SYSCALL_FAIL(RetryEINTR(poll)(&accept_poll, 1, 0));

    // Evaluates to 0 (keep looping) on EAGAIN, otherwise to accept4's result.
    RETURN_ERROR_IF_SYSCALL_FAIL(
        (accepted = RetryEINTR(accept4)(
             bound, nullptr, nullptr, type & (SOCK_NONBLOCK | SOCK_CLOEXEC))) ==
                    -1 &&
                errno == EAGAIN
            ? 0
            : accepted);
  }
  MaybeSave();  // Successful accept.

  T extra_addr = {};
  LocalhostAddr(&extra_addr, dual_stack);
  return absl::make_unique<AddrFDSocketPair>(connected, accepted, bind_addr,
                                             extra_addr);
}

// Binds and listens on `bound`, builds a connect/accept pair through it, then
// closes `bound`. The pair result (success or error) is returned unless the
// close itself fails.
template <typename T>
PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> CreateTCPAcceptBindSocketPair(
    int bound, int connected, int type, bool dual_stack) {
  ASSIGN_OR_RETURN_ERRNO(T bind_addr, TCPBindAndListen<T>(bound, dual_stack));

  auto result = CreateTCPConnectAcceptSocketPair(bound, connected, type,
                                                 dual_stack, bind_addr);

  // Cleanup no longer needed resources.
  RETURN_ERROR_IF_SYSCALL_FAIL(close(bound));
  MaybeSave();  // Successful close.

  return result;
}

// Returns a creator that opens a fresh listener and a fresh connecting socket
// on every invocation and builds a TCP pair from them. AF_INET uses
// sockaddr_in; every other domain is treated as sockaddr_in6.
Creator<SocketPair> TCPAcceptBindSocketPairCreator(int domain, int type,
                                                   int protocol,
                                                   bool dual_stack) {
  return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
    int bound;
    RETURN_ERROR_IF_SYSCALL_FAIL(bound = socket(domain, type, protocol));
    MaybeSave();  // Successful socket creation.

    int connected;
    RETURN_ERROR_IF_SYSCALL_FAIL(connected = socket(domain, type, protocol));
    MaybeSave();  // Successful socket creation.

    if (domain == AF_INET) {
      return CreateTCPAcceptBindSocketPair<sockaddr_in>(bound, connected, type,
                                                        dual_stack);
    }
    return CreateTCPAcceptBindSocketPair<sockaddr_in6>(bound, connected, type,
                                                       dual_stack);
  };
}

// Like TCPAcceptBindSocketPairCreator, but the listener is created and bound
// once per returned lambda and reused by every later invocation of it.
Creator<SocketPair> TCPAcceptBindPersistentListenerSocketPairCreator(
    int domain, int type, int protocol, bool dual_stack) {
  // These are lazily initialized below, on the first call to the returned
  // lambda. These values are private to each returned lambda, but shared across
  // invocations of a specific lambda.
  //
  // The sharing allows pairs created with the same parameters to share a
  // listener. This prevents future connects from failing if the connecting
  // socket selects a port which had previously been used by a listening socket
  // that still has some connections in TIME-WAIT.
  //
  // The lazy initialization is to avoid creating sockets during parameter
  // enumeration. This is important because parameters are enumerated during the
  // build process where networking may not be available.
  auto listener = std::make_shared<absl::optional<int>>(absl::optional<int>());
  auto addr4 = std::make_shared<absl::optional<sockaddr_in>>(
      absl::optional<sockaddr_in>());
  auto addr6 = std::make_shared<absl::optional<sockaddr_in6>>(
      absl::optional<sockaddr_in6>());

  // The shared_ptrs are captured by copy, so the state above lives as long as
  // any copy of the lambda does. Nothing in this function closes the listener.
  return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
    int connected;
    RETURN_ERROR_IF_SYSCALL_FAIL(connected = socket(domain, type, protocol));
    MaybeSave();  // Successful socket creation.

    // Share the listener across invocations.
    if (!listener->has_value()) {
      int fd = socket(domain, type, protocol);
      if (fd < 0) {
        return PosixError(errno, absl::StrCat("socket(", domain, ", ", type,
                                              ", ", protocol, ")"));
      }
      listener->emplace(fd);
      MaybeSave();  // Successful socket creation.
    }

    // Bind the listener once, but create a new connect/accept pair each
    // time.
    //
    // NOTE(review): a bind/listen failure is passed to ValueOrDie() rather
    // than returned to the caller.
    if (domain == AF_INET) {
      if (!addr4->has_value()) {
        addr4->emplace(
            TCPBindAndListen<sockaddr_in>(listener->value(), dual_stack)
                .ValueOrDie());
      }
      return CreateTCPConnectAcceptSocketPair(listener->value(), connected,
                                              type, dual_stack, addr4->value());
    }
    if (!addr6->has_value()) {
      addr6->emplace(
          TCPBindAndListen<sockaddr_in6>(listener->value(), dual_stack)
              .ValueOrDie());
    }
    return CreateTCPConnectAcceptSocketPair(listener->value(), connected, type,
                                            dual_stack, addr6->value());
  };
}

// Binds both sockets to localhost with BindIP and returns them paired with
// their bound addresses. `type` is not used here.
template <typename T>
PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> CreateUDPBoundSocketPair(
    int sock1, int sock2, int type, bool dual_stack) {
  ASSIGN_OR_RETURN_ERRNO(T addr1, BindIP<T>(sock1, dual_stack));
  ASSIGN_OR_RETURN_ERRNO(T addr2, BindIP<T>(sock2, dual_stack));

  return absl::make_unique<AddrFDSocketPair>(sock1, sock2, addr1, addr2);
}

// Binds both sockets via CreateUDPBoundSocketPair and then connects each one
// to the other's bound address.
template <typename T>
PosixErrorOr<std::unique_ptr<AddrFDSocketPair>>
CreateUDPBidirectionalBindSocketPair(int sock1, int sock2, int type,
                                     bool dual_stack) {
  ASSIGN_OR_RETURN_ERRNO(
      auto socks, CreateUDPBoundSocketPair<T>(sock1, sock2, type, dual_stack));

  // Connect sock1 to sock2.
  RETURN_ERROR_IF_SYSCALL_FAIL(connect(socks->first_fd(), socks->second_addr(),
                                       socks->second_addr_size()));
  MaybeSave();  // Successful connection.

  // Connect sock2 to sock1.
  RETURN_ERROR_IF_SYSCALL_FAIL(connect(socks->second_fd(), socks->first_addr(),
                                       socks->first_addr_size()));
  MaybeSave();  // Successful connection.

  return socks;
}

// Returns a creator that opens two sockets, binds both to localhost and
// connects them to each other. AF_INET uses sockaddr_in; every other domain is
// treated as sockaddr_in6.
Creator<SocketPair> UDPBidirectionalBindSocketPairCreator(int domain, int type,
                                                          int protocol,
                                                          bool dual_stack) {
  return [=]() -> PosixErrorOr<std::unique_ptr<AddrFDSocketPair>> {
    int sock1;
    RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol));
    MaybeSave();  // Successful socket creation.

    int sock2;
    RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol));
    MaybeSave();  // Successful socket creation.

    if (domain == AF_INET) {
      return CreateUDPBidirectionalBindSocketPair<sockaddr_in>(
          sock1, sock2, type, dual_stack);
    }
    return CreateUDPBidirectionalBindSocketPair<sockaddr_in6>(sock1, sock2,
                                                              type, dual_stack);
  };
}

// Returns a creator that opens two sockets and pairs them without binding or
// connecting either one. `dual_stack` is not used.
Creator<SocketPair> UDPUnboundSocketPairCreator(int domain, int type,
                                                int protocol, bool dual_stack) {
  return [=]() -> PosixErrorOr<std::unique_ptr<FDSocketPair>> {
    int sock1;
    RETURN_ERROR_IF_SYSCALL_FAIL(sock1 = socket(domain, type, protocol));
    MaybeSave();  // Successful socket creation.

    int sock2;
    RETURN_ERROR_IF_SYSCALL_FAIL(sock2 = socket(domain, type, protocol));
    MaybeSave();  // Successful socket creation.

    return absl::make_unique<FDSocketPair>(sock1, sock2);
  };
}

// Returns a copy of `base` whose description is prefixed with "reversed " and
// whose creator wraps the pair made by base's creator in a ReversedSocketPair.
SocketPairKind Reversed(SocketPairKind const& base) {
  // The lambda below captures `creator` by value, so it holds its own copy and
  // does not depend on `base` staying alive.
  auto const& creator = base.creator;
  return SocketPairKind{
      absl::StrCat("reversed ", base.description), base.domain, base.type,
      base.protocol,
      [creator]() -> PosixErrorOr<std::unique_ptr<ReversedSocketPair>> {
        ASSIGN_OR_RETURN_ERRNO(auto creator_value, creator());
        return absl::make_unique<ReversedSocketPair>(std::move(creator_value));
      }};
}

// Returns a creator that opens a single socket and wraps it in a
// FileDescriptor without binding or connecting it.
Creator<FileDescriptor> UnboundSocketCreator(int domain, int type,
                                             int protocol) {
  return [=]() -> PosixErrorOr<std::unique_ptr<FileDescriptor>> {
    int sock;
    RETURN_ERROR_IF_SYSCALL_FAIL(sock = socket(domain, type, protocol));
    MaybeSave();  // Successful socket creation.

    return absl::make_unique<FileDescriptor>(sock);
  };
}

// Applies the NoOp and Reversed middlewares to `vec` through ApplyVecToVec.
std::vector<SocketPairKind> IncludeReversals(std::vector<SocketPairKind> vec) {
  return ApplyVecToVec<SocketPairKind>(std::vector<Middleware>{NoOp, Reversed},
                                       vec);
}

// Identity middleware: returns `base` unchanged.
SocketPairKind NoOp(SocketPairKind const& base) { return base; }

// Writes 20 random bytes to fd1 and checks that fd2 reads the same bytes, then
// repeats the exchange in the opposite direction (fd2 -> fd1).
void TransferTest(int fd1, int fd2) {
  char buf1[20];
  RandomizeBuffer(buf1, sizeof(buf1));
  ASSERT_THAT(WriteFd(fd1, buf1, sizeof(buf1)),
              SyscallSucceedsWithValue(sizeof(buf1)));

  char buf2[20];
  ASSERT_THAT(ReadFd(fd2, buf2, sizeof(buf2)),
              SyscallSucceedsWithValue(sizeof(buf2)));

  EXPECT_EQ(0, memcmp(buf1, buf2, sizeof(buf1)));

  RandomizeBuffer(buf1, sizeof(buf1));
  ASSERT_THAT(WriteFd(fd2, buf1, sizeof(buf1)),
              SyscallSucceedsWithValue(sizeof(buf1)));

  ASSERT_THAT(ReadFd(fd1, buf2, sizeof(buf2)),
              SyscallSucceedsWithValue(sizeof(buf2)));

  EXPECT_EQ(0, memcmp(buf1, buf2, sizeof(buf1)));
}

// Initializes the given buffer with random data.
582 void RandomizeBuffer(char* ptr, size_t len) { 583 uint32_t seed = time(nullptr); 584 for (size_t i = 0; i < len; ++i) { 585 ptr[i] = static_cast<char>(rand_r(&seed)); 586 } 587 } 588 589 size_t CalculateUnixSockAddrLen(const char* sun_path) { 590 // Abstract addresses always return the full length. 591 if (sun_path[0] == 0) { 592 return sizeof(sockaddr_un); 593 } 594 // Filesystem addresses use the address length plus the 2 byte sun_family 595 // and null terminator. 596 return strlen(sun_path) + 3; 597 } 598 599 struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_un& addr) { 600 struct sockaddr_storage addr_storage = {}; 601 memcpy(&addr_storage, &addr, sizeof(addr)); 602 return addr_storage; 603 } 604 605 struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_in& addr) { 606 struct sockaddr_storage addr_storage = {}; 607 memcpy(&addr_storage, &addr, sizeof(addr)); 608 return addr_storage; 609 } 610 611 struct sockaddr_storage AddrFDSocketPair::to_storage(const sockaddr_in6& addr) { 612 struct sockaddr_storage addr_storage = {}; 613 memcpy(&addr_storage, &addr, sizeof(addr)); 614 return addr_storage; 615 } 616 617 SocketKind SimpleSocket(int fam, int type, int proto) { 618 return SocketKind{ 619 absl::StrCat("Family ", fam, ", type ", type, ", proto ", proto), fam, 620 type, proto, SyscallSocketCreator(fam, type, proto)}; 621 } 622 623 ssize_t SendLargeSendMsg(const std::unique_ptr<SocketPair>& sockets, 624 size_t size, bool reader) { 625 const int rfd = sockets->second_fd(); 626 ScopedThread t([rfd, size, reader] { 627 if (!reader) { 628 return; 629 } 630 631 // Potentially too many syscalls in the loop. 632 const DisableSave ds; 633 634 std::vector<char> buf(size); 635 size_t total = 0; 636 637 while (total < size) { 638 int ret = read(rfd, buf.data(), buf.size()); 639 if (ret == -1 && errno == EAGAIN) { 640 continue; 641 } 642 if (ret > 0) { 643 total += ret; 644 } 645 646 // Assert to return on first failure. 
647 ASSERT_THAT(ret, SyscallSucceeds()); 648 } 649 }); 650 651 std::vector<char> buf(size); 652 653 struct iovec iov = {}; 654 iov.iov_base = buf.data(); 655 iov.iov_len = buf.size(); 656 657 struct msghdr msg = {}; 658 msg.msg_iov = &iov; 659 msg.msg_iovlen = 1; 660 661 return RetryEINTR(sendmsg)(sockets->first_fd(), &msg, 0); 662 } 663 664 namespace internal { 665 PosixErrorOr<int> TryPortAvailable(int port, AddressFamily family, 666 SocketType type, bool reuse_addr) { 667 if (port < 0) { 668 return PosixError(EINVAL, "Invalid port"); 669 } 670 671 // Both Ipv6 and Dualstack are AF_INET6. 672 int sock_fam = (family == AddressFamily::kIpv4 ? AF_INET : AF_INET6); 673 int sock_type = (type == SocketType::kTcp ? SOCK_STREAM : SOCK_DGRAM); 674 ASSIGN_OR_RETURN_ERRNO(auto fd, Socket(sock_fam, sock_type, 0)); 675 676 if (reuse_addr) { 677 int one = 1; 678 RETURN_ERROR_IF_SYSCALL_FAIL( 679 setsockopt(fd.get(), SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))); 680 } 681 682 // Try to bind. 683 sockaddr_storage storage = {}; 684 int storage_size = 0; 685 if (family == AddressFamily::kIpv4) { 686 sockaddr_in* addr = reinterpret_cast<sockaddr_in*>(&storage); 687 storage_size = sizeof(*addr); 688 addr->sin_family = AF_INET; 689 addr->sin_port = htons(port); 690 addr->sin_addr.s_addr = htonl(INADDR_ANY); 691 } else { 692 sockaddr_in6* addr = reinterpret_cast<sockaddr_in6*>(&storage); 693 storage_size = sizeof(*addr); 694 addr->sin6_family = AF_INET6; 695 addr->sin6_port = htons(port); 696 if (family == AddressFamily::kDualStack) { 697 inet_pton(AF_INET6, "::ffff:0.0.0.0", 698 reinterpret_cast<void*>(&addr->sin6_addr.s6_addr)); 699 } else { 700 addr->sin6_addr = in6addr_any; 701 } 702 } 703 704 RETURN_ERROR_IF_SYSCALL_FAIL( 705 bind(fd.get(), AsSockAddr(&storage), storage_size)); 706 707 // If the user specified 0 as the port, we will return the port that the 708 // kernel gave us, otherwise we will validate that this socket bound to the 709 // requested port. 
710 sockaddr_storage bound_storage = {}; 711 socklen_t bound_storage_size = sizeof(bound_storage); 712 RETURN_ERROR_IF_SYSCALL_FAIL( 713 getsockname(fd.get(), AsSockAddr(&bound_storage), &bound_storage_size)); 714 715 int available_port = -1; 716 if (bound_storage.ss_family == AF_INET) { 717 sockaddr_in* addr = reinterpret_cast<sockaddr_in*>(&bound_storage); 718 available_port = ntohs(addr->sin_port); 719 } else if (bound_storage.ss_family == AF_INET6) { 720 sockaddr_in6* addr = reinterpret_cast<sockaddr_in6*>(&bound_storage); 721 available_port = ntohs(addr->sin6_port); 722 } else { 723 return PosixError(EPROTOTYPE, "Getsockname returned invalid family"); 724 } 725 726 // If we requested a specific port make sure our bound port is that port. 727 if (port != 0 && available_port != port) { 728 return PosixError(EINVAL, 729 absl::StrCat("Bound port ", available_port, 730 " was not equal to requested port ", port)); 731 } 732 733 // If we're trying to do a TCP socket, let's also try to listen. 
734 if (type == SocketType::kTcp) { 735 RETURN_ERROR_IF_SYSCALL_FAIL(listen(fd.get(), 1)); 736 } 737 738 return available_port; 739 } 740 } // namespace internal 741 742 PosixErrorOr<int> SendMsg(int sock, msghdr* msg, char buf[], int buf_size) { 743 struct iovec iov; 744 iov.iov_base = buf; 745 iov.iov_len = buf_size; 746 msg->msg_iov = &iov; 747 msg->msg_iovlen = 1; 748 749 int ret; 750 RETURN_ERROR_IF_SYSCALL_FAIL(ret = RetryEINTR(sendmsg)(sock, msg, 0)); 751 return ret; 752 } 753 754 PosixErrorOr<int> RecvTimeout(int sock, char buf[], int buf_size, int timeout) { 755 fd_set rfd; 756 struct timeval to = {.tv_sec = timeout, .tv_usec = 0}; 757 FD_ZERO(&rfd); 758 FD_SET(sock, &rfd); 759 760 int ret; 761 RETURN_ERROR_IF_SYSCALL_FAIL(ret = select(1, &rfd, NULL, NULL, &to)); 762 RETURN_ERROR_IF_SYSCALL_FAIL( 763 ret = RetryEINTR(recv)(sock, buf, buf_size, MSG_DONTWAIT)); 764 return ret; 765 } 766 767 PosixErrorOr<int> RecvMsgTimeout(int sock, struct msghdr* msg, int timeout) { 768 fd_set rfd; 769 struct timeval to = {.tv_sec = timeout, .tv_usec = 0}; 770 FD_ZERO(&rfd); 771 FD_SET(sock, &rfd); 772 773 int ret; 774 RETURN_ERROR_IF_SYSCALL_FAIL(ret = select(1, &rfd, NULL, NULL, &to)); 775 RETURN_ERROR_IF_SYSCALL_FAIL( 776 ret = RetryEINTR(recvmsg)(sock, msg, MSG_DONTWAIT)); 777 return ret; 778 } 779 780 void RecvNoData(int sock) { 781 char data = 0; 782 struct iovec iov; 783 iov.iov_base = &data; 784 iov.iov_len = 1; 785 struct msghdr msg = {}; 786 msg.msg_iov = &iov; 787 msg.msg_iovlen = 1; 788 ASSERT_THAT(RetryEINTR(recvmsg)(sock, &msg, MSG_DONTWAIT), 789 SyscallFailsWithErrno(EAGAIN)); 790 } 791 792 TestAddress TestAddress::WithPort(uint16_t port) const { 793 TestAddress addr = *this; 794 switch (addr.family()) { 795 case AF_INET: 796 reinterpret_cast<sockaddr_in*>(&addr.addr)->sin_port = htons(port); 797 break; 798 case AF_INET6: 799 reinterpret_cast<sockaddr_in6*>(&addr.addr)->sin6_port = htons(port); 800 break; 801 } 802 return addr; 803 } 804 805 namespace { 806 
807 TestAddress V4Addr(std::string description, in_addr_t addr) { 808 TestAddress t(std::move(description)); 809 t.addr.ss_family = AF_INET; 810 t.addr_len = sizeof(sockaddr_in); 811 reinterpret_cast<sockaddr_in*>(&t.addr)->sin_addr.s_addr = addr; 812 return t; 813 } 814 815 TestAddress V6Addr(std::string description, const in6_addr& addr) { 816 TestAddress t(std::move(description)); 817 t.addr.ss_family = AF_INET6; 818 t.addr_len = sizeof(sockaddr_in6); 819 reinterpret_cast<sockaddr_in6*>(&t.addr)->sin6_addr = addr; 820 return t; 821 } 822 823 } // namespace 824 825 TestAddress V4AddrStr(std::string description, const char* addr) { 826 in_addr_t s_addr; 827 inet_pton(AF_INET, addr, &s_addr); 828 return V4Addr(description, s_addr); 829 } 830 831 TestAddress V6AddrStr(std::string description, const char* addr) { 832 struct in6_addr s_addr; 833 inet_pton(AF_INET6, addr, &s_addr); 834 return V6Addr(description, s_addr); 835 } 836 837 TestAddress V4Any() { return V4Addr("V4Any", htonl(INADDR_ANY)); } 838 839 TestAddress V4Broadcast() { 840 return V4Addr("V4Broadcast", htonl(INADDR_BROADCAST)); 841 } 842 843 TestAddress V4Loopback() { 844 return V4Addr("V4Loopback", htonl(INADDR_LOOPBACK)); 845 } 846 847 TestAddress V4LoopbackSubnetBroadcast() { 848 return V4AddrStr("V4LoopbackSubnetBroadcast", "127.255.255.255"); 849 } 850 851 TestAddress V4MappedAny() { return V6AddrStr("V4MappedAny", "::ffff:0.0.0.0"); } 852 853 TestAddress V4MappedLoopback() { 854 return V6AddrStr("V4MappedLoopback", "::ffff:127.0.0.1"); 855 } 856 857 TestAddress V4Multicast() { 858 return V4Addr("V4Multicast", inet_addr(kMulticastAddress)); 859 } 860 861 TestAddress V4MulticastAllHosts() { 862 return V4Addr("V4MulticastAllHosts", htonl(INADDR_ALLHOSTS_GROUP)); 863 } 864 865 TestAddress V6Any() { return V6Addr("V6Any", in6addr_any); } 866 867 TestAddress V6Loopback() { return V6Addr("V6Loopback", in6addr_loopback); } 868 869 TestAddress V6Multicast() { return V6AddrStr("V6Multicast", "ff05::1234"); 
} 870 871 TestAddress V6MulticastInterfaceLocalAllNodes() { 872 return V6AddrStr("V6MulticastInterfaceLocalAllNodes", "ff01::1"); 873 } 874 875 TestAddress V6MulticastLinkLocalAllNodes() { 876 return V6AddrStr("V6MulticastLinkLocalAllNodes", "ff02::1"); 877 } 878 879 TestAddress V6MulticastLinkLocalAllRouters() { 880 return V6AddrStr("V6MulticastLinkLocalAllRouters", "ff02::2"); 881 } 882 883 // Checksum computes the internet checksum of a buffer. 884 uint16_t Checksum(uint16_t* buf, ssize_t buf_size) { 885 // Add up the 16-bit values in the buffer. 886 uint32_t total = 0; 887 for (unsigned int i = 0; i < buf_size; i += sizeof(*buf)) { 888 total += *buf; 889 buf++; 890 } 891 892 // If buf has an odd size, add the remaining byte. 893 if (buf_size % 2) { 894 total += *(reinterpret_cast<unsigned char*>(buf) - 1); 895 } 896 897 // This carries any bits past the lower 16 until everything fits in 16 bits. 898 while (total >> 16) { 899 uint16_t lower = total & 0xffff; 900 uint16_t upper = total >> 16; 901 total = lower + upper; 902 } 903 904 return ~total; 905 } 906 907 uint16_t IPChecksum(struct iphdr ip) { 908 return Checksum(reinterpret_cast<uint16_t*>(&ip), sizeof(ip)); 909 } 910 911 // The pseudo-header defined in RFC 768 for calculating the UDP checksum. 
912 struct udp_pseudo_hdr { 913 uint32_t srcip; 914 uint32_t destip; 915 char zero; 916 char protocol; 917 uint16_t udplen; 918 }; 919 920 uint16_t UDPChecksum(struct iphdr iphdr, struct udphdr udphdr, 921 const char* payload, ssize_t payload_len) { 922 struct udp_pseudo_hdr phdr = {}; 923 phdr.srcip = iphdr.saddr; 924 phdr.destip = iphdr.daddr; 925 phdr.zero = 0; 926 phdr.protocol = IPPROTO_UDP; 927 phdr.udplen = udphdr.len; 928 929 ssize_t buf_size = sizeof(phdr) + sizeof(udphdr) + payload_len; 930 char* buf = static_cast<char*>(malloc(buf_size)); 931 memcpy(buf, &phdr, sizeof(phdr)); 932 memcpy(buf + sizeof(phdr), &udphdr, sizeof(udphdr)); 933 memcpy(buf + sizeof(phdr) + sizeof(udphdr), payload, payload_len); 934 935 uint16_t csum = Checksum(reinterpret_cast<uint16_t*>(buf), buf_size); 936 free(buf); 937 return csum; 938 } 939 940 uint16_t ICMPChecksum(struct icmphdr icmphdr, const char* payload, 941 ssize_t payload_len) { 942 ssize_t buf_size = sizeof(icmphdr) + payload_len; 943 char* buf = static_cast<char*>(malloc(buf_size)); 944 memcpy(buf, &icmphdr, sizeof(icmphdr)); 945 memcpy(buf + sizeof(icmphdr), payload, payload_len); 946 947 uint16_t csum = Checksum(reinterpret_cast<uint16_t*>(buf), buf_size); 948 free(buf); 949 return csum; 950 } 951 952 PosixErrorOr<uint16_t> AddrPort(int family, sockaddr_storage const& addr) { 953 switch (family) { 954 case AF_INET: 955 return static_cast<uint16_t>( 956 reinterpret_cast<sockaddr_in const*>(&addr)->sin_port); 957 case AF_INET6: 958 return static_cast<uint16_t>( 959 reinterpret_cast<sockaddr_in6 const*>(&addr)->sin6_port); 960 default: 961 return PosixError(EINVAL, 962 absl::StrCat("unknown socket family: ", family)); 963 } 964 } 965 966 PosixError SetAddrPort(int family, sockaddr_storage* addr, uint16_t port) { 967 switch (family) { 968 case AF_INET: 969 reinterpret_cast<sockaddr_in*>(addr)->sin_port = port; 970 return NoError(); 971 case AF_INET6: 972 reinterpret_cast<sockaddr_in6*>(addr)->sin6_port = port; 973 
return NoError(); 974 default: 975 return PosixError(EINVAL, 976 absl::StrCat("unknown socket family: ", family)); 977 } 978 } 979 980 void SetupTimeWaitClose(const TestAddress* listener, 981 const TestAddress* connector, bool reuse, 982 bool accept_close, sockaddr_storage* listen_addr, 983 sockaddr_storage* conn_bound_addr) { 984 // Create the listening socket. 985 FileDescriptor listen_fd = ASSERT_NO_ERRNO_AND_VALUE( 986 Socket(listener->family(), SOCK_STREAM, IPPROTO_TCP)); 987 if (reuse) { 988 ASSERT_THAT(setsockopt(listen_fd.get(), SOL_SOCKET, SO_REUSEADDR, 989 &kSockOptOn, sizeof(kSockOptOn)), 990 SyscallSucceeds()); 991 } 992 ASSERT_THAT( 993 bind(listen_fd.get(), AsSockAddr(listen_addr), listener->addr_len), 994 SyscallSucceeds()); 995 ASSERT_THAT(listen(listen_fd.get(), SOMAXCONN), SyscallSucceeds()); 996 997 // Get the port bound by the listening socket. 998 socklen_t addrlen = listener->addr_len; 999 ASSERT_THAT(getsockname(listen_fd.get(), AsSockAddr(listen_addr), &addrlen), 1000 SyscallSucceeds()); 1001 1002 uint16_t const port = 1003 ASSERT_NO_ERRNO_AND_VALUE(AddrPort(listener->family(), *listen_addr)); 1004 1005 // Connect to the listening socket. 1006 FileDescriptor conn_fd = ASSERT_NO_ERRNO_AND_VALUE( 1007 Socket(connector->family(), SOCK_STREAM, IPPROTO_TCP)); 1008 1009 // We disable saves after this point as a S/R causes the netstack seed 1010 // to be regenerated which changes what ports/ISN is picked for a given 1011 // tuple (src ip,src port, dst ip, dst port). This can cause the final 1012 // SYN to use a sequence number that looks like one from the current 1013 // connection in TIME_WAIT and will not be accepted causing the test 1014 // to timeout. 
1015 // 1016 // TODO(gvisor.dev/issue/940): S/R portSeed/portHint 1017 DisableSave ds; 1018 1019 sockaddr_storage conn_addr = connector->addr; 1020 ASSERT_NO_ERRNO(SetAddrPort(connector->family(), &conn_addr, port)); 1021 ASSERT_THAT(RetryEINTR(connect)(conn_fd.get(), AsSockAddr(&conn_addr), 1022 connector->addr_len), 1023 SyscallSucceeds()); 1024 1025 // Accept the connection. 1026 auto accepted = 1027 ASSERT_NO_ERRNO_AND_VALUE(Accept(listen_fd.get(), nullptr, nullptr)); 1028 1029 // Get the address/port bound by the connecting socket. 1030 socklen_t conn_addrlen = connector->addr_len; 1031 ASSERT_THAT( 1032 getsockname(conn_fd.get(), AsSockAddr(conn_bound_addr), &conn_addrlen), 1033 SyscallSucceeds()); 1034 1035 FileDescriptor active_closefd, passive_closefd; 1036 if (accept_close) { 1037 active_closefd = std::move(accepted); 1038 passive_closefd = std::move(conn_fd); 1039 } else { 1040 active_closefd = std::move(conn_fd); 1041 passive_closefd = std::move(accepted); 1042 } 1043 1044 // shutdown to trigger TIME_WAIT. 1045 ASSERT_THAT(shutdown(active_closefd.get(), SHUT_WR), SyscallSucceeds()); 1046 { 1047 constexpr int kTimeout = 10000; 1048 pollfd pfd = { 1049 .fd = passive_closefd.get(), 1050 .events = POLLIN, 1051 }; 1052 ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1)); 1053 ASSERT_EQ(pfd.revents, POLLIN); 1054 } 1055 ASSERT_THAT(shutdown(passive_closefd.get(), SHUT_WR), SyscallSucceeds()); 1056 { 1057 constexpr int kTimeout = 10000; 1058 constexpr int16_t want_events = POLLHUP; 1059 pollfd pfd = { 1060 .fd = active_closefd.get(), 1061 .events = want_events, 1062 }; 1063 ASSERT_THAT(poll(&pfd, 1, kTimeout), SyscallSucceedsWithValue(1)); 1064 } 1065 1066 // This sleep is needed to reduce flake to ensure that the passive-close 1067 // ensures the state transitions to CLOSE from LAST_ACK. 
1068 absl::SleepFor(absl::Seconds(1)); 1069 } 1070 1071 constexpr char kRangeFile[] = "/proc/sys/net/ipv4/ip_local_port_range"; 1072 1073 PosixErrorOr<int> MaybeLimitEphemeralPorts() { 1074 int min = 0; 1075 int max = 1 << 16; 1076 1077 // Read the ephemeral range from /proc. 1078 ASSIGN_OR_RETURN_ERRNO(std::string rangefile, GetContents(kRangeFile)); 1079 const std::string err_msg = 1080 absl::StrFormat("%s has invalid content: %s", kRangeFile, rangefile); 1081 if (rangefile.back() != '\n') { 1082 return PosixError(EINVAL, err_msg); 1083 } 1084 rangefile.pop_back(); 1085 std::vector<std::string> range = 1086 absl::StrSplit(rangefile, absl::ByAnyChar("\t ")); 1087 if (range.size() < 2 || !absl::SimpleAtoi(range.front(), &min) || 1088 !absl::SimpleAtoi(range.back(), &max)) { 1089 return PosixError(EINVAL, err_msg); 1090 } 1091 1092 // If we can open as writable, limit the range. 1093 if (!access(kRangeFile, W_OK)) { 1094 ASSIGN_OR_RETURN_ERRNO(FileDescriptor fd, 1095 Open(kRangeFile, O_WRONLY | O_TRUNC, 0)); 1096 max = min + 50; 1097 const std::string small_range = absl::StrFormat("%d %d", min, max); 1098 int n = write(fd.get(), small_range.c_str(), small_range.size()); 1099 if (n < 0) { 1100 return PosixError( 1101 errno, 1102 absl::StrFormat("write(%d [%s], \"%s\", %d)", fd.get(), kRangeFile, 1103 small_range.c_str(), small_range.size())); 1104 } 1105 } 1106 return max - min; 1107 } 1108 1109 } // namespace testing 1110 } // namespace gvisor