github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/test/syscalls/linux/raw_socket.cc (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include <linux/capability.h> 16 #include <linux/filter.h> 17 #include <netinet/in.h> 18 #include <netinet/ip.h> 19 #include <netinet/ip6.h> 20 #include <netinet/ip_icmp.h> 21 #include <poll.h> 22 #include <sys/socket.h> 23 #include <sys/types.h> 24 #include <unistd.h> 25 26 #include <algorithm> 27 28 #include "gtest/gtest.h" 29 #include "test/syscalls/linux/socket_test_util.h" 30 #include "test/syscalls/linux/unix_domain_socket_test_util.h" 31 #include "test/util/capability_util.h" 32 #include "test/util/file_descriptor.h" 33 #include "test/util/test_util.h" 34 35 // Note: in order to run these tests, /proc/sys/net/ipv4/ping_group_range will 36 // need to be configured to let the superuser create ping sockets (see icmp(7)). 37 38 namespace gvisor { 39 namespace testing { 40 41 namespace { 42 43 // Fixture for tests parameterized by protocol. 44 class RawSocketTest : public ::testing::TestWithParam<std::tuple<int, int>> { 45 protected: 46 // Creates a socket to be used in tests. 47 void SetUp() override; 48 49 // Closes the socket created by SetUp(). 50 void TearDown() override; 51 52 // Sends buf via s_. 53 void SendBuf(const char* buf, int buf_len); 54 55 // Reads from s_ into recv_buf. 56 void ReceiveBuf(char* recv_buf, size_t recv_buf_len); 57 58 void ReceiveBufFrom(int sock, char* recv_buf, size_t recv_buf_len); 59 60 int Protocol() { return std::get<0>(GetParam()); } 61 62 int Family() { return std::get<1>(GetParam()); } 63 64 socklen_t AddrLen() { 65 if (Family() == AF_INET) { 66 return sizeof(sockaddr_in); 67 } 68 return sizeof(sockaddr_in6); 69 } 70 71 int HdrLen() { 72 if (Family() == AF_INET) { 73 return sizeof(struct iphdr); 74 } 75 // IPv6 raw sockets don't include the header. 76 return 0; 77 } 78 79 // The socket used for both reading and writing. 80 int s_; 81 82 // The loopback address. 83 struct sockaddr_storage addr_; 84 }; 85 86 void RawSocketTest::SetUp() { 87 if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { 88 ASSERT_THAT(socket(Family(), SOCK_RAW, Protocol()), 89 SyscallFailsWithErrno(EPERM)); 90 GTEST_SKIP(); 91 } 92 93 ASSERT_THAT(s_ = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds()); 94 95 addr_ = {}; 96 97 // We don't set ports because raw sockets don't have a notion of ports. 98 if (Family() == AF_INET) { 99 struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr_); 100 sin->sin_family = AF_INET; 101 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 102 } else { 103 struct sockaddr_in6* sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr_); 104 sin6->sin6_family = AF_INET6; 105 sin6->sin6_addr = in6addr_loopback; 106 } 107 } 108 109 void RawSocketTest::TearDown() { 110 // TearDown will be run even if we skip the test. 111 if (ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) { 112 EXPECT_THAT(close(s_), SyscallSucceeds()); 113 } 114 } 115 116 // We should be able to create multiple raw sockets for the same protocol. 117 // BasicRawSocket::Setup creates the first one, so we only have to create one 118 // more here. 119 TEST_P(RawSocketTest, MultipleCreation) { 120 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 121 122 int s2; 123 ASSERT_THAT(s2 = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds()); 124 125 ASSERT_THAT(close(s2), SyscallSucceeds()); 126 } 127 128 // Test that shutting down an unconnected socket fails. 129 TEST_P(RawSocketTest, FailShutdownWithoutConnect) { 130 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 131 132 ASSERT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); 133 ASSERT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN)); 134 } 135 136 // Shutdown is a no-op for raw sockets (and datagram sockets in general). 137 TEST_P(RawSocketTest, ShutdownWriteNoop) { 138 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 139 140 ASSERT_THAT( 141 connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 142 SyscallSucceeds()); 143 ASSERT_THAT(shutdown(s_, SHUT_WR), SyscallSucceeds()); 144 145 // Arbitrary. 146 constexpr char kBuf[] = "noop"; 147 ASSERT_THAT(RetryEINTR(write)(s_, kBuf, sizeof(kBuf)), 148 SyscallSucceedsWithValue(sizeof(kBuf))); 149 } 150 151 // Shutdown is a no-op for raw sockets (and datagram sockets in general). 152 TEST_P(RawSocketTest, ShutdownReadNoop) { 153 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 154 155 ASSERT_THAT( 156 connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 157 SyscallSucceeds()); 158 ASSERT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); 159 160 // Arbitrary. 161 constexpr char kBuf[] = "gdg"; 162 ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); 163 164 std::vector<char> c(sizeof(kBuf) + HdrLen()); 165 ASSERT_THAT(read(s_, c.data(), c.size()), SyscallSucceedsWithValue(c.size())); 166 } 167 168 // Test that listen() fails. 169 TEST_P(RawSocketTest, FailListen) { 170 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 171 172 ASSERT_THAT(listen(s_, 1), SyscallFailsWithErrno(ENOTSUP)); 173 } 174 175 // Test that accept() fails. 176 TEST_P(RawSocketTest, FailAccept) { 177 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 178 179 struct sockaddr saddr; 180 socklen_t addrlen; 181 ASSERT_THAT(accept(s_, &saddr, &addrlen), SyscallFailsWithErrno(ENOTSUP)); 182 } 183 184 // Test that getpeername() returns nothing before connect(). 185 TEST_P(RawSocketTest, FailGetPeerNameBeforeConnect) { 186 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 187 188 struct sockaddr saddr; 189 socklen_t addrlen = sizeof(saddr); 190 ASSERT_THAT(getpeername(s_, &saddr, &addrlen), 191 SyscallFailsWithErrno(ENOTCONN)); 192 } 193 194 // Test that getpeername() returns something after connect(). 195 TEST_P(RawSocketTest, GetPeerName) { 196 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 197 198 ASSERT_THAT( 199 connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 200 SyscallSucceeds()); 201 struct sockaddr saddr; 202 socklen_t addrlen = sizeof(saddr); 203 ASSERT_THAT(getpeername(s_, &saddr, &addrlen), 204 SyscallFailsWithErrno(ENOTCONN)); 205 ASSERT_GT(addrlen, 0); 206 } 207 208 // Test that the socket is writable immediately. 209 TEST_P(RawSocketTest, PollWritableImmediately) { 210 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 211 212 struct pollfd pfd = {}; 213 pfd.fd = s_; 214 pfd.events = POLLOUT; 215 ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 10000), SyscallSucceedsWithValue(1)); 216 } 217 218 // Test that the socket isn't readable before receiving anything. 219 TEST_P(RawSocketTest, PollNotReadableInitially) { 220 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 221 222 // Try to receive data with MSG_DONTWAIT, which returns immediately if there's 223 // nothing to be read. 224 char buf[117]; 225 ASSERT_THAT(RetryEINTR(recv)(s_, buf, sizeof(buf), MSG_DONTWAIT), 226 SyscallFailsWithErrno(EAGAIN)); 227 } 228 229 // Test that the socket becomes readable once something is written to it. 230 TEST_P(RawSocketTest, PollTriggeredOnWrite) { 231 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 232 233 // Write something so that there's data to be read. 234 // Arbitrary. 235 constexpr char kBuf[] = "JP5"; 236 ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); 237 238 struct pollfd pfd = {}; 239 pfd.fd = s_; 240 pfd.events = POLLIN; 241 ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 10000), SyscallSucceedsWithValue(1)); 242 } 243 244 // Test that we can connect() to a valid IP (loopback). 245 TEST_P(RawSocketTest, ConnectToLoopback) { 246 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 247 248 ASSERT_THAT( 249 connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 250 SyscallSucceeds()); 251 } 252 253 // Test that calling send() without connect() fails. 254 TEST_P(RawSocketTest, SendWithoutConnectFails) { 255 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 256 257 // Arbitrary. 258 constexpr char kBuf[] = "Endgame was good"; 259 ASSERT_THAT(send(s_, kBuf, sizeof(kBuf), 0), 260 SyscallFailsWithErrno(EDESTADDRREQ)); 261 } 262 263 // Wildcard Bind. 264 TEST_P(RawSocketTest, BindToWildcard) { 265 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 266 struct sockaddr_storage addr; 267 addr = {}; 268 269 // We don't set ports because raw sockets don't have a notion of ports. 270 if (Family() == AF_INET) { 271 struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr); 272 sin->sin_family = AF_INET; 273 sin->sin_addr.s_addr = htonl(INADDR_ANY); 274 } else { 275 struct sockaddr_in6* sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr); 276 sin6->sin6_family = AF_INET6; 277 sin6->sin6_addr = in6addr_any; 278 } 279 280 ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 281 SyscallSucceeds()); 282 } 283 284 // Bind to localhost. 285 TEST_P(RawSocketTest, BindToLocalhost) { 286 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 287 288 ASSERT_THAT( 289 bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 290 SyscallSucceeds()); 291 } 292 293 // Bind to a different address. 294 TEST_P(RawSocketTest, BindToInvalid) { 295 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 296 297 struct sockaddr_storage bind_addr = addr_; 298 if (Family() == AF_INET) { 299 struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&bind_addr); 300 sin->sin_addr = {1}; // 1.0.0.0 - An address that we can't bind to. 301 } else { 302 struct sockaddr_in6* sin6 = 303 reinterpret_cast<struct sockaddr_in6*>(&bind_addr); 304 memset(&sin6->sin6_addr.s6_addr, 0, sizeof(sin6->sin6_addr.s6_addr)); 305 sin6->sin6_addr.s6_addr[0] = 1; // 1: - An address that we can't bind to. 306 } 307 ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&bind_addr), 308 AddrLen()), SyscallFailsWithErrno(EADDRNOTAVAIL)); 309 } 310 311 // Send and receive an packet. 312 TEST_P(RawSocketTest, SendAndReceive) { 313 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 314 315 // Arbitrary. 316 constexpr char kBuf[] = "TB12"; 317 ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); 318 319 // Receive the packet and make sure it's identical. 320 std::vector<char> recv_buf(sizeof(kBuf) + HdrLen()); 321 ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); 322 EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0); 323 } 324 325 // We should be able to create multiple raw sockets for the same protocol and 326 // receive the same packet on both. 327 TEST_P(RawSocketTest, MultipleSocketReceive) { 328 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 329 330 int s2; 331 ASSERT_THAT(s2 = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds()); 332 333 // Arbitrary. 334 constexpr char kBuf[] = "TB10"; 335 ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); 336 337 // Receive it on socket 1. 338 std::vector<char> recv_buf1(sizeof(kBuf) + HdrLen()); 339 ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf1.data(), recv_buf1.size())); 340 341 // Receive it on socket 2. 342 std::vector<char> recv_buf2(sizeof(kBuf) + HdrLen()); 343 ASSERT_NO_FATAL_FAILURE(ReceiveBufFrom(s2, recv_buf2.data(), 344 recv_buf2.size())); 345 346 EXPECT_EQ(memcmp(recv_buf1.data() + HdrLen(), 347 recv_buf2.data() + HdrLen(), sizeof(kBuf)), 348 0); 349 350 ASSERT_THAT(close(s2), SyscallSucceeds()); 351 } 352 353 // Test that connect sends packets to the right place. 354 TEST_P(RawSocketTest, SendAndReceiveViaConnect) { 355 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 356 357 ASSERT_THAT( 358 connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 359 SyscallSucceeds()); 360 361 // Arbitrary. 362 constexpr char kBuf[] = "JH4"; 363 ASSERT_THAT(send(s_, kBuf, sizeof(kBuf), 0), 364 SyscallSucceedsWithValue(sizeof(kBuf))); 365 366 // Receive the packet and make sure it's identical. 367 std::vector<char> recv_buf(sizeof(kBuf) + HdrLen()); 368 ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); 369 EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0); 370 } 371 372 // Bind to localhost, then send and receive packets. 373 TEST_P(RawSocketTest, BindSendAndReceive) { 374 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 375 376 ASSERT_THAT( 377 bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 378 SyscallSucceeds()); 379 380 // Arbitrary. 381 constexpr char kBuf[] = "DR16"; 382 ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); 383 384 // Receive the packet and make sure it's identical. 385 std::vector<char> recv_buf(sizeof(kBuf) + HdrLen()); 386 ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); 387 EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0); 388 } 389 390 // Bind and connect to localhost and send/receive packets. 391 TEST_P(RawSocketTest, BindConnectSendAndReceive) { 392 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 393 394 ASSERT_THAT( 395 bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 396 SyscallSucceeds()); 397 ASSERT_THAT( 398 connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 399 SyscallSucceeds()); 400 401 // Arbitrary. 402 constexpr char kBuf[] = "DG88"; 403 ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); 404 405 // Receive the packet and make sure it's identical. 406 std::vector<char> recv_buf(sizeof(kBuf) + HdrLen()); 407 ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); 408 EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0); 409 } 410 411 // Check that setting SO_RCVBUF below min is clamped to the minimum 412 // receive buffer size. 413 TEST_P(RawSocketTest, SetSocketRecvBufBelowMin) { 414 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 415 416 // Discover minimum receive buf size by trying to set it to zero. 417 // See: 418 // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820 419 constexpr int kRcvBufSz = 0; 420 ASSERT_THAT( 421 setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), 422 SyscallSucceeds()); 423 424 int min = 0; 425 socklen_t min_len = sizeof(min); 426 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), 427 SyscallSucceeds()); 428 429 // Linux doubles the value so let's use a value that when doubled will still 430 // be smaller than min. 431 int below_min = min / 2 - 1; 432 ASSERT_THAT( 433 setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &below_min, sizeof(below_min)), 434 SyscallSucceeds()); 435 436 int val = 0; 437 socklen_t val_len = sizeof(val); 438 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), 439 SyscallSucceeds()); 440 441 ASSERT_EQ(min, val); 442 } 443 444 // Check that setting SO_RCVBUF above max is clamped to the maximum 445 // receive buffer size. 446 TEST_P(RawSocketTest, SetSocketRecvBufAboveMax) { 447 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 448 449 // Discover max buf size by trying to set the largest possible buffer size. 450 constexpr int kRcvBufSz = 0xffffffff; 451 ASSERT_THAT( 452 setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), 453 SyscallSucceeds()); 454 455 int max = 0; 456 socklen_t max_len = sizeof(max); 457 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len), 458 SyscallSucceeds()); 459 460 int above_max = max + 1; 461 ASSERT_THAT( 462 setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &above_max, sizeof(above_max)), 463 SyscallSucceeds()); 464 465 int val = 0; 466 socklen_t val_len = sizeof(val); 467 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), 468 SyscallSucceeds()); 469 ASSERT_EQ(max, val); 470 } 471 472 // Check that setting SO_RCVBUF min <= kRcvBufSz <= max is honored. 473 TEST_P(RawSocketTest, SetSocketRecvBuf) { 474 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 475 476 int max = 0; 477 int min = 0; 478 { 479 // Discover max buf size by trying to set a really large buffer size. 480 constexpr int kRcvBufSz = 0xffffffff; 481 ASSERT_THAT( 482 setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), 483 SyscallSucceeds()); 484 485 max = 0; 486 socklen_t max_len = sizeof(max); 487 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len), 488 SyscallSucceeds()); 489 } 490 491 { 492 // Discover minimum buffer size by trying to set a zero size receive buffer 493 // size. 494 // See: 495 // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820 496 constexpr int kRcvBufSz = 0; 497 ASSERT_THAT( 498 setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), 499 SyscallSucceeds()); 500 501 socklen_t min_len = sizeof(min); 502 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), 503 SyscallSucceeds()); 504 } 505 506 int quarter_sz = min + (max - min) / 4; 507 ASSERT_THAT( 508 setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &quarter_sz, sizeof(quarter_sz)), 509 SyscallSucceeds()); 510 511 int val = 0; 512 socklen_t val_len = sizeof(val); 513 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), 514 SyscallSucceeds()); 515 516 // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF. 517 quarter_sz *= 2; 518 ASSERT_EQ(quarter_sz, val); 519 } 520 521 // Check that setting SO_SNDBUF below min is clamped to the minimum 522 // receive buffer size. 523 TEST_P(RawSocketTest, SetSocketSendBufBelowMin) { 524 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 525 526 // Discover minimum buffer size by trying to set it to zero. 527 constexpr int kSndBufSz = 0; 528 ASSERT_THAT( 529 setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), 530 SyscallSucceeds()); 531 532 int min = 0; 533 socklen_t min_len = sizeof(min); 534 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len), 535 SyscallSucceeds()); 536 537 // Linux doubles the value so let's use a value that when doubled will still 538 // be smaller than min. 539 int below_min = min / 2 - 1; 540 ASSERT_THAT( 541 setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &below_min, sizeof(below_min)), 542 SyscallSucceeds()); 543 544 int val = 0; 545 socklen_t val_len = sizeof(val); 546 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), 547 SyscallSucceeds()); 548 549 ASSERT_EQ(min, val); 550 } 551 552 // Check that setting SO_SNDBUF above max is clamped to the maximum 553 // send buffer size. 554 TEST_P(RawSocketTest, SetSocketSendBufAboveMax) { 555 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 556 557 // Discover maximum buffer size by trying to set it to a large value. 558 constexpr int kSndBufSz = 0xffffffff; 559 ASSERT_THAT( 560 setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), 561 SyscallSucceeds()); 562 563 int max = 0; 564 socklen_t max_len = sizeof(max); 565 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len), 566 SyscallSucceeds()); 567 568 int above_max = max + 1; 569 ASSERT_THAT( 570 setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &above_max, sizeof(above_max)), 571 SyscallSucceeds()); 572 573 int val = 0; 574 socklen_t val_len = sizeof(val); 575 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), 576 SyscallSucceeds()); 577 ASSERT_EQ(max, val); 578 } 579 580 // Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored. 581 TEST_P(RawSocketTest, SetSocketSendBuf) { 582 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 583 584 int max = 0; 585 int min = 0; 586 { 587 // Discover maximum buffer size by trying to set it to a large value. 588 constexpr int kSndBufSz = 0xffffffff; 589 ASSERT_THAT( 590 setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), 591 SyscallSucceeds()); 592 593 max = 0; 594 socklen_t max_len = sizeof(max); 595 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len), 596 SyscallSucceeds()); 597 } 598 599 { 600 // Discover minimum buffer size by trying to set it to zero. 601 constexpr int kSndBufSz = 0; 602 ASSERT_THAT( 603 setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), 604 SyscallSucceeds()); 605 606 socklen_t min_len = sizeof(min); 607 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len), 608 SyscallSucceeds()); 609 } 610 611 int quarter_sz = min + (max - min) / 4; 612 ASSERT_THAT( 613 setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &quarter_sz, sizeof(quarter_sz)), 614 SyscallSucceeds()); 615 616 int val = 0; 617 socklen_t val_len = sizeof(val); 618 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), 619 SyscallSucceeds()); 620 621 quarter_sz *= 2; 622 ASSERT_EQ(quarter_sz, val); 623 } 624 625 // Test that receive buffer limits are not enforced when the recv buffer is 626 // empty. 627 TEST_P(RawSocketTest, RecvBufLimitsEmptyRecvBuffer) { 628 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 629 630 ASSERT_THAT( 631 bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 632 SyscallSucceeds()); 633 ASSERT_THAT( 634 connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 635 SyscallSucceeds()); 636 637 int min = 0; 638 { 639 // Discover minimum buffer size by trying to set it to zero. 640 constexpr int kRcvBufSz = 0; 641 ASSERT_THAT( 642 setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), 643 SyscallSucceeds()); 644 645 socklen_t min_len = sizeof(min); 646 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), 647 SyscallSucceeds()); 648 } 649 650 { 651 // Send data of size min and verify that it's received. 652 std::vector<char> buf(min); 653 RandomizeBuffer(buf.data(), buf.size()); 654 ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); 655 656 // Receive the packet and make sure it's identical. 657 std::vector<char> recv_buf(buf.size() + HdrLen()); 658 ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); 659 EXPECT_EQ( 660 memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), 661 0); 662 } 663 664 { 665 // Send data of size min + 1 and verify that its received. Both linux and 666 // Netstack accept a dgram that exceeds rcvBuf limits if the receive buffer 667 // is currently empty. 668 std::vector<char> buf(min + 1); 669 RandomizeBuffer(buf.data(), buf.size()); 670 ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); 671 // Receive the packet and make sure it's identical. 672 std::vector<char> recv_buf(buf.size() + HdrLen()); 673 ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); 674 EXPECT_EQ( 675 memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), 676 0); 677 } 678 } 679 680 TEST_P(RawSocketTest, RecvBufLimits) { 681 // TCP stack generates RSTs for unknown endpoints and it complicates the test 682 // as we have to deal with the RST packets as well. For testing the raw socket 683 // endpoints buffer limit enforcement we can just test for UDP. 684 // 685 // We don't use SKIP_IF here because root_test_runner explicitly fails if a 686 // test is skipped. 687 if (Protocol() == IPPROTO_TCP) { 688 return; 689 } 690 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 691 692 ASSERT_THAT( 693 bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 694 SyscallSucceeds()); 695 ASSERT_THAT( 696 connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 697 SyscallSucceeds()); 698 699 int min = 0; 700 { 701 // Discover minimum buffer size by trying to set it to zero. 702 constexpr int kRcvBufSz = 0; 703 ASSERT_THAT( 704 setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), 705 SyscallSucceeds()); 706 707 socklen_t min_len = sizeof(min); 708 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), 709 SyscallSucceeds()); 710 } 711 712 // Now set the limit to min * 2. 713 int new_rcv_buf_sz = min * 2; 714 ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz, 715 sizeof(new_rcv_buf_sz)), 716 SyscallSucceeds()); 717 int rcv_buf_sz = 0; 718 { 719 socklen_t rcv_buf_len = sizeof(rcv_buf_sz); 720 ASSERT_THAT( 721 getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, &rcv_buf_len), 722 SyscallSucceeds()); 723 } 724 725 // Set a receive timeout so that we don't block forever on reads if the test 726 // fails. 727 struct timeval tv { 728 .tv_sec = 1, .tv_usec = 0, 729 }; 730 ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), 731 SyscallSucceeds()); 732 733 { 734 std::vector<char> buf(min); 735 RandomizeBuffer(buf.data(), buf.size()); 736 737 ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); 738 ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); 739 ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); 740 ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); 741 int sent = 4; 742 if (IsRunningOnGvisor()) { 743 // Linux seems to drop the 4th packet even though technically it should 744 // fit in the receive buffer. 745 ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); 746 sent++; 747 } 748 749 // Verify that the expected number of packets are available to be read. 750 for (int i = 0; i < sent - 1; i++) { 751 // Receive the packet and make sure it's identical. 752 std::vector<char> recv_buf(buf.size() + HdrLen()); 753 ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); 754 EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), buf.data(), 755 buf.size()), 756 0); 757 } 758 759 // Assert that the last packet is dropped because the receive buffer should 760 // be full after the first four packets. 761 std::vector<char> recv_buf(buf.size() + HdrLen()); 762 struct iovec iov = {}; 763 iov.iov_base = static_cast<void*>(const_cast<char*>(recv_buf.data())); 764 iov.iov_len = buf.size(); 765 struct msghdr msg = {}; 766 msg.msg_iov = &iov; 767 msg.msg_iovlen = 1; 768 msg.msg_control = NULL; 769 msg.msg_controllen = 0; 770 msg.msg_flags = 0; 771 ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, MSG_DONTWAIT), 772 SyscallFailsWithErrno(EAGAIN)); 773 } 774 } 775 776 void RawSocketTest::SendBuf(const char* buf, int buf_len) { 777 // It's safe to use const_cast here because sendmsg won't modify the iovec or 778 // address. 779 struct iovec iov = {}; 780 iov.iov_base = static_cast<void*>(const_cast<char*>(buf)); 781 iov.iov_len = static_cast<size_t>(buf_len); 782 struct msghdr msg = {}; 783 msg.msg_name = static_cast<void*>(&addr_); 784 msg.msg_namelen = AddrLen(); 785 msg.msg_iov = &iov; 786 msg.msg_iovlen = 1; 787 msg.msg_control = NULL; 788 msg.msg_controllen = 0; 789 msg.msg_flags = 0; 790 ASSERT_THAT(sendmsg(s_, &msg, 0), SyscallSucceedsWithValue(buf_len)); 791 } 792 793 void RawSocketTest::ReceiveBuf(char* recv_buf, size_t recv_buf_len) { 794 ASSERT_NO_FATAL_FAILURE(ReceiveBufFrom(s_, recv_buf, recv_buf_len)); 795 } 796 797 void RawSocketTest::ReceiveBufFrom(int sock, char* recv_buf, 798 size_t recv_buf_len) { 799 ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sock, recv_buf, recv_buf_len)); 800 } 801 802 TEST_P(RawSocketTest, SetSocketDetachFilterNoInstalledFilter) { 803 // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER. 804 if (IsRunningOnGvisor()) { 805 constexpr int val = 0; 806 ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)), 807 SyscallSucceeds()); 808 return; 809 } 810 811 constexpr int val = 0; 812 ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)), 813 SyscallFailsWithErrno(ENOENT)); 814 } 815 816 TEST_P(RawSocketTest, GetSocketDetachFilter) { 817 int val = 0; 818 socklen_t val_len = sizeof(val); 819 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, &val_len), 820 SyscallFailsWithErrno(ENOPROTOOPT)); 821 } 822 823 // AF_INET6+SOCK_RAW+IPPROTO_RAW sockets can be created, but not written to. 824 TEST(RawSocketTest, IPv6ProtoRaw) { 825 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 826 827 int sock; 828 ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW), 829 SyscallSucceeds()); 830 831 // Verify that writing yields EINVAL. 832 char buf[] = "This is such a weird little edge case"; 833 struct sockaddr_in6 sin6 = {}; 834 sin6.sin6_family = AF_INET6; 835 sin6.sin6_addr = in6addr_loopback; 836 ASSERT_THAT(sendto(sock, buf, sizeof(buf), 0 /* flags */, 837 reinterpret_cast<struct sockaddr*>(&sin6), sizeof(sin6)), 838 SyscallFailsWithErrno(EINVAL)); 839 } 840 841 TEST(RawSocketTest, IPv6SendMsg) { 842 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 843 844 int sock; 845 ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_TCP), 846 SyscallSucceeds()); 847 848 char kBuf[] = "hello"; 849 struct iovec iov = {}; 850 iov.iov_base = static_cast<void*>(const_cast<char*>(kBuf)); 851 iov.iov_len = static_cast<size_t>(sizeof(kBuf)); 852 853 struct sockaddr_storage addr = {}; 854 struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr); 855 sin->sin_family = AF_INET; 856 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 857 858 struct msghdr msg = {}; 859 msg.msg_name = static_cast<void*>(&addr); 860 msg.msg_namelen = sizeof(sockaddr_in); 861 msg.msg_iov = &iov; 862 msg.msg_iovlen = 1; 863 msg.msg_control = NULL; 864 msg.msg_controllen = 0; 865 msg.msg_flags = 0; 866 ASSERT_THAT(sendmsg(sock, &msg, 0), SyscallFailsWithErrno(EINVAL)); 867 } 868 869 TEST_P(RawSocketTest, ConnectOnIPv6Socket) { 870 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))); 871 872 int sock; 873 ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_TCP), 874 SyscallSucceeds()); 875 876 struct sockaddr_storage addr = {}; 877 struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr); 878 sin->sin_family = AF_INET; 879 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 880 881 ASSERT_THAT(connect(sock, reinterpret_cast<struct sockaddr*>(&addr), 882 sizeof(sockaddr_in6)), 883 SyscallFailsWithErrno(EAFNOSUPPORT)); 884 } 885 886 INSTANTIATE_TEST_SUITE_P( 887 AllInetTests, RawSocketTest, 888 ::testing::Combine(::testing::Values(IPPROTO_TCP, IPPROTO_UDP), 889 ::testing::Values(AF_INET, AF_INET6))); 890 891 } // namespace 892 893 } // namespace testing 894 } // namespace gvisor