gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/test/syscalls/linux/raw_socket.cc (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include <arpa/inet.h> 16 #include <net/if.h> 17 #include <netinet/in.h> 18 #include <netinet/ip.h> 19 #include <netinet/ip6.h> 20 #include <netinet/ip_icmp.h> 21 #include <poll.h> 22 #include <sys/socket.h> 23 #include <sys/types.h> 24 #include <unistd.h> 25 26 #include "gmock/gmock.h" 27 #include "gtest/gtest.h" 28 #include "test/syscalls/linux/ip_socket_test_util.h" 29 #include "test/syscalls/linux/unix_domain_socket_test_util.h" 30 #include "test/util/capability_util.h" 31 #include "test/util/file_descriptor.h" 32 #include "test/util/socket_util.h" 33 #include "test/util/test_util.h" 34 35 // Note: in order to run these tests, /proc/sys/net/ipv4/ping_group_range will 36 // need to be configured to let the superuser create ping sockets (see icmp(7)). 37 38 namespace gvisor { 39 namespace testing { 40 41 namespace { 42 43 #define TCPHDR_RST 0x4 44 #define TCPHDR_FLAGS_OFF 13 45 46 using ::testing::AnyOf; 47 48 // Fixture for tests parameterized by protocol. 49 class RawSocketTest : public ::testing::TestWithParam<std::tuple<int, int>> { 50 protected: 51 // Creates a socket to be used in tests. 52 void SetUp() override; 53 54 // Closes the socket created by SetUp(). 55 void TearDown() override; 56 57 // Sends buf via s_. 58 void SendBuf(const char* buf, int buf_len); 59 60 // Reads from s_ into recv_buf. 61 void ReceiveBuf(char* recv_buf, size_t recv_buf_len); 62 63 void ReceiveBufFrom(int sock, char* recv_buf, size_t recv_buf_len); 64 65 int Protocol() { return std::get<0>(GetParam()); } 66 67 int Family() { return std::get<1>(GetParam()); } 68 69 socklen_t AddrLen() { 70 if (Family() == AF_INET) { 71 return sizeof(sockaddr_in); 72 } 73 return sizeof(sockaddr_in6); 74 } 75 76 int HdrLen() { 77 if (Family() == AF_INET) { 78 return sizeof(struct iphdr); 79 } 80 // IPv6 raw sockets don't include the header. 81 return 0; 82 } 83 84 uint16_t Port(struct sockaddr* s) { 85 if (Family() == AF_INET) { 86 return ntohs(reinterpret_cast<struct sockaddr_in*>(s)->sin_port); 87 } 88 return ntohs(reinterpret_cast<struct sockaddr_in6*>(s)->sin6_port); 89 } 90 91 void* Addr(struct sockaddr* s) { 92 if (Family() == AF_INET) { 93 return &(reinterpret_cast<struct sockaddr_in*>(s)->sin_addr); 94 } 95 return &(reinterpret_cast<struct sockaddr_in6*>(s)->sin6_addr); 96 } 97 98 // The socket used for both reading and writing. 99 int s_; 100 101 // The loopback address. 102 struct sockaddr_storage addr_; 103 }; 104 105 void RawSocketTest::SetUp() { 106 if (!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())) { 107 ASSERT_THAT(socket(Family(), SOCK_RAW, Protocol()), 108 SyscallFailsWithErrno(EPERM)); 109 GTEST_SKIP(); 110 } 111 112 ASSERT_THAT(s_ = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds()); 113 114 addr_ = {}; 115 116 // We don't set ports because raw sockets don't have a notion of ports. 117 if (Family() == AF_INET) { 118 struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr_); 119 sin->sin_family = AF_INET; 120 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 121 } else { 122 struct sockaddr_in6* sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr_); 123 sin6->sin6_family = AF_INET6; 124 sin6->sin6_addr = in6addr_loopback; 125 } 126 } 127 128 void RawSocketTest::TearDown() { 129 // TearDown will be run even if we skip the test. 130 if (ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())) { 131 EXPECT_THAT(close(s_), SyscallSucceeds()); 132 } 133 } 134 135 // We should be able to create multiple raw sockets for the same protocol. 136 // BasicRawSocket::Setup creates the first one, so we only have to create one 137 // more here. 138 TEST_P(RawSocketTest, MultipleCreation) { 139 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 140 141 int s2; 142 ASSERT_THAT(s2 = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds()); 143 144 ASSERT_THAT(close(s2), SyscallSucceeds()); 145 } 146 147 // Test that shutting down an unconnected socket fails. 148 TEST_P(RawSocketTest, FailShutdownWithoutConnect) { 149 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 150 151 ASSERT_THAT(shutdown(s_, SHUT_WR), SyscallFailsWithErrno(ENOTCONN)); 152 ASSERT_THAT(shutdown(s_, SHUT_RD), SyscallFailsWithErrno(ENOTCONN)); 153 } 154 155 // Shutdown is a no-op for raw sockets (and datagram sockets in general). 156 TEST_P(RawSocketTest, ShutdownWriteNoop) { 157 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 158 159 ASSERT_THAT( 160 connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 161 SyscallSucceeds()); 162 ASSERT_THAT(shutdown(s_, SHUT_WR), SyscallSucceeds()); 163 164 // Arbitrary. 165 constexpr char kBuf[] = "noop"; 166 ASSERT_THAT(RetryEINTR(write)(s_, kBuf, sizeof(kBuf)), 167 SyscallSucceedsWithValue(sizeof(kBuf))); 168 } 169 170 // Shutdown is a no-op for raw sockets (and datagram sockets in general). 171 TEST_P(RawSocketTest, ShutdownReadNoop) { 172 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 173 174 ASSERT_THAT( 175 connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 176 SyscallSucceeds()); 177 ASSERT_THAT(shutdown(s_, SHUT_RD), SyscallSucceeds()); 178 179 // Arbitrary. 180 constexpr char kBuf[] = "gdg"; 181 ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); 182 183 std::vector<char> c(sizeof(kBuf) + HdrLen()); 184 ASSERT_THAT(read(s_, c.data(), c.size()), SyscallSucceedsWithValue(c.size())); 185 } 186 187 // Test that listen() fails. 188 TEST_P(RawSocketTest, FailListen) { 189 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 190 191 ASSERT_THAT(listen(s_, 1), SyscallFailsWithErrno(ENOTSUP)); 192 } 193 194 // Test that accept() fails. 195 TEST_P(RawSocketTest, FailAccept) { 196 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 197 198 struct sockaddr saddr; 199 socklen_t addrlen; 200 ASSERT_THAT(accept(s_, &saddr, &addrlen), SyscallFailsWithErrno(ENOTSUP)); 201 } 202 203 TEST_P(RawSocketTest, BindThenGetSockName) { 204 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 205 206 struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_); 207 ASSERT_THAT(bind(s_, addr, AddrLen()), SyscallSucceeds()); 208 struct sockaddr_storage saddr_storage; 209 struct sockaddr* saddr = reinterpret_cast<struct sockaddr*>(&saddr_storage); 210 socklen_t saddrlen = AddrLen(); 211 ASSERT_THAT(getsockname(s_, saddr, &saddrlen), SyscallSucceeds()); 212 ASSERT_EQ(saddrlen, AddrLen()); 213 214 // The port is expected to hold the protocol number. 215 EXPECT_EQ(Port(saddr), Protocol()); 216 217 char addrbuf[INET6_ADDRSTRLEN], saddrbuf[INET6_ADDRSTRLEN]; 218 const char* addrstr = 219 inet_ntop(addr->sa_family, Addr(addr), addrbuf, sizeof(addrbuf)); 220 ASSERT_NE(addrstr, nullptr); 221 const char* saddrstr = 222 inet_ntop(saddr->sa_family, Addr(saddr), saddrbuf, sizeof(saddrbuf)); 223 ASSERT_NE(saddrstr, nullptr); 224 EXPECT_STREQ(saddrstr, addrstr); 225 } 226 227 TEST_P(RawSocketTest, ConnectThenGetSockName) { 228 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 229 230 struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_); 231 ASSERT_THAT(connect(s_, addr, AddrLen()), SyscallSucceeds()); 232 struct sockaddr_storage saddr_storage; 233 struct sockaddr* saddr = reinterpret_cast<struct sockaddr*>(&saddr_storage); 234 socklen_t saddrlen = AddrLen(); 235 ASSERT_THAT(getsockname(s_, saddr, &saddrlen), SyscallSucceeds()); 236 ASSERT_EQ(saddrlen, AddrLen()); 237 238 // The port is expected to hold the protocol number. 239 EXPECT_EQ(Port(saddr), Protocol()); 240 241 char addrbuf[INET6_ADDRSTRLEN], saddrbuf[INET6_ADDRSTRLEN]; 242 const char* addrstr = 243 inet_ntop(addr->sa_family, Addr(addr), addrbuf, sizeof(addrbuf)); 244 ASSERT_NE(addrstr, nullptr); 245 const char* saddrstr = 246 inet_ntop(saddr->sa_family, Addr(saddr), saddrbuf, sizeof(saddrbuf)); 247 ASSERT_NE(saddrstr, nullptr); 248 EXPECT_STREQ(saddrstr, addrstr); 249 } 250 251 // Test that getpeername() returns nothing before connect(). 252 TEST_P(RawSocketTest, FailGetPeerNameBeforeConnect) { 253 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 254 255 struct sockaddr saddr; 256 socklen_t addrlen = sizeof(saddr); 257 ASSERT_THAT(getpeername(s_, &saddr, &addrlen), 258 SyscallFailsWithErrno(ENOTCONN)); 259 } 260 261 // Test that getpeername() returns something after connect(). 262 TEST_P(RawSocketTest, GetPeerName) { 263 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 264 265 ASSERT_THAT( 266 connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 267 SyscallSucceeds()); 268 struct sockaddr saddr; 269 socklen_t addrlen = sizeof(saddr); 270 ASSERT_THAT(getpeername(s_, &saddr, &addrlen), 271 SyscallFailsWithErrno(ENOTCONN)); 272 ASSERT_GT(addrlen, 0); 273 } 274 275 // Test that the socket is writable immediately. 276 TEST_P(RawSocketTest, PollWritableImmediately) { 277 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 278 279 struct pollfd pfd = {}; 280 pfd.fd = s_; 281 pfd.events = POLLOUT; 282 ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 10000), SyscallSucceedsWithValue(1)); 283 } 284 285 // Test that the socket isn't readable before receiving anything. 286 TEST_P(RawSocketTest, PollNotReadableInitially) { 287 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 288 289 // Try to receive data with MSG_DONTWAIT, which returns immediately if there's 290 // nothing to be read. 291 char buf[117]; 292 ASSERT_THAT(RetryEINTR(recv)(s_, buf, sizeof(buf), MSG_DONTWAIT), 293 SyscallFailsWithErrno(EAGAIN)); 294 } 295 296 // Test that the socket becomes readable once something is written to it. 297 TEST_P(RawSocketTest, PollTriggeredOnWrite) { 298 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 299 300 // Write something so that there's data to be read. 301 // Arbitrary. 302 constexpr char kBuf[] = "JP5"; 303 ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); 304 305 struct pollfd pfd = {}; 306 pfd.fd = s_; 307 pfd.events = POLLIN; 308 ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, 10000), SyscallSucceedsWithValue(1)); 309 } 310 311 // Test that we can connect() to a valid IP (loopback). 312 TEST_P(RawSocketTest, ConnectToLoopback) { 313 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 314 315 ASSERT_THAT( 316 connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 317 SyscallSucceeds()); 318 } 319 320 // Test that calling send() without connect() fails. 321 TEST_P(RawSocketTest, SendWithoutConnectFails) { 322 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 323 324 // Arbitrary. 325 constexpr char kBuf[] = "Endgame was good"; 326 ASSERT_THAT(send(s_, kBuf, sizeof(kBuf), 0), 327 SyscallFailsWithErrno(EDESTADDRREQ)); 328 } 329 330 // Wildcard Bind. 331 TEST_P(RawSocketTest, BindToWildcard) { 332 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 333 struct sockaddr_storage addr; 334 addr = {}; 335 336 // We don't set ports because raw sockets don't have a notion of ports. 337 if (Family() == AF_INET) { 338 struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr); 339 sin->sin_family = AF_INET; 340 sin->sin_addr.s_addr = htonl(INADDR_ANY); 341 } else { 342 struct sockaddr_in6* sin6 = reinterpret_cast<struct sockaddr_in6*>(&addr); 343 sin6->sin6_family = AF_INET6; 344 sin6->sin6_addr = in6addr_any; 345 } 346 347 ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 348 SyscallSucceeds()); 349 } 350 351 // Bind to localhost. 352 TEST_P(RawSocketTest, BindToLocalhost) { 353 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 354 355 ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 356 SyscallSucceeds()); 357 } 358 359 // Bind to a different address. 360 TEST_P(RawSocketTest, BindToInvalid) { 361 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 362 363 struct sockaddr_storage bind_addr = addr_; 364 if (Family() == AF_INET) { 365 struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&bind_addr); 366 sin->sin_addr = {1}; // 1.0.0.0 - An address that we can't bind to. 367 } else { 368 struct sockaddr_in6* sin6 = 369 reinterpret_cast<struct sockaddr_in6*>(&bind_addr); 370 memset(&sin6->sin6_addr.s6_addr, 0, sizeof(sin6->sin6_addr.s6_addr)); 371 sin6->sin6_addr.s6_addr[0] = 1; // 1: - An address that we can't bind to. 372 } 373 ASSERT_THAT( 374 bind(s_, reinterpret_cast<struct sockaddr*>(&bind_addr), AddrLen()), 375 SyscallFailsWithErrno(EADDRNOTAVAIL)); 376 } 377 378 // Send and receive an packet. 379 TEST_P(RawSocketTest, SendAndReceive) { 380 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 381 382 // Arbitrary. 383 constexpr char kBuf[] = "TB12"; 384 ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); 385 386 // Receive the packet and make sure it's identical. 387 std::vector<char> recv_buf(sizeof(kBuf) + HdrLen()); 388 ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); 389 EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0); 390 } 391 392 // We should be able to create multiple raw sockets for the same protocol and 393 // receive the same packet on both. 394 TEST_P(RawSocketTest, MultipleSocketReceive) { 395 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 396 397 int s2; 398 ASSERT_THAT(s2 = socket(Family(), SOCK_RAW, Protocol()), SyscallSucceeds()); 399 400 // Arbitrary. 401 constexpr char kBuf[] = "TB10"; 402 ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); 403 404 // Receive it on socket 1. 405 std::vector<char> recv_buf1(sizeof(kBuf) + HdrLen()); 406 ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf1.data(), recv_buf1.size())); 407 408 // Receive it on socket 2. 409 std::vector<char> recv_buf2(sizeof(kBuf) + HdrLen()); 410 ASSERT_NO_FATAL_FAILURE( 411 ReceiveBufFrom(s2, recv_buf2.data(), recv_buf2.size())); 412 413 EXPECT_EQ(memcmp(recv_buf1.data() + HdrLen(), recv_buf2.data() + HdrLen(), 414 sizeof(kBuf)), 415 0); 416 417 ASSERT_THAT(close(s2), SyscallSucceeds()); 418 } 419 420 // Test that connect sends packets to the right place. 421 TEST_P(RawSocketTest, SendAndReceiveViaConnect) { 422 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 423 424 ASSERT_THAT( 425 connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 426 SyscallSucceeds()); 427 428 // Arbitrary. 429 constexpr char kBuf[] = "JH4"; 430 ASSERT_THAT(send(s_, kBuf, sizeof(kBuf), 0), 431 SyscallSucceedsWithValue(sizeof(kBuf))); 432 433 // Receive the packet and make sure it's identical. 434 std::vector<char> recv_buf(sizeof(kBuf) + HdrLen()); 435 ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); 436 EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0); 437 } 438 439 // Bind to localhost, then send and receive packets. 440 TEST_P(RawSocketTest, BindSendAndReceive) { 441 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 442 443 ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 444 SyscallSucceeds()); 445 446 // Arbitrary. 447 constexpr char kBuf[] = "DR16"; 448 ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); 449 450 // Receive the packet and make sure it's identical. 451 std::vector<char> recv_buf(sizeof(kBuf) + HdrLen()); 452 ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); 453 EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0); 454 } 455 456 // Bind and connect to localhost and send/receive packets. 457 TEST_P(RawSocketTest, BindConnectSendAndReceive) { 458 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 459 460 ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 461 SyscallSucceeds()); 462 ASSERT_THAT( 463 connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 464 SyscallSucceeds()); 465 466 // Arbitrary. 467 constexpr char kBuf[] = "DG88"; 468 ASSERT_NO_FATAL_FAILURE(SendBuf(kBuf, sizeof(kBuf))); 469 470 // Receive the packet and make sure it's identical. 471 std::vector<char> recv_buf(sizeof(kBuf) + HdrLen()); 472 ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); 473 EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), kBuf, sizeof(kBuf)), 0); 474 } 475 476 // Check that setting SO_RCVBUF below min is clamped to the minimum 477 // receive buffer size. 478 TEST_P(RawSocketTest, SetSocketRecvBufBelowMin) { 479 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 480 481 // Discover minimum receive buf size by trying to set it to zero. 482 // See: 483 // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820 484 constexpr int kRcvBufSz = 0; 485 ASSERT_THAT( 486 setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), 487 SyscallSucceeds()); 488 489 int min = 0; 490 socklen_t min_len = sizeof(min); 491 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), 492 SyscallSucceeds()); 493 494 // Linux doubles the value so let's use a value that when doubled will still 495 // be smaller than min. 496 int below_min = min / 2 - 1; 497 ASSERT_THAT( 498 setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &below_min, sizeof(below_min)), 499 SyscallSucceeds()); 500 501 int val = 0; 502 socklen_t val_len = sizeof(val); 503 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), 504 SyscallSucceeds()); 505 506 ASSERT_EQ(min, val); 507 } 508 509 // Check that setting SO_RCVBUF above max is clamped to the maximum 510 // receive buffer size. 511 TEST_P(RawSocketTest, SetSocketRecvBufAboveMax) { 512 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 513 514 // Discover max buf size by trying to set the largest possible buffer size. 515 constexpr int kRcvBufSz = 0xffffffff; 516 ASSERT_THAT( 517 setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), 518 SyscallSucceeds()); 519 520 int max = 0; 521 socklen_t max_len = sizeof(max); 522 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len), 523 SyscallSucceeds()); 524 525 int above_max = max + 1; 526 ASSERT_THAT( 527 setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &above_max, sizeof(above_max)), 528 SyscallSucceeds()); 529 530 int val = 0; 531 socklen_t val_len = sizeof(val); 532 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), 533 SyscallSucceeds()); 534 ASSERT_EQ(max, val); 535 } 536 537 // Check that setting SO_RCVBUF min <= kRcvBufSz <= max is honored. 538 TEST_P(RawSocketTest, SetSocketRecvBuf) { 539 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 540 541 int max = 0; 542 int min = 0; 543 { 544 // Discover max buf size by trying to set a really large buffer size. 545 constexpr int kRcvBufSz = 0xffffffff; 546 ASSERT_THAT( 547 setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), 548 SyscallSucceeds()); 549 550 max = 0; 551 socklen_t max_len = sizeof(max); 552 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &max, &max_len), 553 SyscallSucceeds()); 554 } 555 556 { 557 // Discover minimum buffer size by trying to set a zero size receive buffer 558 // size. 559 // See: 560 // https://github.com/torvalds/linux/blob/a5dc8300df75e8b8384b4c82225f1e4a0b4d9b55/net/core/sock.c#L820 561 constexpr int kRcvBufSz = 0; 562 ASSERT_THAT( 563 setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), 564 SyscallSucceeds()); 565 566 socklen_t min_len = sizeof(min); 567 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), 568 SyscallSucceeds()); 569 } 570 571 int quarter_sz = min + (max - min) / 4; 572 ASSERT_THAT( 573 setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &quarter_sz, sizeof(quarter_sz)), 574 SyscallSucceeds()); 575 576 int val = 0; 577 socklen_t val_len = sizeof(val); 578 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &val, &val_len), 579 SyscallSucceeds()); 580 581 // Linux doubles the value set by SO_SNDBUF/SO_RCVBUF. 582 quarter_sz *= 2; 583 ASSERT_EQ(quarter_sz, val); 584 } 585 586 // Check that setting SO_SNDBUF below min is clamped to the minimum 587 // receive buffer size. 588 TEST_P(RawSocketTest, SetSocketSendBufBelowMin) { 589 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 590 591 // Discover minimum buffer size by trying to set it to zero. 592 constexpr int kSndBufSz = 0; 593 ASSERT_THAT( 594 setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), 595 SyscallSucceeds()); 596 597 int min = 0; 598 socklen_t min_len = sizeof(min); 599 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len), 600 SyscallSucceeds()); 601 602 // Linux doubles the value so let's use a value that when doubled will still 603 // be smaller than min. 604 int below_min = min / 2 - 1; 605 ASSERT_THAT( 606 setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &below_min, sizeof(below_min)), 607 SyscallSucceeds()); 608 609 int val = 0; 610 socklen_t val_len = sizeof(val); 611 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), 612 SyscallSucceeds()); 613 614 ASSERT_EQ(min, val); 615 } 616 617 // Check that setting SO_SNDBUF above max is clamped to the maximum 618 // send buffer size. 619 TEST_P(RawSocketTest, SetSocketSendBufAboveMax) { 620 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 621 622 // Discover maximum buffer size by trying to set it to a large value. 623 constexpr int kSndBufSz = 0xffffffff; 624 ASSERT_THAT( 625 setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), 626 SyscallSucceeds()); 627 628 int max = 0; 629 socklen_t max_len = sizeof(max); 630 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len), 631 SyscallSucceeds()); 632 633 int above_max = max + 1; 634 ASSERT_THAT( 635 setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &above_max, sizeof(above_max)), 636 SyscallSucceeds()); 637 638 int val = 0; 639 socklen_t val_len = sizeof(val); 640 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), 641 SyscallSucceeds()); 642 ASSERT_EQ(max, val); 643 } 644 645 // Check that setting SO_SNDBUF min <= kSndBufSz <= max is honored. 646 TEST_P(RawSocketTest, SetSocketSendBuf) { 647 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 648 649 int max = 0; 650 int min = 0; 651 { 652 // Discover maximum buffer size by trying to set it to a large value. 653 constexpr int kSndBufSz = 0xffffffff; 654 ASSERT_THAT( 655 setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), 656 SyscallSucceeds()); 657 658 max = 0; 659 socklen_t max_len = sizeof(max); 660 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &max, &max_len), 661 SyscallSucceeds()); 662 } 663 664 { 665 // Discover minimum buffer size by trying to set it to zero. 666 constexpr int kSndBufSz = 0; 667 ASSERT_THAT( 668 setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &kSndBufSz, sizeof(kSndBufSz)), 669 SyscallSucceeds()); 670 671 socklen_t min_len = sizeof(min); 672 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &min, &min_len), 673 SyscallSucceeds()); 674 } 675 676 int quarter_sz = min + (max - min) / 4; 677 ASSERT_THAT( 678 setsockopt(s_, SOL_SOCKET, SO_SNDBUF, &quarter_sz, sizeof(quarter_sz)), 679 SyscallSucceeds()); 680 681 int val = 0; 682 socklen_t val_len = sizeof(val); 683 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_SNDBUF, &val, &val_len), 684 SyscallSucceeds()); 685 686 quarter_sz *= 2; 687 ASSERT_EQ(quarter_sz, val); 688 } 689 690 void randomizePacket(char* buf, size_t len, int proto) { 691 RandomizeBuffer(buf, len); 692 // When testing with TCP sockets, ensure the RST flag is set. This is to 693 // prevent the TCP stack from generating RSTs packets for unknown endpoints. 694 if (proto == IPPROTO_TCP && len > TCPHDR_FLAGS_OFF) 695 buf[TCPHDR_FLAGS_OFF] |= TCPHDR_RST; 696 } 697 698 // Test that receive buffer limits are not enforced when the recv buffer is 699 // empty. 700 TEST_P(RawSocketTest, RecvBufLimitsEmptyRecvBuffer) { 701 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 702 703 ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 704 SyscallSucceeds()); 705 ASSERT_THAT( 706 connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 707 SyscallSucceeds()); 708 709 int min = 0; 710 { 711 // Discover minimum buffer size by trying to set it to zero. 712 constexpr int kRcvBufSz = 0; 713 ASSERT_THAT( 714 setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), 715 SyscallSucceeds()); 716 717 socklen_t min_len = sizeof(min); 718 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), 719 SyscallSucceeds()); 720 } 721 722 { 723 // Send data of size min and verify that it's received. 724 std::vector<char> buf(min); 725 randomizePacket(buf.data(), buf.size(), Protocol()); 726 ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); 727 728 // Receive the packet and make sure it's identical. 729 std::vector<char> recv_buf(buf.size() + HdrLen()); 730 ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); 731 EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), 0); 732 } 733 734 { 735 // Send data of size min + 1 and verify that its received. Both linux and 736 // Netstack accept a dgram that exceeds rcvBuf limits if the receive buffer 737 // is currently empty. 738 std::vector<char> buf(min + 1); 739 randomizePacket(buf.data(), buf.size(), Protocol()); 740 ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); 741 // Receive the packet and make sure it's identical. 742 std::vector<char> recv_buf(buf.size() + HdrLen()); 743 ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); 744 EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), 0); 745 } 746 } 747 748 TEST_P(RawSocketTest, RecvBufLimits) { 749 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 750 751 ASSERT_THAT(bind(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 752 SyscallSucceeds()); 753 ASSERT_THAT( 754 connect(s_, reinterpret_cast<struct sockaddr*>(&addr_), AddrLen()), 755 SyscallSucceeds()); 756 757 int min = 0; 758 { 759 // Discover minimum buffer size by trying to set it to zero. 760 constexpr int kRcvBufSz = 0; 761 ASSERT_THAT( 762 setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &kRcvBufSz, sizeof(kRcvBufSz)), 763 SyscallSucceeds()); 764 765 socklen_t min_len = sizeof(min); 766 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &min, &min_len), 767 SyscallSucceeds()); 768 } 769 770 // Now set the limit to min * 2. 771 int new_rcv_buf_sz = min * 2; 772 ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVBUF, &new_rcv_buf_sz, 773 sizeof(new_rcv_buf_sz)), 774 SyscallSucceeds()); 775 int rcv_buf_sz = 0; 776 { 777 socklen_t rcv_buf_len = sizeof(rcv_buf_sz); 778 ASSERT_THAT( 779 getsockopt(s_, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz, &rcv_buf_len), 780 SyscallSucceeds()); 781 } 782 783 // Set a receive timeout so that we don't block forever on reads if the test 784 // fails. 785 struct timeval tv { 786 .tv_sec = 1, .tv_usec = 0, 787 }; 788 ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)), 789 SyscallSucceeds()); 790 791 { 792 std::vector<char> buf(min); 793 randomizePacket(buf.data(), buf.size(), Protocol()); 794 795 ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); 796 ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); 797 ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); 798 ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); 799 int sent = 4; 800 if (IsRunningOnGvisor() && !IsRunningWithHostinet()) { 801 // Linux seems to drop the 4th packet even though technically it should 802 // fit in the receive buffer. 803 ASSERT_NO_FATAL_FAILURE(SendBuf(buf.data(), buf.size())); 804 sent++; 805 } 806 807 // Verify that the expected number of packets are available to be read. 808 for (int i = 0; i < sent - 1; i++) { 809 // Receive the packet and make sure it's identical. 810 std::vector<char> recv_buf(buf.size() + HdrLen()); 811 ASSERT_NO_FATAL_FAILURE(ReceiveBuf(recv_buf.data(), recv_buf.size())); 812 EXPECT_EQ(memcmp(recv_buf.data() + HdrLen(), buf.data(), buf.size()), 0); 813 } 814 815 // Assert that the last packet is dropped because the receive buffer should 816 // be full after the first four packets. 817 std::vector<char> recv_buf(buf.size() + HdrLen()); 818 struct iovec iov = {}; 819 iov.iov_base = static_cast<void*>(const_cast<char*>(recv_buf.data())); 820 iov.iov_len = buf.size(); 821 struct msghdr msg = {}; 822 msg.msg_iov = &iov; 823 msg.msg_iovlen = 1; 824 msg.msg_control = NULL; 825 msg.msg_controllen = 0; 826 msg.msg_flags = 0; 827 ASSERT_THAT(RetryEINTR(recvmsg)(s_, &msg, MSG_DONTWAIT), 828 SyscallFailsWithErrno(EAGAIN)); 829 } 830 } 831 832 void RawSocketTest::SendBuf(const char* buf, int buf_len) { 833 // It's safe to use const_cast here because sendmsg won't modify the iovec or 834 // address. 835 struct iovec iov = {}; 836 iov.iov_base = static_cast<void*>(const_cast<char*>(buf)); 837 iov.iov_len = static_cast<size_t>(buf_len); 838 struct msghdr msg = {}; 839 msg.msg_name = static_cast<void*>(&addr_); 840 msg.msg_namelen = AddrLen(); 841 msg.msg_iov = &iov; 842 msg.msg_iovlen = 1; 843 msg.msg_control = NULL; 844 msg.msg_controllen = 0; 845 msg.msg_flags = 0; 846 ASSERT_THAT(sendmsg(s_, &msg, 0), SyscallSucceedsWithValue(buf_len)); 847 } 848 849 void RawSocketTest::ReceiveBuf(char* recv_buf, size_t recv_buf_len) { 850 ASSERT_NO_FATAL_FAILURE(ReceiveBufFrom(s_, recv_buf, recv_buf_len)); 851 } 852 853 void RawSocketTest::ReceiveBufFrom(int sock, char* recv_buf, 854 size_t recv_buf_len) { 855 ASSERT_NO_FATAL_FAILURE(RecvNoCmsg(sock, recv_buf, recv_buf_len)); 856 } 857 858 TEST_P(RawSocketTest, SetSocketDetachFilterNoInstalledFilter) { 859 // TODO(gvisor.dev/2746): Support SO_ATTACH_FILTER/SO_DETACH_FILTER. 860 if (IsRunningOnGvisor()) { 861 constexpr int val = 0; 862 ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)), 863 SyscallSucceeds()); 864 return; 865 } 866 867 constexpr int val = 0; 868 ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)), 869 SyscallFailsWithErrno(ENOENT)); 870 } 871 872 TEST_P(RawSocketTest, GetSocketDetachFilter) { 873 int val = 0; 874 socklen_t val_len = sizeof(val); 875 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_DETACH_FILTER, &val, &val_len), 876 SyscallFailsWithErrno(ENOPROTOOPT)); 877 } 878 879 TEST_P(RawSocketTest, BindToDevice) { 880 constexpr char kLoopbackDeviceName[] = "lo"; 881 ASSERT_THAT(setsockopt(s_, SOL_SOCKET, SO_BINDTODEVICE, &kLoopbackDeviceName, 882 sizeof(kLoopbackDeviceName)), 883 SyscallSucceeds()); 884 885 char got[IFNAMSIZ]; 886 socklen_t got_len = sizeof(got); 887 ASSERT_THAT(getsockopt(s_, SOL_SOCKET, SO_BINDTODEVICE, &got, &got_len), 888 SyscallSucceeds()); 889 ASSERT_EQ(got_len, sizeof(kLoopbackDeviceName)); 890 EXPECT_EQ(strcmp(kLoopbackDeviceName, got), 0); 891 } 892 893 // AF_INET6+SOCK_RAW+IPPROTO_RAW sockets can be created, but not written to. 894 TEST(RawSocketTest, IPv6ProtoRaw) { 895 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 896 897 int sock; 898 ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW), 899 SyscallSucceeds()); 900 901 // Verify that writing yields EINVAL. 902 char buf[] = "This is such a weird little edge case"; 903 struct sockaddr_in6 sin6 = {}; 904 sin6.sin6_family = AF_INET6; 905 sin6.sin6_addr = in6addr_loopback; 906 ASSERT_THAT(sendto(sock, buf, sizeof(buf), 0 /* flags */, 907 reinterpret_cast<struct sockaddr*>(&sin6), sizeof(sin6)), 908 SyscallFailsWithErrno(EINVAL)); 909 } 910 911 TEST(RawSocketTest, IPv6SendMsg) { 912 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 913 914 int sock; 915 ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_TCP), 916 SyscallSucceeds()); 917 918 char kBuf[] = "hello"; 919 struct iovec iov = {}; 920 iov.iov_base = static_cast<void*>(const_cast<char*>(kBuf)); 921 iov.iov_len = static_cast<size_t>(sizeof(kBuf)); 922 923 struct sockaddr_storage addr = {}; 924 struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr); 925 sin->sin_family = AF_INET; 926 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 927 928 struct msghdr msg = {}; 929 msg.msg_name = static_cast<void*>(&addr); 930 msg.msg_namelen = sizeof(sockaddr_in); 931 msg.msg_iov = &iov; 932 msg.msg_iovlen = 1; 933 msg.msg_control = NULL; 934 msg.msg_controllen = 0; 935 msg.msg_flags = 0; 936 ASSERT_THAT(sendmsg(sock, &msg, 0), SyscallFailsWithErrno(EINVAL)); 937 } 938 939 TEST_P(RawSocketTest, ConnectOnIPv6Socket) { 940 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 941 942 int sock; 943 ASSERT_THAT(sock = socket(AF_INET6, SOCK_RAW, IPPROTO_TCP), 944 SyscallSucceeds()); 945 946 struct sockaddr_storage addr = {}; 947 struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(&addr); 948 sin->sin_family = AF_INET; 949 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 950 951 ASSERT_THAT(connect(sock, reinterpret_cast<struct sockaddr*>(&addr), 952 sizeof(sockaddr_in6)), 953 SyscallFailsWithErrno(EAFNOSUPPORT)); 954 } 955 956 INSTANTIATE_TEST_SUITE_P( 957 AllInetTests, RawSocketTest, 958 ::testing::Combine(::testing::Values(IPPROTO_TCP, IPPROTO_UDP), 959 ::testing::Values(AF_INET, AF_INET6))); 960 961 void TestRawSocketMaybeBindReceive(bool do_bind) { 962 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 963 964 constexpr char payload[] = "abcdefgh"; 965 966 const sockaddr_in addr = { 967 .sin_family = AF_INET, 968 .sin_addr = {.s_addr = htonl(INADDR_LOOPBACK)}, 969 }; 970 971 FileDescriptor udp_sock = 972 ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_DGRAM, 0)); 973 sockaddr_in udp_sock_bind_addr = addr; 974 socklen_t udp_sock_bind_addr_len = sizeof(udp_sock_bind_addr); 975 ASSERT_THAT(bind(udp_sock.get(), 976 reinterpret_cast<const sockaddr*>(&udp_sock_bind_addr), 977 sizeof(udp_sock_bind_addr)), 978 SyscallSucceeds()); 979 ASSERT_THAT(getsockname(udp_sock.get(), 980 reinterpret_cast<sockaddr*>(&udp_sock_bind_addr), 981 &udp_sock_bind_addr_len), 982 SyscallSucceeds()); 983 ASSERT_EQ(udp_sock_bind_addr_len, sizeof(udp_sock_bind_addr)); 984 985 FileDescriptor raw_sock = 986 ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP)); 987 988 auto test_recv = [&](const char* scope, uint32_t expected_destination) { 989 SCOPED_TRACE(scope); 990 991 constexpr int kInfinitePollTimeout = -1; 992 pollfd pfd = { 993 .fd = raw_sock.get(), 994 .events = POLLIN, 995 }; 996 ASSERT_THAT(RetryEINTR(poll)(&pfd, 1, kInfinitePollTimeout), 997 SyscallSucceedsWithValue(1)); 998 999 struct ipv4_udp_packet { 1000 iphdr ip; 1001 udphdr udp; 1002 char data[sizeof(payload)]; 1003 1004 // Used to make sure only the required space is used. 1005 char unused_space; 1006 } ABSL_ATTRIBUTE_PACKED; 1007 constexpr size_t kExpectedIPPacketSize = 1008 offsetof(ipv4_udp_packet, unused_space); 1009 1010 // Receive the whole IPv4 packet on the raw socket. 1011 ipv4_udp_packet read_raw_packet; 1012 sockaddr_in peer; 1013 socklen_t peerlen = sizeof(peer); 1014 ASSERT_EQ( 1015 recvfrom(raw_sock.get(), reinterpret_cast<char*>(&read_raw_packet), 1016 sizeof(read_raw_packet), 0 /* flags */, 1017 reinterpret_cast<sockaddr*>(&peer), &peerlen), 1018 static_cast<ssize_t>(kExpectedIPPacketSize)) 1019 << strerror(errno); 1020 ASSERT_EQ(peerlen, sizeof(peer)); 1021 EXPECT_EQ(read_raw_packet.ip.version, static_cast<unsigned int>(IPVERSION)); 1022 // IHL holds the number of header bytes in 4 byte units. 1023 EXPECT_EQ(read_raw_packet.ip.ihl, sizeof(read_raw_packet.ip) / 4); 1024 EXPECT_EQ(ntohs(read_raw_packet.ip.tot_len), kExpectedIPPacketSize); 1025 EXPECT_EQ(ntohs(read_raw_packet.ip.frag_off) & IP_OFFMASK, 0); 1026 EXPECT_EQ(read_raw_packet.ip.protocol, SOL_UDP); 1027 EXPECT_EQ(ntohl(read_raw_packet.ip.saddr), INADDR_LOOPBACK); 1028 EXPECT_EQ(ntohl(read_raw_packet.ip.daddr), expected_destination); 1029 EXPECT_EQ(read_raw_packet.udp.source, udp_sock_bind_addr.sin_port); 1030 EXPECT_EQ(read_raw_packet.udp.dest, udp_sock_bind_addr.sin_port); 1031 EXPECT_EQ(ntohs(read_raw_packet.udp.len), 1032 kExpectedIPPacketSize - sizeof(read_raw_packet.ip)); 1033 for (size_t i = 0; i < sizeof(payload); i++) { 1034 EXPECT_EQ(read_raw_packet.data[i], payload[i]) 1035 << "byte mismatch @ idx=" << i; 1036 } 1037 EXPECT_EQ(peer.sin_family, AF_INET); 1038 EXPECT_EQ(peer.sin_port, 0); 1039 EXPECT_EQ(ntohl(peer.sin_addr.s_addr), INADDR_LOOPBACK); 1040 }; 1041 1042 if (do_bind) { 1043 ASSERT_THAT(bind(raw_sock.get(), reinterpret_cast<const sockaddr*>(&addr), 1044 sizeof(addr)), 1045 SyscallSucceeds()); 1046 } 1047 1048 constexpr int kSendToFlags = 0; 1049 sockaddr_in different_addr = udp_sock_bind_addr; 1050 different_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK + 1); 1051 ASSERT_THAT(sendto(udp_sock.get(), payload, sizeof(payload), kSendToFlags, 1052 reinterpret_cast<const sockaddr*>(&different_addr), 1053 sizeof(different_addr)), 1054 SyscallSucceedsWithValue(sizeof(payload))); 1055 if (!do_bind) { 1056 ASSERT_NO_FATAL_FAILURE( 1057 test_recv("different_addr", ntohl(different_addr.sin_addr.s_addr))); 1058 } 1059 ASSERT_THAT(sendto(udp_sock.get(), payload, sizeof(payload), kSendToFlags, 1060 reinterpret_cast<const sockaddr*>(&udp_sock_bind_addr), 1061 sizeof(udp_sock_bind_addr)), 1062 SyscallSucceedsWithValue(sizeof(payload))); 1063 ASSERT_NO_FATAL_FAILURE( 1064 test_recv("addr", ntohl(udp_sock_bind_addr.sin_addr.s_addr))); 1065 } 1066 1067 TEST(RawSocketTest, UnboundReceive) { 1068 // Test that a raw socket receives packets destined to any address if it is 1069 // not bound to an address. 1070 ASSERT_NO_FATAL_FAILURE(TestRawSocketMaybeBindReceive(false /* do_bind */)); 1071 } 1072 1073 TEST(RawSocketTest, BindReceive) { 1074 // Test that a raw socket only receives packets destined to the address it is 1075 // bound to. 1076 ASSERT_NO_FATAL_FAILURE(TestRawSocketMaybeBindReceive(true /* do_bind */)); 1077 } 1078 1079 TEST(RawSocketTest, ReceiveIPPacketInfo) { 1080 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 1081 1082 FileDescriptor raw = 1083 ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP)); 1084 1085 const sockaddr_in addr_ = { 1086 .sin_family = AF_INET, 1087 .sin_addr = {.s_addr = htonl(INADDR_LOOPBACK)}, 1088 }; 1089 ASSERT_THAT( 1090 bind(raw.get(), reinterpret_cast<const sockaddr*>(&addr_), sizeof(addr_)), 1091 SyscallSucceeds()); 1092 1093 // Register to receive IP packet info. 1094 ASSERT_THAT(setsockopt(raw.get(), IPPROTO_IP, IP_PKTINFO, &kSockOptOn, 1095 sizeof(kSockOptOn)), 1096 SyscallSucceeds()); 1097 1098 constexpr char send_buf[] = "malformed UDP"; 1099 ASSERT_THAT(sendto(raw.get(), send_buf, sizeof(send_buf), /*flags=*/0, 1100 reinterpret_cast<const sockaddr*>(&addr_), sizeof(addr_)), 1101 SyscallSucceedsWithValue(sizeof(send_buf))); 1102 1103 struct { 1104 iphdr ip; 1105 char data[sizeof(send_buf)]; 1106 1107 // Extra space in the receive buffer should be unused. 1108 char unused_space; 1109 } ABSL_ATTRIBUTE_PACKED recv_buf; 1110 1111 size_t recv_buf_len = sizeof(recv_buf); 1112 in_pktinfo received_pktinfo; 1113 ASSERT_NO_FATAL_FAILURE(RecvPktInfo(raw.get(), 1114 reinterpret_cast<char*>(&recv_buf), 1115 &recv_buf_len, &received_pktinfo)); 1116 1117 EXPECT_EQ(recv_buf_len, sizeof(iphdr) + sizeof(send_buf)); 1118 EXPECT_EQ(memcmp(send_buf, &recv_buf.data, sizeof(send_buf)), 0); 1119 EXPECT_EQ(recv_buf.ip.version, static_cast<unsigned int>(IPVERSION)); 1120 // IHL holds the number of header bytes in 4 byte units. 1121 EXPECT_EQ(recv_buf.ip.ihl, sizeof(iphdr) / 4); 1122 EXPECT_EQ(ntohs(recv_buf.ip.tot_len), sizeof(iphdr) + sizeof(send_buf)); 1123 EXPECT_EQ(recv_buf.ip.protocol, IPPROTO_UDP); 1124 EXPECT_EQ(ntohl(recv_buf.ip.saddr), INADDR_LOOPBACK); 1125 EXPECT_EQ(ntohl(recv_buf.ip.daddr), INADDR_LOOPBACK); 1126 1127 EXPECT_EQ(received_pktinfo.ipi_ifindex, 1128 ASSERT_NO_ERRNO_AND_VALUE(GetLoopbackIndex())); 1129 EXPECT_EQ(ntohl(received_pktinfo.ipi_spec_dst.s_addr), INADDR_LOOPBACK); 1130 EXPECT_EQ(ntohl(received_pktinfo.ipi_addr.s_addr), INADDR_LOOPBACK); 1131 } 1132 1133 TEST(RawSocketTest, ReceiveIPv6PacketInfo) { 1134 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 1135 1136 FileDescriptor raw = 1137 ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_RAW, IPPROTO_UDP)); 1138 1139 const sockaddr_in6 addr_ = { 1140 .sin6_family = AF_INET6, 1141 .sin6_addr = in6addr_loopback, 1142 }; 1143 ASSERT_THAT( 1144 bind(raw.get(), reinterpret_cast<const sockaddr*>(&addr_), sizeof(addr_)), 1145 SyscallSucceeds()); 1146 1147 // Register to receive IPv6 packet info. 1148 ASSERT_THAT(setsockopt(raw.get(), IPPROTO_IPV6, IPV6_RECVPKTINFO, &kSockOptOn, 1149 sizeof(kSockOptOn)), 1150 SyscallSucceeds()); 1151 1152 constexpr char send_buf[] = "malformed UDP"; 1153 ASSERT_THAT(sendto(raw.get(), send_buf, sizeof(send_buf), /*flags=*/0, 1154 reinterpret_cast<const sockaddr*>(&addr_), sizeof(addr_)), 1155 SyscallSucceedsWithValue(sizeof(send_buf))); 1156 1157 char recv_buf[sizeof(send_buf) + 1]; 1158 size_t recv_buf_len = sizeof(recv_buf); 1159 in6_pktinfo received_pktinfo; 1160 ASSERT_NO_FATAL_FAILURE(RecvIPv6PktInfo(raw.get(), 1161 reinterpret_cast<char*>(&recv_buf), 1162 &recv_buf_len, &received_pktinfo)); 1163 EXPECT_EQ(recv_buf_len, sizeof(send_buf)); 1164 EXPECT_EQ(memcmp(send_buf, recv_buf, sizeof(send_buf)), 0); 1165 EXPECT_EQ(received_pktinfo.ipi6_ifindex, 1166 ASSERT_NO_ERRNO_AND_VALUE(GetLoopbackIndex())); 1167 ASSERT_EQ(memcmp(&received_pktinfo.ipi6_addr, &in6addr_loopback, 1168 sizeof(in6addr_loopback)), 1169 0); 1170 } 1171 1172 TEST(RawSocketTest, ReceiveTOS) { 1173 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 1174 1175 FileDescriptor raw = 1176 ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP)); 1177 1178 const sockaddr_in kAddr = { 1179 .sin_family = AF_INET, 1180 .sin_addr = {.s_addr = htonl(INADDR_LOOPBACK)}, 1181 }; 1182 ASSERT_THAT( 1183 bind(raw.get(), reinterpret_cast<const sockaddr*>(&kAddr), sizeof(kAddr)), 1184 SyscallSucceeds()); 1185 1186 constexpr int kArbitraryTOS = 42; 1187 ASSERT_THAT(setsockopt(raw.get(), IPPROTO_IP, IP_TOS, &kArbitraryTOS, 1188 sizeof(kArbitraryTOS)), 1189 SyscallSucceeds()); 1190 1191 constexpr char kSendBuf[] = "malformed UDP"; 1192 ASSERT_THAT(sendto(raw.get(), kSendBuf, sizeof(kSendBuf), 0 /* flags */, 1193 reinterpret_cast<const sockaddr*>(&kAddr), sizeof(kAddr)), 1194 SyscallSucceedsWithValue(sizeof(kSendBuf))); 1195 1196 // Register to receive TOS. 1197 constexpr int kOne = 1; 1198 ASSERT_THAT( 1199 setsockopt(raw.get(), IPPROTO_IP, IP_RECVTOS, &kOne, sizeof(kOne)), 1200 SyscallSucceeds()); 1201 1202 struct { 1203 iphdr ip; 1204 char data[sizeof(kSendBuf)]; 1205 1206 // Extra space in the receive buffer should be unused. 1207 char unused_space; 1208 } ABSL_ATTRIBUTE_PACKED recv_buf; 1209 uint8_t recv_tos; 1210 size_t recv_buf_len = sizeof(recv_buf); 1211 ASSERT_NO_FATAL_FAILURE(RecvTOS(raw.get(), reinterpret_cast<char*>(&recv_buf), 1212 &recv_buf_len, &recv_tos)); 1213 ASSERT_EQ(recv_buf_len, sizeof(iphdr) + sizeof(kSendBuf)); 1214 1215 EXPECT_EQ(recv_buf.ip.version, static_cast<unsigned int>(IPVERSION)); 1216 // IHL holds the number of header bytes in 4 byte units. 1217 EXPECT_EQ(recv_buf.ip.ihl, sizeof(iphdr) / 4); 1218 EXPECT_EQ(ntohs(recv_buf.ip.tot_len), sizeof(iphdr) + sizeof(kSendBuf)); 1219 EXPECT_EQ(recv_buf.ip.protocol, IPPROTO_UDP); 1220 EXPECT_EQ(ntohl(recv_buf.ip.saddr), INADDR_LOOPBACK); 1221 EXPECT_EQ(ntohl(recv_buf.ip.daddr), INADDR_LOOPBACK); 1222 1223 EXPECT_EQ(memcmp(kSendBuf, &recv_buf.data, sizeof(kSendBuf)), 0); 1224 1225 if (const char* val = getenv("TOS_TCLASS_EXPECT_DEFAULT"); 1226 val != nullptr && strcmp(val, "1") == 0) { 1227 // TODO(b/217448626): At least one Linux environment does not allow setting 1228 // a custom TOS. In this case, we additionally accept the default. 1229 EXPECT_THAT(recv_buf.ip.tos, AnyOf(kArbitraryTOS, 0u)); 1230 EXPECT_THAT(recv_tos, AnyOf(kArbitraryTOS, 0u)); 1231 } else { 1232 EXPECT_EQ(recv_buf.ip.tos, static_cast<uint8_t>(kArbitraryTOS)); 1233 EXPECT_EQ(recv_tos, kArbitraryTOS); 1234 } 1235 } 1236 1237 TEST(RawSocketTest, ReceiveTClass) { 1238 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 1239 1240 FileDescriptor raw = 1241 ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_RAW, IPPROTO_UDP)); 1242 1243 const sockaddr_in6 kAddr = { 1244 .sin6_family = AF_INET6, 1245 .sin6_addr = in6addr_loopback, 1246 }; 1247 ASSERT_THAT( 1248 bind(raw.get(), reinterpret_cast<const sockaddr*>(&kAddr), sizeof(kAddr)), 1249 SyscallSucceeds()); 1250 1251 constexpr int kArbitraryTClass = 42; 1252 ASSERT_THAT(setsockopt(raw.get(), IPPROTO_IPV6, IPV6_TCLASS, 1253 &kArbitraryTClass, sizeof(kArbitraryTClass)), 1254 SyscallSucceeds()); 1255 1256 constexpr char send_buf[] = "malformed UDP"; 1257 ASSERT_THAT(sendto(raw.get(), send_buf, sizeof(send_buf), 0 /* flags */, 1258 reinterpret_cast<const sockaddr*>(&kAddr), sizeof(kAddr)), 1259 SyscallSucceedsWithValue(sizeof(send_buf))); 1260 1261 // Register to receive TClass. 1262 constexpr int kOne = 1; 1263 ASSERT_THAT( 1264 setsockopt(raw.get(), IPPROTO_IPV6, IPV6_RECVTCLASS, &kOne, sizeof(kOne)), 1265 SyscallSucceeds()); 1266 1267 char recv_buf[sizeof(send_buf) + 1]; 1268 size_t recv_buf_len = sizeof(recv_buf); 1269 int recv_tclass; 1270 ASSERT_NO_FATAL_FAILURE( 1271 RecvTClass(raw.get(), recv_buf, &recv_buf_len, &recv_tclass)); 1272 ASSERT_EQ(recv_buf_len, sizeof(send_buf)); 1273 1274 EXPECT_EQ(memcmp(send_buf, recv_buf, sizeof(send_buf)), 0); 1275 1276 if (const char* val = getenv("TOS_TCLASS_EXPECT_DEFAULT"); 1277 val != nullptr && strcmp(val, "1") == 0) { 1278 // TODO(b/217448626): At least one Linux environment does not allow setting 1279 // a custom TCLASS. In this case, we additionally accept the default. 1280 EXPECT_THAT(recv_tclass, AnyOf(kArbitraryTClass, 0)); 1281 } else { 1282 EXPECT_EQ(recv_tclass, kArbitraryTClass); 1283 } 1284 } 1285 1286 TEST(RawSocketTest, ReceiveTTL) { 1287 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 1288 1289 FileDescriptor raw = 1290 ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET, SOCK_RAW, IPPROTO_UDP)); 1291 1292 const sockaddr_in kAddr = { 1293 .sin_family = AF_INET, 1294 .sin_addr = {.s_addr = htonl(INADDR_LOOPBACK)}, 1295 }; 1296 ASSERT_THAT( 1297 bind(raw.get(), reinterpret_cast<const sockaddr*>(&kAddr), sizeof(kAddr)), 1298 SyscallSucceeds()); 1299 ASSERT_THAT(connect(raw.get(), reinterpret_cast<const sockaddr*>(&kAddr), 1300 sizeof(kAddr)), 1301 SyscallSucceeds()); 1302 1303 constexpr int kArbitraryTTL = 42; 1304 ASSERT_THAT(setsockopt(raw.get(), IPPROTO_IP, IP_TTL, &kArbitraryTTL, 1305 sizeof(kArbitraryTTL)), 1306 SyscallSucceeds()); 1307 1308 char send_buf[] = "malformed UDP"; 1309 auto test_recv_ttl = [&](int expected_ttl) { 1310 // Register to receive TTL. 1311 constexpr int kOne = 1; 1312 ASSERT_THAT( 1313 setsockopt(raw.get(), IPPROTO_IP, IP_RECVTTL, &kOne, sizeof(kOne)), 1314 SyscallSucceeds()); 1315 1316 struct { 1317 iphdr ip; 1318 char data[sizeof(send_buf)]; 1319 } ABSL_ATTRIBUTE_PACKED recv_buf; 1320 1321 int recv_ttl; 1322 size_t recv_buf_len = sizeof(recv_buf); 1323 ASSERT_NO_FATAL_FAILURE(RecvTTL(raw.get(), 1324 reinterpret_cast<char*>(&recv_buf), 1325 &recv_buf_len, &recv_ttl)); 1326 ASSERT_EQ(recv_buf_len, sizeof(iphdr) + sizeof(send_buf)); 1327 1328 EXPECT_EQ(recv_buf.ip.version, static_cast<unsigned int>(IPVERSION)); 1329 // IHL holds the number of header bytes in 4 byte units. 1330 EXPECT_EQ(recv_buf.ip.ihl, sizeof(iphdr) / 4); 1331 EXPECT_EQ(ntohs(recv_buf.ip.tot_len), sizeof(iphdr) + sizeof(send_buf)); 1332 EXPECT_EQ(recv_buf.ip.protocol, IPPROTO_UDP); 1333 EXPECT_EQ(ntohl(recv_buf.ip.saddr), INADDR_LOOPBACK); 1334 EXPECT_EQ(ntohl(recv_buf.ip.daddr), INADDR_LOOPBACK); 1335 EXPECT_EQ(recv_buf.ip.ttl, static_cast<uint8_t>(expected_ttl)); 1336 1337 EXPECT_EQ(memcmp(send_buf, &recv_buf.data, sizeof(send_buf)), 0); 1338 1339 EXPECT_EQ(recv_ttl, expected_ttl); 1340 }; 1341 1342 ASSERT_THAT(send(raw.get(), send_buf, sizeof(send_buf), /*flags=*/0), 1343 SyscallSucceedsWithValue(sizeof(send_buf))); 1344 { 1345 SCOPED_TRACE("receive ttl set by option"); 1346 ASSERT_NO_FATAL_FAILURE(test_recv_ttl(kArbitraryTTL)); 1347 } 1348 1349 constexpr int kArbitrarySendmsgTTL = kArbitraryTTL + 1; 1350 ASSERT_NO_FATAL_FAILURE(SendTTL(raw.get(), send_buf, size_t(sizeof(send_buf)), 1351 kArbitrarySendmsgTTL)); 1352 { 1353 SCOPED_TRACE("receive ttl set by cmsg"); 1354 ASSERT_NO_FATAL_FAILURE(test_recv_ttl(kArbitrarySendmsgTTL)); 1355 } 1356 } 1357 1358 TEST(RawSocketTest, ReceiveHopLimit) { 1359 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 1360 1361 FileDescriptor raw = 1362 ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_RAW, IPPROTO_UDP)); 1363 1364 const sockaddr_in6 kAddr = { 1365 .sin6_family = AF_INET6, 1366 .sin6_addr = in6addr_loopback, 1367 }; 1368 ASSERT_THAT( 1369 bind(raw.get(), reinterpret_cast<const sockaddr*>(&kAddr), sizeof(kAddr)), 1370 SyscallSucceeds()); 1371 ASSERT_THAT(connect(raw.get(), reinterpret_cast<const sockaddr*>(&kAddr), 1372 sizeof(kAddr)), 1373 SyscallSucceeds()); 1374 1375 constexpr int kArbitraryHopLimit = 42; 1376 ASSERT_THAT(setsockopt(raw.get(), IPPROTO_IPV6, IPV6_UNICAST_HOPS, 1377 &kArbitraryHopLimit, sizeof(kArbitraryHopLimit)), 1378 SyscallSucceeds()); 1379 1380 // Register to receive HOPLIMIT. 1381 constexpr int kOne = 1; 1382 ASSERT_THAT(setsockopt(raw.get(), IPPROTO_IPV6, IPV6_RECVHOPLIMIT, &kOne, 1383 sizeof(kOne)), 1384 SyscallSucceeds()); 1385 1386 char send_buf[] = "malformed UDP"; 1387 auto test_recv_hoplimit = [&](int expected_hoplimit) { 1388 char recv_buf[sizeof(send_buf)]; 1389 size_t recv_buf_len = sizeof(recv_buf); 1390 int recv_hoplimit; 1391 ASSERT_NO_FATAL_FAILURE( 1392 RecvHopLimit(raw.get(), recv_buf, &recv_buf_len, &recv_hoplimit)); 1393 ASSERT_EQ(recv_buf_len, sizeof(send_buf)); 1394 1395 EXPECT_EQ(memcmp(send_buf, recv_buf, sizeof(send_buf)), 0); 1396 EXPECT_EQ(recv_hoplimit, expected_hoplimit); 1397 }; 1398 1399 ASSERT_THAT(send(raw.get(), send_buf, sizeof(send_buf), /*flags=*/0), 1400 SyscallSucceedsWithValue(sizeof(send_buf))); 1401 { 1402 SCOPED_TRACE("receive hoplimit set by option"); 1403 ASSERT_NO_FATAL_FAILURE(test_recv_hoplimit(kArbitraryHopLimit)); 1404 } 1405 1406 constexpr int kArbitrarySendmsgHopLimit = kArbitraryHopLimit + 1; 1407 ASSERT_NO_FATAL_FAILURE(SendHopLimit(raw.get(), send_buf, 1408 size_t(sizeof(send_buf)), 1409 kArbitrarySendmsgHopLimit)); 1410 { 1411 SCOPED_TRACE("receive hoplimit set by cmsg"); 1412 ASSERT_NO_FATAL_FAILURE(test_recv_hoplimit(kArbitrarySendmsgHopLimit)); 1413 } 1414 } 1415 1416 TEST(RawSocketTest, SetIPv6ChecksumError_MultipleOf2) { 1417 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 1418 1419 FileDescriptor fd = 1420 ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_RAW, IPPROTO_UDP)); 1421 1422 int intV = 3; 1423 ASSERT_THAT( 1424 setsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &intV, sizeof(intV)), 1425 SyscallFailsWithErrno(EINVAL)); 1426 1427 intV = 5; 1428 ASSERT_THAT( 1429 setsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &intV, sizeof(intV)), 1430 SyscallFailsWithErrno(EINVAL)); 1431 1432 intV = 2; 1433 ASSERT_THAT( 1434 setsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &intV, sizeof(intV)), 1435 SyscallSucceeds()); 1436 1437 intV = 4; 1438 ASSERT_THAT( 1439 setsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &intV, sizeof(intV)), 1440 SyscallSucceeds()); 1441 } 1442 1443 TEST(RawSocketTest, SetIPv6ChecksumError_ReadShort) { 1444 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 1445 1446 FileDescriptor fd = 1447 ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_RAW, IPPROTO_UDP)); 1448 1449 int intV = 2; 1450 if (IsRunningOnGvisor()) { 1451 // TODO(https://gvisor.dev/issue/6982): This is a deviation from Linux. We 1452 // should determine if we want to match the behaviour or handle the error 1453 // more gracefully. 1454 ASSERT_THAT( 1455 setsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &intV, sizeof(intV) - 1), 1456 SyscallFailsWithErrno(EINVAL)); 1457 return; 1458 } 1459 1460 intV = std::numeric_limits<int>::max(); 1461 if (intV % 2) { 1462 intV--; 1463 } 1464 1465 if (const char* val = getenv("IPV6_CHECKSUM_SETSOCKOPT_SHORT_EXCEPTION"); 1466 val != nullptr && strcmp(val, "1") == 0) { 1467 // TODO(https://issuetracker.google.com/issues/212585236): As of writing, it 1468 // seems like at least one Linux environment considers optlen unlike a local 1469 // Linux environment. In this case we call setsockopt with the full int so 1470 // that the rest of the test passes. Once the root cause for this difference 1471 // is found, we can update this check. 1472 ASSERT_THAT( 1473 setsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &intV, sizeof(intV)), 1474 SyscallSucceeds()); 1475 } else { 1476 ASSERT_THAT( 1477 setsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &intV, sizeof(intV) - 1), 1478 SyscallSucceeds()); 1479 } 1480 1481 { 1482 int got; 1483 socklen_t got_len = sizeof(got); 1484 ASSERT_THAT(getsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &got, &got_len), 1485 SyscallSucceeds()); 1486 ASSERT_EQ(got_len, sizeof(got)); 1487 // Even though we called setsockopt with a length smaller than an int, Linux 1488 // seems to read the full int. 1489 EXPECT_EQ(got, intV); 1490 } 1491 1492 // If we have pass a pointer that points to memory less than the size of an 1493 // int, we get a bad address error. 1494 std::unique_ptr<uint8_t> u8V; 1495 // Linux seems to assume a full int but doesn't check the passed length. 1496 // 1497 // https://github.com/torvalds/linux/blob/a52a8e9eaf4a12dd58953fc622bb2bc08fd1d32c/net/ipv6/raw.c#L1023 1498 // shows that Linux copies optVal to an int without first checking optLen. 1499 ASSERT_THAT( 1500 setsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, u8V.get(), sizeof(*u8V)), 1501 SyscallFailsWithErrno(EFAULT)); 1502 } 1503 1504 TEST(RawSocketTest, IPv6Checksum_ValidateAndCalculate) { 1505 SKIP_IF(!ASSERT_NO_ERRNO_AND_VALUE(HaveRawIPSocketCapability())); 1506 1507 FileDescriptor checksum_set = 1508 ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_RAW, IPPROTO_UDP)); 1509 1510 FileDescriptor checksum_not_set = 1511 ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_INET6, SOCK_RAW, IPPROTO_UDP)); 1512 1513 const sockaddr_in6 addr = { 1514 .sin6_family = AF_INET6, 1515 .sin6_addr = IN6ADDR_LOOPBACK_INIT, 1516 }; 1517 1518 auto bind_and_set_checksum = [&](const FileDescriptor& fd, int v) { 1519 ASSERT_THAT( 1520 bind(fd.get(), reinterpret_cast<const sockaddr*>(&addr), sizeof(addr)), 1521 SyscallSucceeds()); 1522 1523 int got; 1524 socklen_t got_len = sizeof(got); 1525 ASSERT_THAT(getsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &got, &got_len), 1526 SyscallSucceeds()); 1527 ASSERT_EQ(got_len, sizeof(got)); 1528 EXPECT_EQ(got, -1); 1529 1530 ASSERT_THAT(setsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &v, sizeof(v)), 1531 SyscallSucceeds()); 1532 ASSERT_THAT(getsockopt(fd.get(), SOL_IPV6, IPV6_CHECKSUM, &got, &got_len), 1533 SyscallSucceeds()); 1534 ASSERT_EQ(got_len, sizeof(got)); 1535 EXPECT_EQ(got, v); 1536 }; 1537 1538 struct udp_packet { 1539 udphdr udp; 1540 uint32_t value; 1541 } ABSL_ATTRIBUTE_PACKED; 1542 1543 ASSERT_NO_FATAL_FAILURE(bind_and_set_checksum( 1544 checksum_set, offsetof(udp_packet, udp) + offsetof(udphdr, uh_sum))); 1545 ASSERT_NO_FATAL_FAILURE(bind_and_set_checksum(checksum_not_set, -1)); 1546 1547 auto send = [&](const FileDescriptor& fd, uint32_t v) { 1548 const udp_packet packet = { 1549 .value = v, 1550 }; 1551 1552 ASSERT_THAT(sendto(fd.get(), &packet, sizeof(packet), /*flags=*/0, 1553 reinterpret_cast<const sockaddr*>(&addr), sizeof(addr)), 1554 SyscallSucceedsWithValue(sizeof(packet))); 1555 }; 1556 1557 auto expect_receive = [&](const FileDescriptor& fd, uint32_t v, 1558 bool should_check_xsum) { 1559 udp_packet packet; 1560 sockaddr_in6 sender; 1561 socklen_t sender_len = sizeof(sender); 1562 ASSERT_THAT( 1563 RetryEINTR(recvfrom)(fd.get(), &packet, sizeof(packet), /*flags=*/0, 1564 reinterpret_cast<sockaddr*>(&sender), &sender_len), 1565 SyscallSucceedsWithValue(sizeof(packet))); 1566 ASSERT_EQ(sender_len, sizeof(sender)); 1567 EXPECT_EQ(memcmp(&sender, &addr, sizeof(addr)), 0); 1568 EXPECT_EQ(packet.value, v); 1569 if (should_check_xsum) { 1570 EXPECT_NE(packet.udp.uh_sum, 0); 1571 } else { 1572 EXPECT_EQ(packet.udp.uh_sum, 0); 1573 } 1574 }; 1575 1576 uint32_t counter = 1; 1577 // Packets sent through checksum_not_set will not have a valid checksum set so 1578 // checksum_set should not accept those packets. 1579 ASSERT_NO_FATAL_FAILURE(send(checksum_not_set, counter)); 1580 ASSERT_NO_FATAL_FAILURE(expect_receive(checksum_not_set, counter, false)); 1581 1582 // Packets sent through checksum_set will have a valid checksum so both 1583 // sockets should accept them. 1584 ASSERT_NO_FATAL_FAILURE(send(checksum_set, ++counter)); 1585 ASSERT_NO_FATAL_FAILURE(expect_receive(checksum_set, counter, true)); 1586 ASSERT_NO_FATAL_FAILURE(expect_receive(checksum_not_set, counter, true)); 1587 } 1588 1589 } // namespace 1590 1591 } // namespace testing 1592 } // namespace gvisor