github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/test/syscalls/linux/aio.cc (about) 1 // Copyright 2018 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include <fcntl.h> 16 #include <linux/aio_abi.h> 17 #include <sys/mman.h> 18 #include <sys/syscall.h> 19 #include <sys/types.h> 20 #include <unistd.h> 21 22 #include <algorithm> 23 #include <string> 24 25 #include "gtest/gtest.h" 26 #include "test/syscalls/linux/file_base.h" 27 #include "test/util/cleanup.h" 28 #include "test/util/file_descriptor.h" 29 #include "test/util/fs_util.h" 30 #include "test/util/memory_util.h" 31 #include "test/util/posix_error.h" 32 #include "test/util/proc_util.h" 33 #include "test/util/temp_path.h" 34 #include "test/util/test_util.h" 35 36 using ::testing::_; 37 38 namespace gvisor { 39 namespace testing { 40 namespace { 41 42 // Returns the size of the VMA containing the given address. 43 PosixErrorOr<size_t> VmaSizeAt(uintptr_t addr) { 44 ASSIGN_OR_RETURN_ERRNO(std::string proc_self_maps, 45 GetContents("/proc/self/maps")); 46 ASSIGN_OR_RETURN_ERRNO(auto entries, ParseProcMaps(proc_self_maps)); 47 // Use binary search to find the first VMA that might contain addr. 48 ProcMapsEntry target = {}; 49 target.end = addr; 50 auto it = 51 std::upper_bound(entries.begin(), entries.end(), target, 52 [](const ProcMapsEntry& x, const ProcMapsEntry& y) { 53 return x.end < y.end; 54 }); 55 // Check that it actually contains addr. 56 if (it == entries.end() || addr < it->start) { 57 return PosixError(ENOENT, absl::StrCat("no VMA contains address ", addr)); 58 } 59 return it->end - it->start; 60 } 61 62 constexpr char kData[] = "hello world!"; 63 64 int SubmitCtx(aio_context_t ctx, long nr, struct iocb** iocbpp) { 65 return syscall(__NR_io_submit, ctx, nr, iocbpp); 66 } 67 68 class AIOTest : public FileTest { 69 public: 70 AIOTest() : ctx_(0) {} 71 72 int SetupContext(unsigned int nr) { 73 return syscall(__NR_io_setup, nr, &ctx_); 74 } 75 76 int Submit(long nr, struct iocb** iocbpp) { 77 return SubmitCtx(ctx_, nr, iocbpp); 78 } 79 80 int GetEvents(long min, long max, struct io_event* events, 81 struct timespec* timeout) { 82 return RetryEINTR(syscall)(__NR_io_getevents, ctx_, min, max, events, 83 timeout); 84 } 85 86 int DestroyContext() { return syscall(__NR_io_destroy, ctx_); } 87 88 void TearDown() override { 89 FileTest::TearDown(); 90 if (ctx_ != 0) { 91 ASSERT_THAT(DestroyContext(), SyscallSucceeds()); 92 ctx_ = 0; 93 } 94 } 95 96 struct iocb CreateCallback() { 97 struct iocb cb = {}; 98 cb.aio_data = 0x123; 99 cb.aio_fildes = test_file_fd_.get(); 100 cb.aio_lio_opcode = IOCB_CMD_PWRITE; 101 cb.aio_buf = reinterpret_cast<uint64_t>(kData); 102 cb.aio_offset = 0; 103 cb.aio_nbytes = strlen(kData); 104 return cb; 105 } 106 107 protected: 108 aio_context_t ctx_; 109 }; 110 111 TEST_F(AIOTest, BasicWrite) { 112 // Copied from fs/aio.c. 113 constexpr unsigned AIO_RING_MAGIC = 0xa10a10a1; 114 struct aio_ring { 115 unsigned id; 116 unsigned nr; 117 unsigned head; 118 unsigned tail; 119 unsigned magic; 120 unsigned compat_features; 121 unsigned incompat_features; 122 unsigned header_length; 123 struct io_event io_events[0]; 124 }; 125 126 // Setup a context that is 128 entries deep. 127 ASSERT_THAT(SetupContext(128), SyscallSucceeds()); 128 129 // Check that 'ctx_' points to a valid address. libaio uses it to check if 130 // aio implementation uses aio_ring. gVisor doesn't and returns all zeroes. 131 // Linux implements aio_ring, so skip the zeroes check. 132 // 133 // TODO(gvisor.dev/issue/204): Remove when gVisor implements aio_ring. 134 auto ring = reinterpret_cast<struct aio_ring*>(ctx_); 135 auto magic = IsRunningOnGvisor() ? 0 : AIO_RING_MAGIC; 136 EXPECT_EQ(ring->magic, magic); 137 138 struct iocb cb = CreateCallback(); 139 struct iocb* cbs[1] = {&cb}; 140 141 // Submit the request. 142 ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1)); 143 144 // Get the reply. 145 struct io_event events[1]; 146 ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1)); 147 148 // Verify that it is as expected. 149 EXPECT_EQ(events[0].data, 0x123); 150 EXPECT_EQ(events[0].obj, reinterpret_cast<long>(&cb)); 151 EXPECT_EQ(events[0].res, strlen(kData)); 152 153 // Verify that the file contains the contents. 154 char verify_buf[sizeof(kData)] = {}; 155 ASSERT_THAT(read(test_file_fd_.get(), verify_buf, sizeof(kData)), 156 SyscallSucceedsWithValue(strlen(kData))); 157 EXPECT_STREQ(verify_buf, kData); 158 } 159 160 TEST_F(AIOTest, BadWrite) { 161 // Create a pipe and immediately close the read end. 162 int pipefd[2]; 163 ASSERT_THAT(pipe(pipefd), SyscallSucceeds()); 164 165 FileDescriptor rfd(pipefd[0]); 166 FileDescriptor wfd(pipefd[1]); 167 168 rfd.reset(); // Close the read end. 169 170 // Setup a context that is 128 entries deep. 171 ASSERT_THAT(SetupContext(128), SyscallSucceeds()); 172 173 struct iocb cb = CreateCallback(); 174 // Try to write to the read end. 175 cb.aio_fildes = wfd.get(); 176 struct iocb* cbs[1] = {&cb}; 177 178 // Submit the request. 179 ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1)); 180 181 // Get the reply. 182 struct io_event events[1]; 183 ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1)); 184 185 // Verify that it fails with the right error code. 186 EXPECT_EQ(events[0].data, 0x123); 187 EXPECT_EQ(events[0].obj, reinterpret_cast<uint64_t>(&cb)); 188 EXPECT_LT(events[0].res, 0); 189 } 190 191 TEST_F(AIOTest, ExitWithPendingIo) { 192 // Setup a context that is 100 entries deep. 193 ASSERT_THAT(SetupContext(100), SyscallSucceeds()); 194 195 struct iocb cb = CreateCallback(); 196 struct iocb* cbs[] = {&cb}; 197 198 // Submit a request but don't complete it to make it pending. 199 for (int i = 0; i < 100; ++i) { 200 EXPECT_THAT(Submit(1, cbs), SyscallSucceeds()); 201 } 202 203 ASSERT_THAT(DestroyContext(), SyscallSucceeds()); 204 ctx_ = 0; 205 } 206 207 int Submitter(void* arg) { 208 auto test = reinterpret_cast<AIOTest*>(arg); 209 210 struct iocb cb = test->CreateCallback(); 211 struct iocb* cbs[1] = {&cb}; 212 213 // Submit the request. 214 TEST_CHECK(test->Submit(1, cbs) == 1); 215 return 0; 216 } 217 218 TEST_F(AIOTest, CloneVm) { 219 // Setup a context that is 128 entries deep. 220 ASSERT_THAT(SetupContext(128), SyscallSucceeds()); 221 222 const size_t kStackSize = 5 * kPageSize; 223 std::unique_ptr<char[]> stack(new char[kStackSize]); 224 char* bp = stack.get() + kStackSize; 225 pid_t child; 226 ASSERT_THAT(child = clone(Submitter, bp, CLONE_VM | SIGCHLD, 227 reinterpret_cast<void*>(this)), 228 SyscallSucceeds()); 229 230 // Get the reply. 231 struct io_event events[1]; 232 ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1)); 233 234 // Verify that it is as expected. 235 EXPECT_EQ(events[0].data, 0x123); 236 EXPECT_EQ(events[0].res, strlen(kData)); 237 238 // Verify that the file contains the contents. 239 char verify_buf[32] = {}; 240 ASSERT_THAT(read(test_file_fd_.get(), &verify_buf[0], strlen(kData)), 241 SyscallSucceeds()); 242 EXPECT_EQ(strcmp(kData, &verify_buf[0]), 0); 243 244 int status; 245 ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0), 246 SyscallSucceedsWithValue(child)); 247 EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0) 248 << " status " << status; 249 } 250 251 // Tests that AIO context can be remapped to a different address. 252 TEST_F(AIOTest, Mremap) { 253 // Setup a context that is 128 entries deep. 254 ASSERT_THAT(SetupContext(128), SyscallSucceeds()); 255 const size_t ctx_size = 256 ASSERT_NO_ERRNO_AND_VALUE(VmaSizeAt(reinterpret_cast<uintptr_t>(ctx_))); 257 258 struct iocb cb = CreateCallback(); 259 struct iocb* cbs[1] = {&cb}; 260 261 // Reserve address space for the mremap target so we have something safe to 262 // map over. 263 Mapping dst = 264 ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(ctx_size, PROT_READ, MAP_PRIVATE)); 265 266 // Remap context 'handle' to a different address. 267 ASSERT_THAT(Mremap(reinterpret_cast<void*>(ctx_), ctx_size, dst.len(), 268 MREMAP_FIXED | MREMAP_MAYMOVE, dst.ptr()), 269 IsPosixErrorOkAndHolds(dst.ptr())); 270 aio_context_t old_ctx = ctx_; 271 ctx_ = reinterpret_cast<aio_context_t>(dst.addr()); 272 // io_destroy() will unmap dst now. 273 dst.release(); 274 275 // Check that submitting the request with the old 'ctx_' fails. 276 ASSERT_THAT(SubmitCtx(old_ctx, 1, cbs), SyscallFailsWithErrno(EINVAL)); 277 278 // Submit the request with the new 'ctx_'. 279 ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1)); 280 281 // Remap again. 282 dst = ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(ctx_size, PROT_READ, MAP_PRIVATE)); 283 ASSERT_THAT(Mremap(reinterpret_cast<void*>(ctx_), ctx_size, dst.len(), 284 MREMAP_FIXED | MREMAP_MAYMOVE, dst.ptr()), 285 IsPosixErrorOkAndHolds(dst.ptr())); 286 ctx_ = reinterpret_cast<aio_context_t>(dst.addr()); 287 dst.release(); 288 289 // Get the reply with yet another 'ctx_' and verify it. 290 struct io_event events[1]; 291 ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1)); 292 EXPECT_EQ(events[0].data, 0x123); 293 EXPECT_EQ(events[0].obj, reinterpret_cast<long>(&cb)); 294 EXPECT_EQ(events[0].res, strlen(kData)); 295 296 // Verify that the file contains the contents. 297 char verify_buf[sizeof(kData)] = {}; 298 ASSERT_THAT(read(test_file_fd_.get(), verify_buf, sizeof(kData)), 299 SyscallSucceedsWithValue(strlen(kData))); 300 EXPECT_STREQ(verify_buf, kData); 301 } 302 303 // Tests that AIO context cannot be expanded with mremap. 304 TEST_F(AIOTest, MremapExpansion) { 305 // Setup a context that is 128 entries deep. 306 ASSERT_THAT(SetupContext(128), SyscallSucceeds()); 307 const size_t ctx_size = 308 ASSERT_NO_ERRNO_AND_VALUE(VmaSizeAt(reinterpret_cast<uintptr_t>(ctx_))); 309 310 // Reserve address space for the mremap target so we have something safe to 311 // map over. 312 Mapping dst = ASSERT_NO_ERRNO_AND_VALUE( 313 MmapAnon(ctx_size + kPageSize, PROT_NONE, MAP_PRIVATE)); 314 315 // Test that remapping to a larger address range fails. 316 ASSERT_THAT(Mremap(reinterpret_cast<void*>(ctx_), ctx_size, dst.len(), 317 MREMAP_FIXED | MREMAP_MAYMOVE, dst.ptr()), 318 PosixErrorIs(EFAULT, _)); 319 320 // mm/mremap.c:sys_mremap() => mremap_to() does do_munmap() of the destination 321 // before it hits the VM_DONTEXPAND check in vma_to_resize(), so we should no 322 // longer munmap it (another thread may have created a mapping there). 323 dst.release(); 324 } 325 326 // Tests that AIO calls fail if context's address is inaccessible. 327 TEST_F(AIOTest, Mprotect) { 328 // Setup a context that is 128 entries deep. 329 ASSERT_THAT(SetupContext(128), SyscallSucceeds()); 330 331 struct iocb cb = CreateCallback(); 332 struct iocb* cbs[1] = {&cb}; 333 334 ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1)); 335 336 // Makes the context 'handle' inaccessible and check that all subsequent 337 // calls fail. 338 ASSERT_THAT(mprotect(reinterpret_cast<void*>(ctx_), kPageSize, PROT_NONE), 339 SyscallSucceeds()); 340 struct io_event events[1]; 341 EXPECT_THAT(GetEvents(1, 1, events, nullptr), SyscallFailsWithErrno(EINVAL)); 342 ASSERT_THAT(Submit(1, cbs), SyscallFailsWithErrno(EINVAL)); 343 EXPECT_THAT(DestroyContext(), SyscallFailsWithErrno(EINVAL)); 344 345 // Prevent TearDown from attempting to destroy the context and fail. 346 ctx_ = 0; 347 } 348 349 TEST_F(AIOTest, Timeout) { 350 // Setup a context that is 128 entries deep. 351 ASSERT_THAT(SetupContext(128), SyscallSucceeds()); 352 353 struct timespec timeout; 354 timeout.tv_sec = 0; 355 timeout.tv_nsec = 10; 356 struct io_event events[1]; 357 ASSERT_THAT(GetEvents(1, 1, events, &timeout), SyscallSucceedsWithValue(0)); 358 } 359 360 class AIOReadWriteParamTest : public AIOTest, 361 public ::testing::WithParamInterface<int> {}; 362 363 TEST_P(AIOReadWriteParamTest, BadOffset) { 364 // Setup a context that is 128 entries deep. 365 ASSERT_THAT(SetupContext(128), SyscallSucceeds()); 366 367 struct iocb cb = CreateCallback(); 368 struct iocb* cbs[1] = {&cb}; 369 370 // Create a buffer that we can write to. 371 char buf[] = "hello world!"; 372 cb.aio_buf = reinterpret_cast<uint64_t>(buf); 373 374 // Set the operation on the callback and give a negative offset. 375 const int opcode = GetParam(); 376 cb.aio_lio_opcode = opcode; 377 378 iovec iov = {}; 379 if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV) { 380 // Create a valid iovec and set it in the callback. 381 iov.iov_base = reinterpret_cast<void*>(buf); 382 iov.iov_len = 1; 383 cb.aio_buf = reinterpret_cast<uint64_t>(&iov); 384 // aio_nbytes is the number of iovecs. 385 cb.aio_nbytes = 1; 386 } 387 388 // Pass a negative offset. 389 cb.aio_offset = -1; 390 391 // Should get error on submission. 392 ASSERT_THAT(Submit(1, cbs), SyscallFailsWithErrno(EINVAL)); 393 } 394 395 INSTANTIATE_TEST_SUITE_P(BadOffset, AIOReadWriteParamTest, 396 ::testing::Values(IOCB_CMD_PREAD, IOCB_CMD_PWRITE, 397 IOCB_CMD_PREADV, IOCB_CMD_PWRITEV)); 398 399 class AIOVectorizedParamTest : public AIOTest, 400 public ::testing::WithParamInterface<int> {}; 401 402 TEST_P(AIOVectorizedParamTest, BadIOVecs) { 403 // Setup a context that is 128 entries deep. 404 ASSERT_THAT(SetupContext(128), SyscallSucceeds()); 405 406 struct iocb cb = CreateCallback(); 407 struct iocb* cbs[1] = {&cb}; 408 409 // Modify the callback to use the operation from the param. 410 cb.aio_lio_opcode = GetParam(); 411 412 // Create an iovec with address in kernel range, and pass that as the buffer. 413 iovec iov = {}; 414 iov.iov_base = reinterpret_cast<void*>(0xFFFFFFFF00000000); 415 iov.iov_len = 1; 416 cb.aio_buf = reinterpret_cast<uint64_t>(&iov); 417 // aio_nbytes is the number of iovecs. 418 cb.aio_nbytes = 1; 419 420 // Should get error on submission. 421 ASSERT_THAT(Submit(1, cbs), SyscallFailsWithErrno(EFAULT)); 422 } 423 424 INSTANTIATE_TEST_SUITE_P(BadIOVecs, AIOVectorizedParamTest, 425 ::testing::Values(IOCB_CMD_PREADV, IOCB_CMD_PWRITEV)); 426 427 } // namespace 428 429 } // namespace testing 430 } // namespace gvisor