github.com/SagerNet/gvisor@v0.0.0-20210707092255-7731c139d75c/test/syscalls/linux/aio.cc (about)

     1  // Copyright 2018 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include <fcntl.h>
    16  #include <linux/aio_abi.h>
    17  #include <sys/mman.h>
    18  #include <sys/syscall.h>
    19  #include <sys/types.h>
    20  #include <unistd.h>
    21  
    22  #include <algorithm>
    23  #include <string>
    24  
    25  #include "gtest/gtest.h"
    26  #include "test/syscalls/linux/file_base.h"
    27  #include "test/util/cleanup.h"
    28  #include "test/util/file_descriptor.h"
    29  #include "test/util/fs_util.h"
    30  #include "test/util/memory_util.h"
    31  #include "test/util/posix_error.h"
    32  #include "test/util/proc_util.h"
    33  #include "test/util/temp_path.h"
    34  #include "test/util/test_util.h"
    35  
    36  using ::testing::_;
    37  
    38  namespace gvisor {
    39  namespace testing {
    40  namespace {
    41  
    42  // Returns the size of the VMA containing the given address.
    43  PosixErrorOr<size_t> VmaSizeAt(uintptr_t addr) {
    44    ASSIGN_OR_RETURN_ERRNO(std::string proc_self_maps,
    45                           GetContents("/proc/self/maps"));
    46    ASSIGN_OR_RETURN_ERRNO(auto entries, ParseProcMaps(proc_self_maps));
    47    // Use binary search to find the first VMA that might contain addr.
    48    ProcMapsEntry target = {};
    49    target.end = addr;
    50    auto it =
    51        std::upper_bound(entries.begin(), entries.end(), target,
    52                         [](const ProcMapsEntry& x, const ProcMapsEntry& y) {
    53                           return x.end < y.end;
    54                         });
    55    // Check that it actually contains addr.
    56    if (it == entries.end() || addr < it->start) {
    57      return PosixError(ENOENT, absl::StrCat("no VMA contains address ", addr));
    58    }
    59    return it->end - it->start;
    60  }
    61  
// Payload written by the iocbs built in this file. Kept as a char array so
// that sizeof(kData) includes the trailing NUL while strlen(kData) does not.
constexpr char kData[] = "hello world!";
    63  
    64  int SubmitCtx(aio_context_t ctx, long nr, struct iocb** iocbpp) {
    65    return syscall(__NR_io_submit, ctx, nr, iocbpp);
    66  }
    67  
    68  class AIOTest : public FileTest {
    69   public:
    70    AIOTest() : ctx_(0) {}
    71  
    72    int SetupContext(unsigned int nr) {
    73      return syscall(__NR_io_setup, nr, &ctx_);
    74    }
    75  
    76    int Submit(long nr, struct iocb** iocbpp) {
    77      return SubmitCtx(ctx_, nr, iocbpp);
    78    }
    79  
    80    int GetEvents(long min, long max, struct io_event* events,
    81                  struct timespec* timeout) {
    82      return RetryEINTR(syscall)(__NR_io_getevents, ctx_, min, max, events,
    83                                 timeout);
    84    }
    85  
    86    int DestroyContext() { return syscall(__NR_io_destroy, ctx_); }
    87  
    88    void TearDown() override {
    89      FileTest::TearDown();
    90      if (ctx_ != 0) {
    91        ASSERT_THAT(DestroyContext(), SyscallSucceeds());
    92        ctx_ = 0;
    93      }
    94    }
    95  
    96    struct iocb CreateCallback() {
    97      struct iocb cb = {};
    98      cb.aio_data = 0x123;
    99      cb.aio_fildes = test_file_fd_.get();
   100      cb.aio_lio_opcode = IOCB_CMD_PWRITE;
   101      cb.aio_buf = reinterpret_cast<uint64_t>(kData);
   102      cb.aio_offset = 0;
   103      cb.aio_nbytes = strlen(kData);
   104      return cb;
   105    }
   106  
   107   protected:
   108    aio_context_t ctx_;
   109  };
   110  
   111  TEST_F(AIOTest, BasicWrite) {
   112    // Copied from fs/aio.c.
   113    constexpr unsigned AIO_RING_MAGIC = 0xa10a10a1;
   114    struct aio_ring {
   115      unsigned id;
   116      unsigned nr;
   117      unsigned head;
   118      unsigned tail;
   119      unsigned magic;
   120      unsigned compat_features;
   121      unsigned incompat_features;
   122      unsigned header_length;
   123      struct io_event io_events[0];
   124    };
   125  
   126    // Setup a context that is 128 entries deep.
   127    ASSERT_THAT(SetupContext(128), SyscallSucceeds());
   128  
   129    // Check that 'ctx_' points to a valid address. libaio uses it to check if
   130    // aio implementation uses aio_ring. gVisor doesn't and returns all zeroes.
   131    // Linux implements aio_ring, so skip the zeroes check.
   132    //
   133    // TODO(gvisor.dev/issue/204): Remove when gVisor implements aio_ring.
   134    auto ring = reinterpret_cast<struct aio_ring*>(ctx_);
   135    auto magic = IsRunningOnGvisor() ? 0 : AIO_RING_MAGIC;
   136    EXPECT_EQ(ring->magic, magic);
   137  
   138    struct iocb cb = CreateCallback();
   139    struct iocb* cbs[1] = {&cb};
   140  
   141    // Submit the request.
   142    ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1));
   143  
   144    // Get the reply.
   145    struct io_event events[1];
   146    ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1));
   147  
   148    // Verify that it is as expected.
   149    EXPECT_EQ(events[0].data, 0x123);
   150    EXPECT_EQ(events[0].obj, reinterpret_cast<long>(&cb));
   151    EXPECT_EQ(events[0].res, strlen(kData));
   152  
   153    // Verify that the file contains the contents.
   154    char verify_buf[sizeof(kData)] = {};
   155    ASSERT_THAT(read(test_file_fd_.get(), verify_buf, sizeof(kData)),
   156                SyscallSucceedsWithValue(strlen(kData)));
   157    EXPECT_STREQ(verify_buf, kData);
   158  }
   159  
   160  TEST_F(AIOTest, BadWrite) {
   161    // Create a pipe and immediately close the read end.
   162    int pipefd[2];
   163    ASSERT_THAT(pipe(pipefd), SyscallSucceeds());
   164  
   165    FileDescriptor rfd(pipefd[0]);
   166    FileDescriptor wfd(pipefd[1]);
   167  
   168    rfd.reset();  // Close the read end.
   169  
   170    // Setup a context that is 128 entries deep.
   171    ASSERT_THAT(SetupContext(128), SyscallSucceeds());
   172  
   173    struct iocb cb = CreateCallback();
   174    // Try to write to the read end.
   175    cb.aio_fildes = wfd.get();
   176    struct iocb* cbs[1] = {&cb};
   177  
   178    // Submit the request.
   179    ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1));
   180  
   181    // Get the reply.
   182    struct io_event events[1];
   183    ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1));
   184  
   185    // Verify that it fails with the right error code.
   186    EXPECT_EQ(events[0].data, 0x123);
   187    EXPECT_EQ(events[0].obj, reinterpret_cast<uint64_t>(&cb));
   188    EXPECT_LT(events[0].res, 0);
   189  }
   190  
   191  TEST_F(AIOTest, ExitWithPendingIo) {
   192    // Setup a context that is 100 entries deep.
   193    ASSERT_THAT(SetupContext(100), SyscallSucceeds());
   194  
   195    struct iocb cb = CreateCallback();
   196    struct iocb* cbs[] = {&cb};
   197  
   198    // Submit a request but don't complete it to make it pending.
   199    for (int i = 0; i < 100; ++i) {
   200      EXPECT_THAT(Submit(1, cbs), SyscallSucceeds());
   201    }
   202  
   203    ASSERT_THAT(DestroyContext(), SyscallSucceeds());
   204    ctx_ = 0;
   205  }
   206  
   207  int Submitter(void* arg) {
   208    auto test = reinterpret_cast<AIOTest*>(arg);
   209  
   210    struct iocb cb = test->CreateCallback();
   211    struct iocb* cbs[1] = {&cb};
   212  
   213    // Submit the request.
   214    TEST_CHECK(test->Submit(1, cbs) == 1);
   215    return 0;
   216  }
   217  
   218  TEST_F(AIOTest, CloneVm) {
   219    // Setup a context that is 128 entries deep.
   220    ASSERT_THAT(SetupContext(128), SyscallSucceeds());
   221  
   222    const size_t kStackSize = 5 * kPageSize;
   223    std::unique_ptr<char[]> stack(new char[kStackSize]);
   224    char* bp = stack.get() + kStackSize;
   225    pid_t child;
   226    ASSERT_THAT(child = clone(Submitter, bp, CLONE_VM | SIGCHLD,
   227                              reinterpret_cast<void*>(this)),
   228                SyscallSucceeds());
   229  
   230    // Get the reply.
   231    struct io_event events[1];
   232    ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1));
   233  
   234    // Verify that it is as expected.
   235    EXPECT_EQ(events[0].data, 0x123);
   236    EXPECT_EQ(events[0].res, strlen(kData));
   237  
   238    // Verify that the file contains the contents.
   239    char verify_buf[32] = {};
   240    ASSERT_THAT(read(test_file_fd_.get(), &verify_buf[0], strlen(kData)),
   241                SyscallSucceeds());
   242    EXPECT_EQ(strcmp(kData, &verify_buf[0]), 0);
   243  
   244    int status;
   245    ASSERT_THAT(RetryEINTR(waitpid)(child, &status, 0),
   246                SyscallSucceedsWithValue(child));
   247    EXPECT_TRUE(WIFEXITED(status) && WEXITSTATUS(status) == 0)
   248        << " status " << status;
   249  }
   250  
   251  // Tests that AIO context can be remapped to a different address.
   252  TEST_F(AIOTest, Mremap) {
   253    // Setup a context that is 128 entries deep.
   254    ASSERT_THAT(SetupContext(128), SyscallSucceeds());
   255    const size_t ctx_size =
   256        ASSERT_NO_ERRNO_AND_VALUE(VmaSizeAt(reinterpret_cast<uintptr_t>(ctx_)));
   257  
   258    struct iocb cb = CreateCallback();
   259    struct iocb* cbs[1] = {&cb};
   260  
   261    // Reserve address space for the mremap target so we have something safe to
   262    // map over.
   263    Mapping dst =
   264        ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(ctx_size, PROT_READ, MAP_PRIVATE));
   265  
   266    // Remap context 'handle' to a different address.
   267    ASSERT_THAT(Mremap(reinterpret_cast<void*>(ctx_), ctx_size, dst.len(),
   268                       MREMAP_FIXED | MREMAP_MAYMOVE, dst.ptr()),
   269                IsPosixErrorOkAndHolds(dst.ptr()));
   270    aio_context_t old_ctx = ctx_;
   271    ctx_ = reinterpret_cast<aio_context_t>(dst.addr());
   272    // io_destroy() will unmap dst now.
   273    dst.release();
   274  
   275    // Check that submitting the request with the old 'ctx_' fails.
   276    ASSERT_THAT(SubmitCtx(old_ctx, 1, cbs), SyscallFailsWithErrno(EINVAL));
   277  
   278    // Submit the request with the new 'ctx_'.
   279    ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1));
   280  
   281    // Remap again.
   282    dst = ASSERT_NO_ERRNO_AND_VALUE(MmapAnon(ctx_size, PROT_READ, MAP_PRIVATE));
   283    ASSERT_THAT(Mremap(reinterpret_cast<void*>(ctx_), ctx_size, dst.len(),
   284                       MREMAP_FIXED | MREMAP_MAYMOVE, dst.ptr()),
   285                IsPosixErrorOkAndHolds(dst.ptr()));
   286    ctx_ = reinterpret_cast<aio_context_t>(dst.addr());
   287    dst.release();
   288  
   289    // Get the reply with yet another 'ctx_' and verify it.
   290    struct io_event events[1];
   291    ASSERT_THAT(GetEvents(1, 1, events, nullptr), SyscallSucceedsWithValue(1));
   292    EXPECT_EQ(events[0].data, 0x123);
   293    EXPECT_EQ(events[0].obj, reinterpret_cast<long>(&cb));
   294    EXPECT_EQ(events[0].res, strlen(kData));
   295  
   296    // Verify that the file contains the contents.
   297    char verify_buf[sizeof(kData)] = {};
   298    ASSERT_THAT(read(test_file_fd_.get(), verify_buf, sizeof(kData)),
   299                SyscallSucceedsWithValue(strlen(kData)));
   300    EXPECT_STREQ(verify_buf, kData);
   301  }
   302  
   303  // Tests that AIO context cannot be expanded with mremap.
   304  TEST_F(AIOTest, MremapExpansion) {
   305    // Setup a context that is 128 entries deep.
   306    ASSERT_THAT(SetupContext(128), SyscallSucceeds());
   307    const size_t ctx_size =
   308        ASSERT_NO_ERRNO_AND_VALUE(VmaSizeAt(reinterpret_cast<uintptr_t>(ctx_)));
   309  
   310    // Reserve address space for the mremap target so we have something safe to
   311    // map over.
   312    Mapping dst = ASSERT_NO_ERRNO_AND_VALUE(
   313        MmapAnon(ctx_size + kPageSize, PROT_NONE, MAP_PRIVATE));
   314  
   315    // Test that remapping to a larger address range fails.
   316    ASSERT_THAT(Mremap(reinterpret_cast<void*>(ctx_), ctx_size, dst.len(),
   317                       MREMAP_FIXED | MREMAP_MAYMOVE, dst.ptr()),
   318                PosixErrorIs(EFAULT, _));
   319  
   320    // mm/mremap.c:sys_mremap() => mremap_to() does do_munmap() of the destination
   321    // before it hits the VM_DONTEXPAND check in vma_to_resize(), so we should no
   322    // longer munmap it (another thread may have created a mapping there).
   323    dst.release();
   324  }
   325  
   326  // Tests that AIO calls fail if context's address is inaccessible.
   327  TEST_F(AIOTest, Mprotect) {
   328    // Setup a context that is 128 entries deep.
   329    ASSERT_THAT(SetupContext(128), SyscallSucceeds());
   330  
   331    struct iocb cb = CreateCallback();
   332    struct iocb* cbs[1] = {&cb};
   333  
   334    ASSERT_THAT(Submit(1, cbs), SyscallSucceedsWithValue(1));
   335  
   336    // Makes the context 'handle' inaccessible and check that all subsequent
   337    // calls fail.
   338    ASSERT_THAT(mprotect(reinterpret_cast<void*>(ctx_), kPageSize, PROT_NONE),
   339                SyscallSucceeds());
   340    struct io_event events[1];
   341    EXPECT_THAT(GetEvents(1, 1, events, nullptr), SyscallFailsWithErrno(EINVAL));
   342    ASSERT_THAT(Submit(1, cbs), SyscallFailsWithErrno(EINVAL));
   343    EXPECT_THAT(DestroyContext(), SyscallFailsWithErrno(EINVAL));
   344  
   345    // Prevent TearDown from attempting to destroy the context and fail.
   346    ctx_ = 0;
   347  }
   348  
   349  TEST_F(AIOTest, Timeout) {
   350    // Setup a context that is 128 entries deep.
   351    ASSERT_THAT(SetupContext(128), SyscallSucceeds());
   352  
   353    struct timespec timeout;
   354    timeout.tv_sec = 0;
   355    timeout.tv_nsec = 10;
   356    struct io_event events[1];
   357    ASSERT_THAT(GetEvents(1, 1, events, &timeout), SyscallSucceedsWithValue(0));
   358  }
   359  
// AIOTest fixture parameterized by an int; the BadOffset test reads it with
// GetParam() and uses it as the iocb opcode (aio_lio_opcode).
class AIOReadWriteParamTest : public AIOTest,
                              public ::testing::WithParamInterface<int> {};
   362  
   363  TEST_P(AIOReadWriteParamTest, BadOffset) {
   364    // Setup a context that is 128 entries deep.
   365    ASSERT_THAT(SetupContext(128), SyscallSucceeds());
   366  
   367    struct iocb cb = CreateCallback();
   368    struct iocb* cbs[1] = {&cb};
   369  
   370    // Create a buffer that we can write to.
   371    char buf[] = "hello world!";
   372    cb.aio_buf = reinterpret_cast<uint64_t>(buf);
   373  
   374    // Set the operation on the callback and give a negative offset.
   375    const int opcode = GetParam();
   376    cb.aio_lio_opcode = opcode;
   377  
   378    iovec iov = {};
   379    if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV) {
   380      // Create a valid iovec and set it in the callback.
   381      iov.iov_base = reinterpret_cast<void*>(buf);
   382      iov.iov_len = 1;
   383      cb.aio_buf = reinterpret_cast<uint64_t>(&iov);
   384      // aio_nbytes is the number of iovecs.
   385      cb.aio_nbytes = 1;
   386    }
   387  
   388    // Pass a negative offset.
   389    cb.aio_offset = -1;
   390  
   391    // Should get error on submission.
   392    ASSERT_THAT(Submit(1, cbs), SyscallFailsWithErrno(EINVAL));
   393  }
   394  
// Runs the AIOReadWriteParamTest tests once for each of the four read/write
// opcodes listed below (plain and vectored).
INSTANTIATE_TEST_SUITE_P(BadOffset, AIOReadWriteParamTest,
                         ::testing::Values(IOCB_CMD_PREAD, IOCB_CMD_PWRITE,
                                           IOCB_CMD_PREADV, IOCB_CMD_PWRITEV));
   398  
// AIOTest fixture parameterized by an int; the BadIOVecs test reads it with
// GetParam() and uses it as the iocb opcode (aio_lio_opcode).
class AIOVectorizedParamTest : public AIOTest,
                               public ::testing::WithParamInterface<int> {};
   401  
   402  TEST_P(AIOVectorizedParamTest, BadIOVecs) {
   403    // Setup a context that is 128 entries deep.
   404    ASSERT_THAT(SetupContext(128), SyscallSucceeds());
   405  
   406    struct iocb cb = CreateCallback();
   407    struct iocb* cbs[1] = {&cb};
   408  
   409    // Modify the callback to use the operation from the param.
   410    cb.aio_lio_opcode = GetParam();
   411  
   412    // Create an iovec with address in kernel range, and pass that as the buffer.
   413    iovec iov = {};
   414    iov.iov_base = reinterpret_cast<void*>(0xFFFFFFFF00000000);
   415    iov.iov_len = 1;
   416    cb.aio_buf = reinterpret_cast<uint64_t>(&iov);
   417    // aio_nbytes is the number of iovecs.
   418    cb.aio_nbytes = 1;
   419  
   420    // Should get error on submission.
   421    ASSERT_THAT(Submit(1, cbs), SyscallFailsWithErrno(EFAULT));
   422  }
   423  
// Runs the AIOVectorizedParamTest tests once for each of the two vectored
// opcodes listed below.
INSTANTIATE_TEST_SUITE_P(BadIOVecs, AIOVectorizedParamTest,
                         ::testing::Values(IOCB_CMD_PREADV, IOCB_CMD_PWRITEV));
   426  
   427  }  // namespace
   428  
   429  }  // namespace testing
   430  }  // namespace gvisor