gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/test/perf/linux/futex_benchmark.cc

gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/test/perf/linux/futex_benchmark.cc (about)

     1  // Copyright 2020 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include <linux/futex.h>
    16  
    17  #include <atomic>
    18  #include <cerrno>
    19  #include <cstdint>
    20  #include <cstdlib>
    21  #include <ctime>
    22  
    23  #include "gtest/gtest.h"
    24  #include "absl/time/clock.h"
    25  #include "absl/time/time.h"
    26  #include "benchmark/benchmark.h"
    27  #include "test/util/logging.h"
    28  #include "test/util/thread_util.h"
    29  
    30  namespace gvisor {
    31  namespace testing {
    32  
    33  namespace {
    34  
    35  inline int FutexWait(std::atomic<int32_t>* v, int32_t val) {
    36    return syscall(SYS_futex, v, FUTEX_WAIT_PRIVATE, val, nullptr);
    37  }
    38  
    39  inline int FutexWaitMonotonicTimeout(std::atomic<int32_t>* v, int32_t val,
    40                                       const struct timespec* timeout) {
    41    return syscall(SYS_futex, v, FUTEX_WAIT_PRIVATE, val, timeout);
    42  }
    43  
    44  inline int FutexWaitMonotonicDeadline(std::atomic<int32_t>* v, int32_t val,
    45                                        const struct timespec* deadline) {
    46    return syscall(SYS_futex, v, FUTEX_WAIT_BITSET_PRIVATE, val, deadline,
    47                   nullptr, FUTEX_BITSET_MATCH_ANY);
    48  }
    49  
    50  inline int FutexWaitRealtimeDeadline(std::atomic<int32_t>* v, int32_t val,
    51                                       const struct timespec* deadline) {
    52    return syscall(SYS_futex, v, FUTEX_WAIT_BITSET_PRIVATE | FUTEX_CLOCK_REALTIME,
    53                   val, deadline, nullptr, FUTEX_BITSET_MATCH_ANY);
    54  }
    55  
    56  inline int FutexWake(std::atomic<int32_t>* v, int32_t count) {
    57    return syscall(SYS_futex, v, FUTEX_WAKE_PRIVATE, count);
    58  }
    59  
    60  // This just uses FUTEX_WAKE on an address with nothing waiting, very simple.
    61  void BM_FutexWakeNop(benchmark::State& state) {
    62    std::atomic<int32_t> v(0);
    63  
    64    for (auto _ : state) {
    65      TEST_PCHECK(FutexWake(&v, 1) == 0);
    66    }
    67  }
    68  
    69  BENCHMARK(BM_FutexWakeNop)->MinTime(5);
    70  
    71  // This just uses FUTEX_WAIT on an address whose value has changed, i.e., the
    72  // syscall won't wait.
    73  void BM_FutexWaitNop(benchmark::State& state) {
    74    std::atomic<int32_t> v(0);
    75  
    76    for (auto _ : state) {
    77      TEST_PCHECK(FutexWait(&v, 1) == -1 && errno == EAGAIN);
    78    }
    79  }
    80  
    81  BENCHMARK(BM_FutexWaitNop)->MinTime(5);
    82  
    83  // This uses FUTEX_WAIT with a timeout on an address whose value never
    84  // changes, such that it always times out. Timeout overhead can be estimated by
    85  // timer overruns for short timeouts.
    86  void BM_FutexWaitMonotonicTimeout(benchmark::State& state) {
    87    const absl::Duration timeout = absl::Nanoseconds(state.range(0));
    88    std::atomic<int32_t> v(0);
    89    auto ts = absl::ToTimespec(timeout);
    90  
    91    for (auto _ : state) {
    92      TEST_PCHECK(FutexWaitMonotonicTimeout(&v, 0, &ts) == -1 &&
    93                  errno == ETIMEDOUT);
    94    }
    95  }
    96  
    97  BENCHMARK(BM_FutexWaitMonotonicTimeout)
    98      ->MinTime(5)
    99      ->UseRealTime()
   100      ->Arg(1)
   101      ->Arg(10)
   102      ->Arg(100)
   103      ->Arg(1000)
   104      ->Arg(10000);
   105  
   106  // This uses FUTEX_WAIT_BITSET with a deadline that is in the past. This allows
   107  // estimation of the overhead of setting up a timer for a deadline (as opposed
   108  // to a timeout as specified for FUTEX_WAIT).
   109  void BM_FutexWaitMonotonicDeadline(benchmark::State& state) {
   110    std::atomic<int32_t> v(0);
   111    struct timespec ts = {};
   112  
   113    for (auto _ : state) {
   114      TEST_PCHECK(FutexWaitMonotonicDeadline(&v, 0, &ts) == -1 &&
   115                  errno == ETIMEDOUT);
   116    }
   117  }
   118  
   119  BENCHMARK(BM_FutexWaitMonotonicDeadline)->MinTime(5);
   120  
   121  // This is equivalent to BM_FutexWaitMonotonicDeadline, but uses CLOCK_REALTIME
   122  // instead of CLOCK_MONOTONIC for the deadline.
   123  void BM_FutexWaitRealtimeDeadline(benchmark::State& state) {
   124    std::atomic<int32_t> v(0);
   125    struct timespec ts = {};
   126  
   127    for (auto _ : state) {
   128      TEST_PCHECK(FutexWaitRealtimeDeadline(&v, 0, &ts) == -1 &&
   129                  errno == ETIMEDOUT);
   130    }
   131  }
   132  
   133  BENCHMARK(BM_FutexWaitRealtimeDeadline)->MinTime(5);
   134  
   135  int64_t GetCurrentMonotonicTimeNanos() {
   136    struct timespec ts;
   137    TEST_CHECK(clock_gettime(CLOCK_MONOTONIC, &ts) != -1);
   138    return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
   139  }
   140  
   141  void SpinNanos(int64_t delay_ns) {
   142    if (delay_ns <= 0) {
   143      return;
   144    }
   145    const int64_t end = GetCurrentMonotonicTimeNanos() + delay_ns;
   146    while (GetCurrentMonotonicTimeNanos() < end) {
   147      // spin
   148    }
   149  }
   150  
   151  // Each iteration of FutexRoundtripDelayed involves a thread sending a futex
   152  // wakeup to another thread, which spins for delay_us and then sends a futex
   153  // wakeup back. The time per iteration is 2 * (delay_us + kBeforeWakeDelayNs +
   154  // futex/scheduling overhead).
   155  void BM_FutexRoundtripDelayed(benchmark::State& state) {
   156    const int delay_us = state.range(0);
   157    const int64_t delay_ns = delay_us * 1000;
   158    // Spin for an extra kBeforeWakeDelayNs before invoking FUTEX_WAKE to reduce
   159    // the probability that the wakeup comes before the wait, preventing the wait
   160    // from ever taking effect and causing the benchmark to underestimate the
   161    // actual wakeup time.
   162    constexpr int64_t kBeforeWakeDelayNs = 500;
   163    std::atomic<int32_t> v(0);
   164    ScopedThread t([&] {
   165      for (benchmark::IterationCount i = 0; i < state.max_iterations; i++) {
   166        SpinNanos(delay_ns);
   167        while (v.load(std::memory_order_acquire) == 0) {
   168          FutexWait(&v, 0);
   169        }
   170        SpinNanos(kBeforeWakeDelayNs + delay_ns);
   171        v.store(0, std::memory_order_release);
   172        FutexWake(&v, 1);
   173      }
   174    });
   175    for (auto _ : state) {
   176      SpinNanos(kBeforeWakeDelayNs + delay_ns);
   177      v.store(1, std::memory_order_release);
   178      FutexWake(&v, 1);
   179      SpinNanos(delay_ns);
   180      while (v.load(std::memory_order_acquire) == 1) {
   181        FutexWait(&v, 1);
   182      }
   183    }
   184  }
   185  
   186  BENCHMARK(BM_FutexRoundtripDelayed)
   187      ->MinTime(5)
   188      ->UseRealTime()
   189      ->Arg(0)
   190      ->Arg(10)
   191      ->Arg(20)
   192      ->Arg(50)
   193      ->Arg(100);
   194  
   195  }  // namespace
   196  
   197  }  // namespace testing
   198  }  // namespace gvisor