pgregory.net/rand@v1.0.3-0.20230808192358-a0b8ce02f4da/misc/cppbench/bench.cpp (about)

     1  // Copyright 2022 Gregory Petrosyan <gregory.petrosyan@gmail.com>
     2  //
     3  // This Source Code Form is subject to the terms of the Mozilla Public
     4  // License, v. 2.0. If a copy of the MPL was not distributed with this
     5  // file, You can obtain one at https://mozilla.org/MPL/2.0/.
     6  
     7  #include "vendor/nanobench.h"
     8  #include <cstdio>
     9  #include <cstdint>
    10  
    11  struct sfc64 {
    12      uint64_t a;
    13      uint64_t b;
    14      uint64_t c;
    15      uint64_t w;
    16  };
    17  
    18  uint64_t next(sfc64& s) {
    19      uint64_t out = s.a + s.b + s.w;
    20      s.w++;
    21      s.a = s.b ^ (s.b>>11);
    22      s.b = s.c + (s.c<<3);
    23      s.c = ((s.c << 24) | (s.c >> (64-24))) + out;
    24      return out;
    25  }
    26  
    27  uint32_t bound_mod(uint32_t n, uint64_t v) {
    28      return uint32_t(v) % n;
    29  }
    30  
    31  uint32_t bound_fp_32x32(uint32_t n, uint64_t v) {
    32      uint64_t r = uint64_t(n) * uint64_t(uint32_t(v));
    33      return r >> 32;
    34  }
    35  
    36  uint32_t bound_fp_32x64(uint32_t n, uint64_t v) {
    37      __uint128_t r = __uint128_t(n) * __uint128_t(v);
    38      return uint32_t(r >> 64);
    39  }
    40  
    41  uint32_t nearlydivisionless(uint32_t n, sfc64& s) {
    42      uint32_t x = uint32_t(next(s));
    43      uint64_t m = uint64_t(x) * uint64_t(n);
    44      uint32_t l = uint32_t(m);
    45      if (l < n) {
    46          uint32_t t = -n % n;
    47          while (l < t) {
    48              x = uint32_t(next(s));
    49              m = uint64_t(x) * uint64_t(n);
    50              l = uint32_t(m);
    51          }
    52      }
    53      return m >> 32;
    54  }
    55  
    56  int main() {
    57      ankerl::nanobench::Rng rng;
    58      uint32_t bound = rng();
    59  
    60      {
    61          uint64_t val = rng();
    62  
    63          ankerl::nanobench::Bench b;
    64          b.title("modulo reduction").unit("uint32_t").epochs(239).relative(true);
    65  
    66          b.run("modulo (biased)", [&]() {
    67              uint32_t x = bound_mod(bound, val);
    68              b.doNotOptimizeAway(x);
    69          });
    70          b.run("32x32 fixed point (biased)", [&]() {
    71              uint32_t x = bound_fp_32x32(bound, val);
    72              b.doNotOptimizeAway(x);
    73          });
    74          b.run("32x64 fixed point (unbiased*)", [&]() {
    75              uint32_t x = bound_fp_32x64(bound, val);
    76              b.doNotOptimizeAway(x);
    77          });
    78      }
    79      {
    80          sfc64 s{rng(), rng(), rng(), 1};
    81  
    82          ankerl::nanobench::Bench b;
    83          b.title("random number generation in range (sfc64)").unit("uint32_t").epochs(239).relative(true);
    84  
    85          b.run("raw uint32_t", [&]() {
    86              uint32_t x = next(s);
    87              b.doNotOptimizeAway(x);
    88          });
    89          b.run("modulo (biased)", [&]() {
    90              uint32_t x = bound_mod(bound, next(s));
    91              b.doNotOptimizeAway(x);
    92          });
    93          b.run("32x32 fixed point (biased)", [&]() {
    94              uint32_t x = bound_fp_32x32(bound, next(s));
    95              b.doNotOptimizeAway(x);
    96          });
    97          b.run("32x64 fixed point (unbiased*)", [&]() {
    98              uint32_t x = bound_fp_32x64(bound, next(s));
    99              b.doNotOptimizeAway(x);
   100          });
   101          b.run("Lemire's \"Nearly Divisionless\" (unbiased)", [&]() {
   102              uint32_t x = nearlydivisionless(bound, s);
   103              b.doNotOptimizeAway(x);
   104          });
   105      }
   106  }