pgregory.net/rand@v1.0.3-0.20230808192358-a0b8ce02f4da/misc/cppbench/bench.cpp (about) 1 // Copyright 2022 Gregory Petrosyan <gregory.petrosyan@gmail.com> 2 // 3 // This Source Code Form is subject to the terms of the Mozilla Public 4 // License, v. 2.0. If a copy of the MPL was not distributed with this 5 // file, You can obtain one at https://mozilla.org/MPL/2.0/. 6 7 #include "vendor/nanobench.h" 8 #include <cstdio> 9 #include <cstdint> 10 11 struct sfc64 { 12 uint64_t a; 13 uint64_t b; 14 uint64_t c; 15 uint64_t w; 16 }; 17 18 uint64_t next(sfc64& s) { 19 uint64_t out = s.a + s.b + s.w; 20 s.w++; 21 s.a = s.b ^ (s.b>>11); 22 s.b = s.c + (s.c<<3); 23 s.c = ((s.c << 24) | (s.c >> (64-24))) + out; 24 return out; 25 } 26 27 uint32_t bound_mod(uint32_t n, uint64_t v) { 28 return uint32_t(v) % n; 29 } 30 31 uint32_t bound_fp_32x32(uint32_t n, uint64_t v) { 32 uint64_t r = uint64_t(n) * uint64_t(uint32_t(v)); 33 return r >> 32; 34 } 35 36 uint32_t bound_fp_32x64(uint32_t n, uint64_t v) { 37 __uint128_t r = __uint128_t(n) * __uint128_t(v); 38 return uint32_t(r >> 64); 39 } 40 41 uint32_t nearlydivisionless(uint32_t n, sfc64& s) { 42 uint32_t x = uint32_t(next(s)); 43 uint64_t m = uint64_t(x) * uint64_t(n); 44 uint32_t l = uint32_t(m); 45 if (l < n) { 46 uint32_t t = -n % n; 47 while (l < t) { 48 x = uint32_t(next(s)); 49 m = uint64_t(x) * uint64_t(n); 50 l = uint32_t(m); 51 } 52 } 53 return m >> 32; 54 } 55 56 int main() { 57 ankerl::nanobench::Rng rng; 58 uint32_t bound = rng(); 59 60 { 61 uint64_t val = rng(); 62 63 ankerl::nanobench::Bench b; 64 b.title("modulo reduction").unit("uint32_t").epochs(239).relative(true); 65 66 b.run("modulo (biased)", [&]() { 67 uint32_t x = bound_mod(bound, val); 68 b.doNotOptimizeAway(x); 69 }); 70 b.run("32x32 fixed point (biased)", [&]() { 71 uint32_t x = bound_fp_32x32(bound, val); 72 b.doNotOptimizeAway(x); 73 }); 74 b.run("32x64 fixed point (unbiased*)", [&]() { 75 uint32_t x = bound_fp_32x64(bound, val); 76 b.doNotOptimizeAway(x); 77 }); 78 } 79 { 80 sfc64 s{rng(), rng(), rng(), 1}; 81 82 ankerl::nanobench::Bench b; 83 b.title("random number generation in range (sfc64)").unit("uint32_t").epochs(239).relative(true); 84 85 b.run("raw uint32_t", [&]() { 86 uint32_t x = next(s); 87 b.doNotOptimizeAway(x); 88 }); 89 b.run("modulo (biased)", [&]() { 90 uint32_t x = bound_mod(bound, next(s)); 91 b.doNotOptimizeAway(x); 92 }); 93 b.run("32x32 fixed point (biased)", [&]() { 94 uint32_t x = bound_fp_32x32(bound, next(s)); 95 b.doNotOptimizeAway(x); 96 }); 97 b.run("32x64 fixed point (unbiased*)", [&]() { 98 uint32_t x = bound_fp_32x64(bound, next(s)); 99 b.doNotOptimizeAway(x); 100 }); 101 b.run("Lemire's \"Nearly Divisionless\" (unbiased)", [&]() { 102 uint32_t x = nearlydivisionless(bound, s); 103 b.doNotOptimizeAway(x); 104 }); 105 } 106 }