<random>: Implement Lemire's fast integer generation by MattStephanson · Pull Request #3012 · microsoft/STL (original) (raw)
Implements @lemire's "Fast Random Integer Generation in an Interval", https://dl.acm.org/doi/10.1145/3230636 and https://arxiv.org/abs/1805.10941. Fixes #178.
I'm not happy with the x86 or LCG performance, but I've been tinkering with it for weeks and haven't been able to improve it further. I'm using a Surface Pro 8, i5-1135G7. It's plugged in and set to "Best Performance", but I'm otherwise not very knowledgeable about how to run good microbenchmarks. If anyone has any thoughts, I'd love to hear them.
Benchmark code
#include <random>
#include <benchmark/benchmark.h>
/// Test URBGs alone
// Baseline: times one raw draw from a default-constructed std::mt19937,
// with no range adapter wrapped around the engine.
static void BM_mt19937(benchmark::State& state) {
    using Engine = std::mt19937;

    Engine engine;
    for (auto _ : state) {
        benchmark::DoNotOptimize(engine());
    }
}
BENCHMARK(BM_mt19937);
// Baseline: times one raw draw from a default-constructed std::mt19937_64,
// with no range adapter wrapped around the engine.
static void BM_mt19937_64(benchmark::State& state) {
    using Engine = std::mt19937_64;

    Engine engine;
    for (auto _ : state) {
        benchmark::DoNotOptimize(engine());
    }
}
BENCHMARK(BM_mt19937_64);
// Baseline: times one raw draw from a default-constructed std::minstd_rand
// (a linear congruential engine), with no range adapter wrapped around it.
static void BM_lcg(benchmark::State& state) {
    using Engine = std::minstd_rand;

    Engine engine;
    for (auto _ : state) {
        benchmark::DoNotOptimize(engine());
    }
}
BENCHMARK(BM_lcg);
// Picks the bound used by the adapter benchmarks: a value drawn once from
// std::random_device, uniformly in [10'000'000, 20'000'000], so the bound is
// only known at run time.
//
// The distribution's result type is spelled out as uint32_t. Deducing it from
// the int literals would yield std::uniform_int_distribution<int>, and the
// result would then be implicitly converted from int to uint32_t on return.
uint32_t GetMax() {
    std::random_device gen;
    std::uniform_int_distribution<uint32_t> dist(10'000'000u, 20'000'000u);
    return dist(gen);
}

static const uint32_t max = GetMax(); // random divisor to prevent strength reduction
/// Test mt19937
// Times calls to std::_Rng_from_urng<uint32_t, std::mt19937> with the runtime
// bound `max`. NOTE(review): presumably this is the pre-existing ("old")
// bounded-integer adapter that the "_new" benchmark is compared against.
static void BM_raw_mt19937_old(benchmark::State& state) {
    using Engine  = std::mt19937;
    using Adapter = std::_Rng_from_urng<uint32_t, Engine>;

    Engine engine;
    Adapter bounded(engine);
    for (auto _ : state) {
        benchmark::DoNotOptimize(bounded(max));
    }
}
BENCHMARK(BM_raw_mt19937_old);
// Times calls to std::_Rng_from_urng_v2<uint32_t, std::mt19937> with the
// runtime bound `max`. NOTE(review): presumably "_v2" is the replacement
// adapter being evaluated against the "_old" benchmark.
static void BM_raw_mt19937_new(benchmark::State& state) {
    using Engine  = std::mt19937;
    using Adapter = std::_Rng_from_urng_v2<uint32_t, Engine>;

    Engine engine;
    Adapter bounded(engine);
    for (auto _ : state) {
        benchmark::DoNotOptimize(bounded(max));
    }
}
BENCHMARK(BM_raw_mt19937_new);
/// Test mt19937_64
// Times calls to std::_Rng_from_urng<uint64_t, std::mt19937_64> with the
// runtime bound `max` (a uint32_t, widened to the adapter's argument type).
// NOTE(review): presumably this is the pre-existing ("old") adapter.
static void BM_raw_mt19937_64_old(benchmark::State& state) {
    using Engine  = std::mt19937_64;
    using Adapter = std::_Rng_from_urng<uint64_t, Engine>;

    Engine engine;
    Adapter bounded(engine);
    for (auto _ : state) {
        benchmark::DoNotOptimize(bounded(max));
    }
}
BENCHMARK(BM_raw_mt19937_64_old);
// Times calls to std::_Rng_from_urng_v2<uint64_t, std::mt19937_64> with the
// runtime bound `max` (a uint32_t, widened to the adapter's argument type).
// NOTE(review): presumably "_v2" is the replacement adapter being evaluated.
static void BM_raw_mt19937_64_new(benchmark::State& state) {
    using Engine  = std::mt19937_64;
    using Adapter = std::_Rng_from_urng_v2<uint64_t, Engine>;

    Engine engine;
    Adapter bounded(engine);
    for (auto _ : state) {
        benchmark::DoNotOptimize(bounded(max));
    }
}
BENCHMARK(BM_raw_mt19937_64_new);
/// Test minstd_rand
// Times calls to std::_Rng_from_urng<uint32_t, std::minstd_rand> with the
// runtime bound `max`. NOTE(review): presumably this is the pre-existing
// ("old") adapter that the "_new" benchmark is compared against.
static void BM_raw_lcg_old(benchmark::State& state) {
    using Engine  = std::minstd_rand;
    using Adapter = std::_Rng_from_urng<uint32_t, Engine>;

    Engine engine;
    Adapter bounded(engine);
    for (auto _ : state) {
        benchmark::DoNotOptimize(bounded(max));
    }
}
BENCHMARK(BM_raw_lcg_old);
// Times calls to std::_Rng_from_urng_v2<uint32_t, std::minstd_rand> with the
// runtime bound `max`. NOTE(review): presumably "_v2" is the replacement
// adapter being evaluated against the "_old" benchmark.
static void BM_raw_lcg_new(benchmark::State& state) {
    using Engine  = std::minstd_rand;
    using Adapter = std::_Rng_from_urng_v2<uint32_t, Engine>;

    Engine engine;
    Adapter bounded(engine);
    for (auto _ : state) {
        benchmark::DoNotOptimize(bounded(max));
    }
}
BENCHMARK(BM_raw_lcg_new);
// Google Benchmark macro that supplies the program's main(), which runs the
// benchmarks registered with BENCHMARK(...).
BENCHMARK_MAIN();
Benchmark results
x86
2022-08-08T19:53:31-07:00
Running C:\Users\steph\source\repos\sandbox\Release\sandbox.exe
Run on (8 X 2424.25 MHz CPU s)
CPU Caches:
L1 Data 48 KiB (x4)
L1 Instruction 32 KiB (x4)
L2 Unified 1280 KiB (x4)
L3 Unified 8192 KiB (x1)
----------------------------------------------------------------
Benchmark Time CPU Iterations
----------------------------------------------------------------
BM_mt19937 4.38 ns 4.39 ns 160000000
BM_mt19937_64 9.79 ns 9.77 ns 64000000
BM_lcg 9.39 ns 8.54 ns 64000000
BM_raw_mt19937_old 7.75 ns 7.67 ns 112000000
BM_raw_mt19937_new 5.18 ns 5.16 ns 100000000
BM_raw_mt19937_64_old 21.2 ns 21.0 ns 32000000
BM_raw_mt19937_64_new 19.0 ns 18.8 ns 37333333
BM_raw_lcg_old 25.9 ns 26.1 ns 26352941
BM_raw_lcg_new 28.2 ns 28.3 ns 24888889
x64
2022-08-08T19:54:41-07:00
Running C:\Users\steph\source\repos\sandbox\x64\Release\sandbox.exe
Run on (8 X 2444.76 MHz CPU s)
CPU Caches:
L1 Data 48 KiB (x4)
L1 Instruction 32 KiB (x4)
L2 Unified 1280 KiB (x4)
L3 Unified 8192 KiB (x1)
----------------------------------------------------------------
Benchmark Time CPU Iterations
----------------------------------------------------------------
BM_mt19937 3.77 ns 3.75 ns 179200000
BM_mt19937_64 3.87 ns 3.84 ns 179200000
BM_lcg 3.96 ns 4.01 ns 179200000
BM_raw_mt19937_old 5.70 ns 5.72 ns 112000000
BM_raw_mt19937_new 4.20 ns 4.24 ns 165925926
BM_raw_mt19937_64_old 8.50 ns 8.58 ns 74666667
BM_raw_mt19937_64_new 4.64 ns 4.50 ns 149333333
BM_raw_lcg_old 15.2 ns 15.4 ns 49777778
BM_raw_lcg_new 17.3 ns 17.3 ns 40727273