<random>: Optimize uniform_int_distribution with multiply-shift (original) (raw)

I reimplemented uniform_int_distribution long ago:

template <class _Diff, class _Urng>
class _Rng_from_urng { // wrap a URNG as an RNG
public:
using _Ty0 = make_unsigned_t<_Diff>;
using _Ty1 = typename _Urng::result_type;
using _Udiff = conditional_t<sizeof(_Ty1) < sizeof(_Ty0), _Ty0, _Ty1>;
explicit _Rng_from_urng(_Urng& _Func) : _Ref(_Func), _Bits(CHAR_BIT * sizeof(_Udiff)), _Bmask(_Udiff(-1)) {
for (; (_Urng::max)() - (_Urng::min)() < _Bmask; _Bmask >>= 1) {
--_Bits;
}
}
_Diff operator()(_Diff _Index) { // adapt _Urng closed range to [0, _Index)
for (;;) { // try a sample random value
_Udiff _Ret = 0; // random bits
_Udiff _Mask = 0; // 2^N - 1, _Ret is within [0, _Mask]
while (_Mask < _Udiff(_Index - 1)) { // need more random bits
_Ret <<= _Bits - 1; // avoid full shift
_Ret <<= 1;
_Ret |= _Get_bits();
_Mask <<= _Bits - 1; // avoid full shift
_Mask <<= 1;
_Mask |= _Bmask;
}
// _Ret is [0, _Mask], _Index - 1 <= _Mask, return if unbiased
if (_Ret / _Index < _Mask / _Index |
return static_cast<_Diff>(_Ret % _Index);
}
}
}
_Udiff _Get_all_bits() {
_Udiff _Ret = 0;
for (size_t _Num = 0; _Num < CHAR_BIT * sizeof(_Udiff); _Num += _Bits) { // don't mask away any bits
_Ret <<= _Bits - 1; // avoid full shift
_Ret <<= 1;
_Ret |= _Get_bits();
}
return _Ret;
}
_Rng_from_urng(const _Rng_from_urng&) = delete;
_Rng_from_urng& operator=(const _Rng_from_urng&) = delete;
private:
_Udiff _Get_bits() { // return a random value within [0, _Bmask]
for (;;) { // repeat until random value is in range
_Udiff _Val = _Ref() - (_Urng::min)();
if (_Val <= _Bmask) {
return _Val;
}
}
}
_Urng& _Ref; // reference to URNG
size_t _Bits; // number of random bits generated by _Get_bits()
_Udiff _Bmask; // 2^_Bits - 1
};

This technique described by Daniel Lemire should be a significant improvement: https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/