1/*
2 * PCG Random Number Generation for C++
3 *
4 * Copyright 2014-2017 Melissa O'Neill <oneill@pcg-random.org>,
5 * and the PCG Project contributors.
6 *
7 * SPDX-License-Identifier: (Apache-2.0 OR MIT)
8 *
9 * Licensed under the Apache License, Version 2.0 (provided in
10 * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0)
11 * or under the MIT license (provided in LICENSE-MIT.txt and at
12 * http://opensource.org/licenses/MIT), at your option. This file may not
13 * be copied, modified, or distributed except according to those terms.
14 *
15 * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either
16 * express or implied. See your chosen license for details.
17 *
18 * For additional information about the PCG random number generation scheme,
19 * visit http://www.pcg-random.org/.
20 */
21
22/*
23 * This file provides support code that is useful for random-number generation
24 * but not specific to the PCG generation scheme, including:
25 * - 128-bit int support for platforms where it isn't available natively
26 * - bit twiddling operations
27 * - I/O of 128-bit and 8-bit integers
28 * - Handling the evilness of SeedSeq
29 * - Support for efficiently producing random numbers less than a given
30 * bound
31 */
32
33#ifndef PCG_EXTRAS_HPP_INCLUDED
34#define PCG_EXTRAS_HPP_INCLUDED 1
35
36#include <cinttypes>
37#include <cstddef>
38#include <cstdlib>
39#include <cstring>
40#include <cassert>
41#include <limits>
42#include <iostream>
43#include <type_traits>
44#include <utility>
45#include <locale>
46#include <iterator>
47
48#ifdef __GNUC__
49#include <cxxabi.h>
50#endif
51
52/*
53 * Abstractions for compiler-specific directives
54 */
55
56#ifdef __GNUC__
57 #define PCG_NOINLINE __attribute__((noinline))
58#else
59 #define PCG_NOINLINE
60#endif
61
62/*
63 * Some members of the PCG library use 128-bit math. When compiling on 64-bit
64 * platforms, both GCC and Clang provide 128-bit integer types that are ideal
65 * for the job.
66 *
67 * On 32-bit platforms (or with other compilers), we fall back to a C++
68 * class that provides 128-bit unsigned integers instead. It may seem
69 * like we're reinventing the wheel here, because libraries already exist
70 * that support large integers, but most existing libraries provide a very
71 * generic multiprecision code, but here we're operating at a fixed size.
72 * Also, most other libraries are fairly heavyweight. So we use a direct
73 * implementation. Sadly, it's much slower than hand-coded assembly or
74 * direct CPU support.
75 *
76 */
77#if __SIZEOF_INT128__
78 namespace pcg_extras {
79 typedef __uint128_t pcg128_t;
80 }
81 #define PCG_128BIT_CONSTANT(high,low) \
82 ((pcg_extras::pcg128_t(high) << 64) + low)
83#else
84 #include "pcg_uint128.hpp"
85 namespace pcg_extras {
86 typedef pcg_extras::uint_x4<uint32_t,uint64_t> pcg128_t;
87 }
88 #define PCG_128BIT_CONSTANT(high,low) \
89 pcg_extras::pcg128_t(high,low)
90 #define PCG_EMULATED_128BIT_MATH 1
91#endif
92
93
94namespace pcg_extras {
95
96/*
97 * We often need to represent a "number of bits". When used normally, these
98 * numbers are never greater than 128, so an unsigned char is plenty.
99 * If you're using a nonstandard generator of a larger size, you can set
100 * PCG_BITCOUNT_T to have it define it as a larger size. (Some compilers
101 * might produce faster code if you set it to an unsigned int.)
102 */
103
104#ifndef PCG_BITCOUNT_T
105 typedef uint8_t bitcount_t;
106#else
107 typedef PCG_BITCOUNT_T bitcount_t;
108#endif
109
110/*
111 * C++ requires us to be able to serialize RNG state by printing or reading
112 * it from a stream. Because we use 128-bit ints, we also need to be able
113 * ot print them, so here is code to do so.
114 *
115 * This code provides enough functionality to print 128-bit ints in decimal
116 * and zero-padded in hex. It's not a full-featured implementation.
117 */
118
119template <typename CharT, typename Traits>
120std::basic_ostream<CharT,Traits>&
121operator<<(std::basic_ostream<CharT,Traits>& out, pcg128_t value)
122{
123 auto desired_base = out.flags() & out.basefield;
124 bool want_hex = desired_base == out.hex;
125
126 if (want_hex) {
127 uint64_t highpart = uint64_t(value >> 64);
128 uint64_t lowpart = uint64_t(value);
129 auto desired_width = out.width();
130 if (desired_width > 16) {
131 out.width(desired_width - 16);
132 }
133 if (highpart != 0 || desired_width > 16)
134 out << highpart;
135 CharT oldfill = '\0';
136 if (highpart != 0) {
137 out.width(16);
138 oldfill = out.fill('0');
139 }
140 auto oldflags = out.setf(decltype(desired_base){}, out.showbase);
141 out << lowpart;
142 out.setf(oldflags);
143 if (highpart != 0) {
144 out.fill(oldfill);
145 }
146 return out;
147 }
148 constexpr size_t MAX_CHARS_128BIT = 40;
149
150 char buffer[MAX_CHARS_128BIT];
151 char* pos = buffer+sizeof(buffer);
152 *(--pos) = '\0';
153 constexpr auto BASE = pcg128_t(10ULL);
154 do {
155 auto div = value / BASE;
156 auto mod = uint32_t(value - (div * BASE));
157 *(--pos) = '0' + char(mod);
158 value = div;
159 } while(value != pcg128_t(0ULL));
160 return out << pos;
161}
162
163template <typename CharT, typename Traits>
164std::basic_istream<CharT,Traits>&
165operator>>(std::basic_istream<CharT,Traits>& in, pcg128_t& value)
166{
167 typename std::basic_istream<CharT,Traits>::sentry s(in);
168
169 if (!s)
170 return in;
171
172 constexpr auto BASE = pcg128_t(10ULL);
173 pcg128_t current(0ULL);
174 bool did_nothing = true;
175 bool overflow = false;
176 for(;;) {
177 CharT wide_ch = in.get();
178 if (!in.good())
179 break;
180 auto ch = in.narrow(wide_ch, '\0');
181 if (ch < '0' || ch > '9') {
182 in.unget();
183 break;
184 }
185 did_nothing = false;
186 pcg128_t digit(uint32_t(ch - '0'));
187 pcg128_t timesbase = current*BASE;
188 overflow = overflow || timesbase < current;
189 current = timesbase + digit;
190 overflow = overflow || current < digit;
191 }
192
193 if (did_nothing || overflow) {
194 in.setstate(std::ios::failbit);
195 if (overflow)
196 current = ~pcg128_t(0ULL);
197 }
198
199 value = current;
200
201 return in;
202}
203
204/*
205 * Likewise, if people use tiny rngs, we'll be serializing uint8_t.
206 * If we just used the provided IO operators, they'd read/write chars,
207 * not ints, so we need to define our own. We *can* redefine this operator
208 * here because we're in our own namespace.
209 */
210
211template <typename CharT, typename Traits>
212std::basic_ostream<CharT,Traits>&
213operator<<(std::basic_ostream<CharT,Traits>&out, uint8_t value)
214{
215 return out << uint32_t(value);
216}
217
218template <typename CharT, typename Traits>
219std::basic_istream<CharT,Traits>&
220operator>>(std::basic_istream<CharT,Traits>& in, uint8_t& target)
221{
222 uint32_t value = 0xdecea5edU;
223 in >> value;
224 if (!in && value == 0xdecea5edU)
225 return in;
226 if (value > uint8_t(~0)) {
227 in.setstate(std::ios::failbit);
228 value = ~0U;
229 }
230 target = uint8_t(value);
231 return in;
232}
233
234/* Unfortunately, the above functions don't get found in preference to the
235 * built in ones, so we create some more specific overloads that will.
236 * Ugh.
237 */
238
239inline std::ostream& operator<<(std::ostream& out, uint8_t value)
240{
241 return pcg_extras::operator<< <char>(out, value);
242}
243
244inline std::istream& operator>>(std::istream& in, uint8_t& value)
245{
246 return pcg_extras::operator>> <char>(in, target&: value);
247}
248
249
250
251/*
252 * Useful bitwise operations.
253 */
254
255/*
256 * XorShifts are invertable, but they are someting of a pain to invert.
257 * This function backs them out. It's used by the whacky "inside out"
258 * generator defined later.
259 */
260
261template <typename itype>
262inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift)
263{
264 if (2*shift >= bits) {
265 return x ^ (x >> shift);
266 }
267 itype lowmask1 = (itype(1U) << (bits - shift*2)) - 1;
268 itype highmask1 = ~lowmask1;
269 itype top1 = x;
270 itype bottom1 = x & lowmask1;
271 top1 ^= top1 >> shift;
272 top1 &= highmask1;
273 x = top1 | bottom1;
274 itype lowmask2 = (itype(1U) << (bits - shift)) - 1;
275 itype bottom2 = x & lowmask2;
276 bottom2 = unxorshift(bottom2, bits - shift, shift);
277 bottom2 &= lowmask1;
278 return top1 | bottom2;
279}
280
281/*
282 * Rotate left and right.
283 *
284 * In ideal world, compilers would spot idiomatic rotate code and convert it
285 * to a rotate instruction. Of course, opinions vary on what the correct
286 * idiom is and how to spot it. For clang, sometimes it generates better
287 * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM.
288 */
289
290template <typename itype>
291inline itype rotl(itype value, bitcount_t rot)
292{
293 constexpr bitcount_t bits = sizeof(itype) * 8;
294 constexpr bitcount_t mask = bits - 1;
295#if PCG_USE_ZEROCHECK_ROTATE_IDIOM
296 return rot ? (value << rot) | (value >> (bits - rot)) : value;
297#else
298 return (value << rot) | (value >> ((- rot) & mask));
299#endif
300}
301
302template <typename itype>
303inline itype rotr(itype value, bitcount_t rot)
304{
305 constexpr bitcount_t bits = sizeof(itype) * 8;
306 constexpr bitcount_t mask = bits - 1;
307#if PCG_USE_ZEROCHECK_ROTATE_IDIOM
308 return rot ? (value >> rot) | (value << (bits - rot)) : value;
309#else
310 return (value >> rot) | (value << ((- rot) & mask));
311#endif
312}
313
314/* Unfortunately, both Clang and GCC sometimes perform poorly when it comes
315 * to properly recognizing idiomatic rotate code, so for we also provide
316 * assembler directives (enabled with PCG_USE_INLINE_ASM). Boo, hiss.
317 * (I hope that these compilers get better so that this code can die.)
318 *
319 * These overloads will be preferred over the general template code above.
320 */
321#if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__ || __i386__)
322
323inline uint8_t rotr(uint8_t value, bitcount_t rot)
324{
325 asm ("rorb %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
326 return value;
327}
328
329inline uint16_t rotr(uint16_t value, bitcount_t rot)
330{
331 asm ("rorw %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
332 return value;
333}
334
335inline uint32_t rotr(uint32_t value, bitcount_t rot)
336{
337 asm ("rorl %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
338 return value;
339}
340
341#if __x86_64__
342inline uint64_t rotr(uint64_t value, bitcount_t rot)
343{
344 asm ("rorq %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
345 return value;
346}
347#endif // __x86_64__
348
349#elif defined(_MSC_VER)
350 // Use MSVC++ bit rotation intrinsics
351
352#pragma intrinsic(_rotr, _rotr64, _rotr8, _rotr16)
353
354inline uint8_t rotr(uint8_t value, bitcount_t rot)
355{
356 return _rotr8(value, rot);
357}
358
359inline uint16_t rotr(uint16_t value, bitcount_t rot)
360{
361 return _rotr16(value, rot);
362}
363
364inline uint32_t rotr(uint32_t value, bitcount_t rot)
365{
366 return _rotr(value, rot);
367}
368
369inline uint64_t rotr(uint64_t value, bitcount_t rot)
370{
371 return _rotr64(value, rot);
372}
373
374#endif // PCG_USE_INLINE_ASM
375
376
377/*
378 * The C++ SeedSeq concept (modelled by seed_seq) can fill an array of
379 * 32-bit integers with seed data, but sometimes we want to produce
380 * larger or smaller integers.
381 *
382 * The following code handles this annoyance.
383 *
384 * uneven_copy will copy an array of 32-bit ints to an array of larger or
385 * smaller ints (actually, the code is general it only needing forward
386 * iterators). The copy is identical to the one that would be performed if
387 * we just did memcpy on a standard little-endian machine, but works
388 * regardless of the endian of the machine (or the weirdness of the ints
389 * involved).
390 *
391 * generate_to initializes an array of integers using a SeedSeq
392 * object. It is given the size as a static constant at compile time and
393 * tries to avoid memory allocation. If we're filling in 32-bit constants
394 * we just do it directly. If we need a separate buffer and it's small,
395 * we allocate it on the stack. Otherwise, we fall back to heap allocation.
396 * Ugh.
397 *
398 * generate_one produces a single value of some integral type using a
399 * SeedSeq object.
400 */
401
402 /* uneven_copy helper, case where destination ints are less than 32 bit. */
403
404template<class SrcIter, class DestIter>
405SrcIter uneven_copy_impl(
406 SrcIter src_first, DestIter dest_first, DestIter dest_last,
407 std::true_type)
408{
409 typedef typename std::iterator_traits<SrcIter>::value_type src_t;
410 typedef typename std::iterator_traits<DestIter>::value_type dest_t;
411
412 constexpr bitcount_t SRC_SIZE = sizeof(src_t);
413 constexpr bitcount_t DEST_SIZE = sizeof(dest_t);
414 constexpr bitcount_t DEST_BITS = DEST_SIZE * 8;
415 constexpr bitcount_t SCALE = SRC_SIZE / DEST_SIZE;
416
417 size_t count = 0;
418 src_t value = 0;
419
420 while (dest_first != dest_last) {
421 if ((count++ % SCALE) == 0)
422 value = *src_first++; // Get more bits
423 else
424 value >>= DEST_BITS; // Move down bits
425
426 *dest_first++ = dest_t(value); // Truncates, ignores high bits.
427 }
428 return src_first;
429}
430
431 /* uneven_copy helper, case where destination ints are more than 32 bit. */
432
433template<class SrcIter, class DestIter>
434SrcIter uneven_copy_impl(
435 SrcIter src_first, DestIter dest_first, DestIter dest_last,
436 std::false_type)
437{
438 typedef typename std::iterator_traits<SrcIter>::value_type src_t;
439 typedef typename std::iterator_traits<DestIter>::value_type dest_t;
440
441 constexpr auto SRC_SIZE = sizeof(src_t);
442 constexpr auto SRC_BITS = SRC_SIZE * 8;
443 constexpr auto DEST_SIZE = sizeof(dest_t);
444 constexpr auto SCALE = (DEST_SIZE+SRC_SIZE-1) / SRC_SIZE;
445
446 while (dest_first != dest_last) {
447 dest_t value(0UL);
448 unsigned int shift = 0;
449
450 for (size_t i = 0; i < SCALE; ++i) {
451 value |= dest_t(*src_first++) << shift;
452 shift += SRC_BITS;
453 }
454
455 *dest_first++ = value;
456 }
457 return src_first;
458}
459
460/* uneven_copy, call the right code for larger vs. smaller */
461
462template<class SrcIter, class DestIter>
463inline SrcIter uneven_copy(SrcIter src_first,
464 DestIter dest_first, DestIter dest_last)
465{
466 typedef typename std::iterator_traits<SrcIter>::value_type src_t;
467 typedef typename std::iterator_traits<DestIter>::value_type dest_t;
468
469 constexpr bool DEST_IS_SMALLER = sizeof(dest_t) < sizeof(src_t);
470
471 return uneven_copy_impl(src_first, dest_first, dest_last,
472 std::integral_constant<bool, DEST_IS_SMALLER>{});
473}
474
475/* generate_to, fill in a fixed-size array of integral type using a SeedSeq
476 * (actually works for any random-access iterator)
477 */
478
479template <size_t size, typename SeedSeq, typename DestIter>
480inline void generate_to_impl(SeedSeq&& generator, DestIter dest,
481 std::true_type)
482{
483 generator.generate(dest, dest+size);
484}
485
486template <size_t size, typename SeedSeq, typename DestIter>
487void generate_to_impl(SeedSeq&& generator, DestIter dest,
488 std::false_type)
489{
490 typedef typename std::iterator_traits<DestIter>::value_type dest_t;
491 constexpr auto DEST_SIZE = sizeof(dest_t);
492 constexpr auto GEN_SIZE = sizeof(uint32_t);
493
494 constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE;
495 constexpr size_t FROM_ELEMS =
496 GEN_IS_SMALLER
497 ? size * ((DEST_SIZE+GEN_SIZE-1) / GEN_SIZE)
498 : (size + (GEN_SIZE / DEST_SIZE) - 1)
499 / ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER);
500 // this odd code ^^^^^^^^^^^^^^^^^ is work-around for
501 // a bug: http://llvm.org/bugs/show_bug.cgi?id=21287
502
503 if (FROM_ELEMS <= 1024) {
504 uint32_t buffer[FROM_ELEMS];
505 generator.generate(buffer, buffer+FROM_ELEMS);
506 uneven_copy(buffer, dest, dest+size);
507 } else {
508 uint32_t* buffer = static_cast<uint32_t*>(malloc(size: GEN_SIZE * FROM_ELEMS));
509 generator.generate(buffer, buffer+FROM_ELEMS);
510 uneven_copy(buffer, dest, dest+size);
511 free(ptr: static_cast<void*>(buffer));
512 }
513}
514
515template <size_t size, typename SeedSeq, typename DestIter>
516inline void generate_to(SeedSeq&& generator, DestIter dest)
517{
518 typedef typename std::iterator_traits<DestIter>::value_type dest_t;
519 constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t);
520
521 generate_to_impl<size>(std::forward<SeedSeq>(generator), dest,
522 std::integral_constant<bool, IS_32BIT>{});
523}
524
525/* generate_one, produce a value of integral type using a SeedSeq
526 * (optionally, we can have it produce more than one and pick which one
527 * we want)
528 */
529
530template <typename UInt, size_t i = 0UL, size_t N = i+1UL, typename SeedSeq>
531inline UInt generate_one(SeedSeq&& generator)
532{
533 UInt result[N];
534 generate_to<N>(std::forward<SeedSeq>(generator), result);
535 return result[i];
536}
537
538template <typename RngType>
539auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound)
540 -> typename RngType::result_type
541{
542 typedef typename RngType::result_type rtype;
543 rtype threshold = (RngType::max() - RngType::min() + rtype(1) - upper_bound)
544 % upper_bound;
545 for (;;) {
546 rtype r = rng() - RngType::min();
547 if (r >= threshold)
548 return r % upper_bound;
549 }
550}
551
552template <typename Iter, typename RandType>
553void shuffle(Iter from, Iter to, RandType&& rng)
554{
555 typedef typename std::iterator_traits<Iter>::difference_type delta_t;
556 typedef typename std::remove_reference<RandType>::type::result_type result_t;
557 auto count = to - from;
558 while (count > 1) {
559 delta_t chosen = delta_t(bounded_rand(rng, result_t(count)));
560 --count;
561 --to;
562 using std::swap;
563 swap(*(from + chosen), *to);
564 }
565}
566
567/*
568 * Although std::seed_seq is useful, it isn't everything. Often we want to
569 * initialize a random-number generator some other way, such as from a random
570 * device.
571 *
572 * Technically, it does not meet the requirements of a SeedSequence because
573 * it lacks some of the rarely-used member functions (some of which would
574 * be impossible to provide). However the C++ standard is quite specific
575 * that actual engines only called the generate method, so it ought not to be
576 * a problem in practice.
577 */
578
579template <typename RngType>
580class seed_seq_from {
581private:
582 RngType rng_;
583
584 typedef uint_least32_t result_type;
585
586public:
587 template<typename... Args>
588 seed_seq_from(Args&&... args) :
589 rng_(std::forward<Args>(args)...)
590 {
591 // Nothing (else) to do...
592 }
593
594 template<typename Iter>
595 void generate(Iter start, Iter finish)
596 {
597 for (auto i = start; i != finish; ++i)
598 *i = result_type(rng_());
599 }
600
601 constexpr size_t size() const
602 {
603 return (sizeof(typename RngType::result_type) > sizeof(result_type)
604 && RngType::max() > ~size_t(0UL))
605 ? ~size_t(0UL)
606 : size_t(RngType::max());
607 }
608};
609
610/*
611 * Sometimes you might want a distinct seed based on when the program
612 * was compiled. That way, a particular instance of the program will
613 * behave the same way, but when recompiled it'll produce a different
614 * value.
615 */
616
617template <typename IntType>
618struct static_arbitrary_seed {
619private:
620 static constexpr IntType fnv(IntType hash, const char* pos) {
621 return *pos == '\0'
622 ? hash
623 : fnv(hash: (hash * IntType(16777619U)) ^ *pos, pos: (pos+1));
624 }
625
626public:
627 static constexpr IntType value = fnv(hash: IntType(2166136261U ^ sizeof(IntType)),
628 __DATE__ __TIME__ __FILE__);
629};
630
631// Sometimes, when debugging or testing, it's handy to be able print the name
632// of a (in human-readable form). This code allows the idiom:
633//
634// cout << printable_typename<my_foo_type_t>()
635//
636// to print out my_foo_type_t (or its concrete type if it is a synonym)
637
638#if __cpp_rtti || __GXX_RTTI
639
640template <typename T>
641struct printable_typename {};
642
643template <typename T>
644std::ostream& operator<<(std::ostream& out, printable_typename<T>) {
645 const char *implementation_typename = typeid(T).name();
646#ifdef __GNUC__
647 int status;
648 char* pretty_name =
649 abi::__cxa_demangle(mangled_name: implementation_typename, output_buffer: nullptr, length: nullptr, status: &status);
650 if (status == 0)
651 out << pretty_name;
652 free(ptr: static_cast<void*>(pretty_name));
653 if (status == 0)
654 return out;
655#endif
656 out << implementation_typename;
657 return out;
658}
659
660#endif // __cpp_rtti || __GXX_RTTI
661
662} // namespace pcg_extras
663
664#endif // PCG_EXTRAS_HPP_INCLUDED