pcg_extras.hpp source code [Velox/build/_deps/duckdb-src/third_party/pcg/pcg_extras.hpp]

1	/*
2	* PCG Random Number Generation for C++
3	*
4	* Copyright 2014-2017 Melissa O'Neill <oneill@pcg-random.org>,
5	* and the PCG Project contributors.
6	*
7	* SPDX-License-Identifier: (Apache-2.0 OR MIT)
8	*
9	* Licensed under the Apache License, Version 2.0 (provided in
10	* LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0)
11	* or under the MIT license (provided in LICENSE-MIT.txt and at
12	* http://opensource.org/licenses/MIT), at your option. This file may not
13	* be copied, modified, or distributed except according to those terms.
14	*
15	* Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either
16	* express or implied. See your chosen license for details.
17	*
18	* For additional information about the PCG random number generation scheme,
19	* visit http://www.pcg-random.org/.
20	*/
21
22	/*
23	* This file provides support code that is useful for random-number generation
24	* but not specific to the PCG generation scheme, including:
25	* - 128-bit int support for platforms where it isn't available natively
26	* - bit twiddling operations
27	* - I/O of 128-bit and 8-bit integers
28	* - Handling the evilness of SeedSeq
29	* - Support for efficiently producing random numbers less than a given
30	* bound
31	*/
32
33	#ifndef PCG_EXTRAS_HPP_INCLUDED
34	#define PCG_EXTRAS_HPP_INCLUDED 1
35
36	#include <cinttypes>
37	#include <cstddef>
38	#include <cstdlib>
39	#include <cstring>
40	#include <cassert>
41	#include <limits>
42	#include <iostream>
43	#include <type_traits>
44	#include <utility>
45	#include <locale>
46	#include <iterator>
47
48	#ifdef __GNUC__
49	#include <cxxabi.h>
50	#endif
51
52	/*
53	* Abstractions for compiler-specific directives
54	*/
55
56	#ifdef __GNUC__
57	#define PCG_NOINLINE __attribute__((noinline))
58	#else
59	#define PCG_NOINLINE
60	#endif
61
62	/*
63	* Some members of the PCG library use 128-bit math. When compiling on 64-bit
64	* platforms, both GCC and Clang provide 128-bit integer types that are ideal
65	* for the job.
66	*
67	* On 32-bit platforms (or with other compilers), we fall back to a C++
68	* class that provides 128-bit unsigned integers instead. It may seem
69	* like we're reinventing the wheel here, because libraries already exist
70	* that support large integers, but most existing libraries provide a very
71	* generic multiprecision code, but here we're operating at a fixed size.
72	* Also, most other libraries are fairly heavyweight. So we use a direct
73	* implementation. Sadly, it's much slower than hand-coded assembly or
74	* direct CPU support.
75	*
76	*/
77	#if __SIZEOF_INT128__
78	namespace pcg_extras {
79	typedef __uint128_t pcg128_t;
80	}
81	#define PCG_128BIT_CONSTANT(high,low) \
82	((pcg_extras::pcg128_t(high) << 64) + low)
83	#else
84	#include "pcg_uint128.hpp"
85	namespace pcg_extras {
86	typedef pcg_extras::uint_x4<uint32_t,uint64_t> pcg128_t;
87	}
88	#define PCG_128BIT_CONSTANT(high,low) \
89	pcg_extras::pcg128_t(high,low)
90	#define PCG_EMULATED_128BIT_MATH 1
91	#endif
92
93
94	namespace pcg_extras {
95
96	/*
97	* We often need to represent a "number of bits". When used normally, these
98	* numbers are never greater than 128, so an unsigned char is plenty.
99	* If you're using a nonstandard generator of a larger size, you can set
100	* PCG_BITCOUNT_T to have it define it as a larger size. (Some compilers
101	* might produce faster code if you set it to an unsigned int.)
102	*/
103
104	#ifndef PCG_BITCOUNT_T
105	typedef uint8_t bitcount_t;
106	#else
107	typedef PCG_BITCOUNT_T bitcount_t;
108	#endif
109
110	/*
111	* C++ requires us to be able to serialize RNG state by printing or reading
112	* it from a stream. Because we use 128-bit ints, we also need to be able
113	* ot print them, so here is code to do so.
114	*
115	* This code provides enough functionality to print 128-bit ints in decimal
116	* and zero-padded in hex. It's not a full-featured implementation.
117	*/
118
119	template <typename CharT, typename Traits>
120	std::basic_ostream<CharT,Traits>&
121	operator<<(std::basic_ostream<CharT,Traits>& out, pcg128_t value)
122	{
123	auto desired_base = out.flags() & out.basefield;
124	bool want_hex = desired_base == out.hex;
125
126	if (want_hex) {
127	uint64_t highpart = uint64_t(value >> `64`);
128	uint64_t lowpart = uint64_t(value);
129	auto desired_width = out.width();
130	if (desired_width > `16`) {
131	out.width(desired_width - `16`);
132	}
133	if (highpart != `0` \|\| desired_width > `16`)
134	out << highpart;
135	CharT oldfill = `'\0'`;
136	if (highpart != `0`) {
137	out.width(`16`);
138	oldfill = out.fill(`'0'`);
139	}
140	auto oldflags = out.setf(decltype(desired_base){}, out.showbase);
141	out << lowpart;
142	out.setf(oldflags);
143	if (highpart != `0`) {
144	out.fill(oldfill);
145	}
146	return out;
147	}
148	constexpr size_t MAX_CHARS_128BIT = `40`;
149
150	char buffer[MAX_CHARS_128BIT];
151	char* pos = buffer+sizeof(buffer);
152	*(--pos) = `'\0'`;
153	constexpr auto BASE = pcg128_t(`10ULL`);
154	do {
155	auto div = value / BASE;
156	auto mod = uint32_t(value - (div * BASE));
157	(--pos) = `'0'` + char*(mod);
158	value = div;
159	} while(value != pcg128_t(`0ULL`));
160	return out << pos;
161	}
162
163	template <typename CharT, typename Traits>
164	std::basic_istream<CharT,Traits>&
165	operator>>(std::basic_istream<CharT,Traits>& in, pcg128_t& value)
166	{
167	typename std::basic_istream<CharT,Traits>::sentry s(in);
168
169	if (!s)
170	return in;
171
172	constexpr auto BASE = pcg128_t(`10ULL`);
173	pcg128_t current(`0ULL`);
174	bool did_nothing = true;
175	bool overflow = false;
176	for(;;) {
177	CharT wide_ch = in.get();
178	if (!in.good())
179	break;
180	auto ch = in.narrow(wide_ch, `'\0'`);
181	if (ch < `'0'` \|\| ch > `'9'`) {
182	in.unget();
183	break;
184	}
185	did_nothing = false;
186	pcg128_t digit(uint32_t(ch - `'0'`));
187	pcg128_t timesbase = current*BASE;
188	overflow = overflow \|\| timesbase < current;
189	current = timesbase + digit;
190	overflow = overflow \|\| current < digit;
191	}
192
193	if (did_nothing \|\| overflow) {
194	in.setstate(std::ios::failbit);
195	if (overflow)
196	current = ~pcg128_t(`0ULL`);
197	}
198
199	value = current;
200
201	return in;
202	}
203
204	/*
205	* Likewise, if people use tiny rngs, we'll be serializing uint8_t.
206	* If we just used the provided IO operators, they'd read/write chars,
207	* not ints, so we need to define our own. We can redefine this operator
208	* here because we're in our own namespace.
209	*/
210
211	template <typename CharT, typename Traits>
212	std::basic_ostream<CharT,Traits>&
213	operator<<(std::basic_ostream<CharT,Traits>&out, uint8_t value)
214	{
215	return out << uint32_t(value);
216	}
217
218	template <typename CharT, typename Traits>
219	std::basic_istream<CharT,Traits>&
220	operator>>(std::basic_istream<CharT,Traits>& in, uint8_t& target)
221	{
222	uint32_t value = `0xdecea5edU`;
223	in >> value;
224	if (!in && value == `0xdecea5edU`)
225	return in;
226	if (value > uint8_t(~`0`)) {
227	in.setstate(std::ios::failbit);
228	value = ~`0U`;
229	}
230	target = uint8_t(value);
231	return in;
232	}
233
234	/ Unfortunately, the above functions don't get found in preference to the*
235	* built in ones, so we create some more specific overloads that will.
236	* Ugh.
237	*/
238
239	inline std::ostream& operator<<(std::ostream& out, uint8_t value)
240	{
241	return pcg_extras::operator<< <char>(out, value);
242	}
243
244	inline std::istream& operator>>(std::istream& in, uint8_t& value)
245	{
246	return pcg_extras::operator>> <char>(in, target&: value);
247	}
248
249
250
251	/*
252	* Useful bitwise operations.
253	*/
254
255	/*
256	* XorShifts are invertable, but they are someting of a pain to invert.
257	* This function backs them out. It's used by the whacky "inside out"
258	* generator defined later.
259	*/
260
261	template <typename itype>
262	inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift)
263	{
264	if (`2`*shift >= bits) {
265	return x ^ (x >> shift);
266	}
267	itype lowmask1 = (itype(`1U`) << (bits - shift*`2`)) - `1`;
268	itype highmask1 = ~lowmask1;
269	itype top1 = x;
270	itype bottom1 = x & lowmask1;
271	top1 ^= top1 >> shift;
272	top1 &= highmask1;
273	x = top1 \| bottom1;
274	itype lowmask2 = (itype(`1U`) << (bits - shift)) - `1`;
275	itype bottom2 = x & lowmask2;
276	bottom2 = unxorshift(bottom2, bits - shift, shift);
277	bottom2 &= lowmask1;
278	return top1 \| bottom2;
279	}
280
281	/*
282	* Rotate left and right.
283	*
284	* In ideal world, compilers would spot idiomatic rotate code and convert it
285	* to a rotate instruction. Of course, opinions vary on what the correct
286	* idiom is and how to spot it. For clang, sometimes it generates better
287	* (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM.
288	*/
289
290	template <typename itype>
291	inline itype rotl(itype value, bitcount_t rot)
292	{
293	constexpr bitcount_t bits = sizeof(itype) * `8`;
294	constexpr bitcount_t mask = bits - `1`;
295	#if PCG_USE_ZEROCHECK_ROTATE_IDIOM
296	return rot ? (value << rot) \| (value >> (bits - rot)) : value;
297	#else
298	return (value << rot) \| (value >> ((- rot) & mask));
299	#endif
300	}
301
302	template <typename itype>
303	inline itype rotr(itype value, bitcount_t rot)
304	{
305	constexpr bitcount_t bits = sizeof(itype) * `8`;
306	constexpr bitcount_t mask = bits - `1`;
307	#if PCG_USE_ZEROCHECK_ROTATE_IDIOM
308	return rot ? (value >> rot) \| (value << (bits - rot)) : value;
309	#else
310	return (value >> rot) \| (value << ((- rot) & mask));
311	#endif
312	}
313
314	/ Unfortunately, both Clang and GCC sometimes perform poorly when it comes*
315	* to properly recognizing idiomatic rotate code, so for we also provide
316	* assembler directives (enabled with PCG_USE_INLINE_ASM). Boo, hiss.
317	* (I hope that these compilers get better so that this code can die.)
318	*
319	* These overloads will be preferred over the general template code above.
320	*/
321	#if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__ \|\| __i386__)
322
323	inline uint8_t rotr(uint8_t value, bitcount_t rot)
324	{
325	asm ("rorb %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
326	return value;
327	}
328
329	inline uint16_t rotr(uint16_t value, bitcount_t rot)
330	{
331	asm ("rorw %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
332	return value;
333	}
334
335	inline uint32_t rotr(uint32_t value, bitcount_t rot)
336	{
337	asm ("rorl %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
338	return value;
339	}
340
341	#if __x86_64__
342	inline uint64_t rotr(uint64_t value, bitcount_t rot)
343	{
344	asm ("rorq %%cl, %0" : "=r" (value) : "0" (value), "c" (rot));
345	return value;
346	}
347	#endif // __x86_64__
348
349	#elif defined(_MSC_VER)
350	// Use MSVC++ bit rotation intrinsics
351
352	#pragma intrinsic(_rotr, _rotr64, _rotr8, _rotr16)
353
354	inline uint8_t rotr(uint8_t value, bitcount_t rot)
355	{
356	return _rotr8(value, rot);
357	}
358
359	inline uint16_t rotr(uint16_t value, bitcount_t rot)
360	{
361	return _rotr16(value, rot);
362	}
363
364	inline uint32_t rotr(uint32_t value, bitcount_t rot)
365	{
366	return _rotr(value, rot);
367	}
368
369	inline uint64_t rotr(uint64_t value, bitcount_t rot)
370	{
371	return _rotr64(value, rot);
372	}
373
374	#endif // PCG_USE_INLINE_ASM
375
376
377	/*
378	* The C++ SeedSeq concept (modelled by seed_seq) can fill an array of
379	* 32-bit integers with seed data, but sometimes we want to produce
380	* larger or smaller integers.
381	*
382	* The following code handles this annoyance.
383	*
384	* uneven_copy will copy an array of 32-bit ints to an array of larger or
385	* smaller ints (actually, the code is general it only needing forward
386	* iterators). The copy is identical to the one that would be performed if
387	* we just did memcpy on a standard little-endian machine, but works
388	* regardless of the endian of the machine (or the weirdness of the ints
389	* involved).
390	*
391	* generate_to initializes an array of integers using a SeedSeq
392	* object. It is given the size as a static constant at compile time and
393	* tries to avoid memory allocation. If we're filling in 32-bit constants
394	* we just do it directly. If we need a separate buffer and it's small,
395	* we allocate it on the stack. Otherwise, we fall back to heap allocation.
396	* Ugh.
397	*
398	* generate_one produces a single value of some integral type using a
399	* SeedSeq object.
400	*/
401
402	/ uneven_copy helper, case where destination ints are less than 32 bit. /
403
404	template<class SrcIter, class DestIter>
405	SrcIter uneven_copy_impl(
406	SrcIter src_first, DestIter dest_first, DestIter dest_last,
407	std::true_type)
408	{
409	typedef typename std::iterator_traits<SrcIter>::value_type src_t;
410	typedef typename std::iterator_traits<DestIter>::value_type dest_t;
411
412	constexpr bitcount_t SRC_SIZE = sizeof(src_t);
413	constexpr bitcount_t DEST_SIZE = sizeof(dest_t);
414	constexpr bitcount_t DEST_BITS = DEST_SIZE * `8`;
415	constexpr bitcount_t SCALE = SRC_SIZE / DEST_SIZE;
416
417	size_t count = `0`;
418	src_t value = `0`;
419
420	while (dest_first != dest_last) {
421	if ((count++ % SCALE) == `0`)
422	value = src_first++; // Get more bits*
423	else
424	value >>= DEST_BITS; // Move down bits
425
426	dest_first++ = dest_t(value); // Truncates, ignores high bits.*
427	}
428	return src_first;
429	}
430
431	/ uneven_copy helper, case where destination ints are more than 32 bit. /
432
433	template<class SrcIter, class DestIter>
434	SrcIter uneven_copy_impl(
435	SrcIter src_first, DestIter dest_first, DestIter dest_last,
436	std::false_type)
437	{
438	typedef typename std::iterator_traits<SrcIter>::value_type src_t;
439	typedef typename std::iterator_traits<DestIter>::value_type dest_t;
440
441	constexpr auto SRC_SIZE = sizeof(src_t);
442	constexpr auto SRC_BITS = SRC_SIZE * `8`;
443	constexpr auto DEST_SIZE = sizeof(dest_t);
444	constexpr auto SCALE = (DEST_SIZE+SRC_SIZE-`1`) / SRC_SIZE;
445
446	while (dest_first != dest_last) {
447	dest_t value(`0UL`);
448	unsigned int shift = `0`;
449
450	for (size_t i = `0`; i < SCALE; ++i) {
451	value \|= dest_t(*src_first++) << shift;
452	shift += SRC_BITS;
453	}
454
455	*dest_first++ = value;
456	}
457	return src_first;
458	}
459
460	/ uneven_copy, call the right code for larger vs. smaller /
461
462	template<class SrcIter, class DestIter>
463	inline SrcIter uneven_copy(SrcIter src_first,
464	DestIter dest_first, DestIter dest_last)
465	{
466	typedef typename std::iterator_traits<SrcIter>::value_type src_t;
467	typedef typename std::iterator_traits<DestIter>::value_type dest_t;
468
469	constexpr bool DEST_IS_SMALLER = sizeof(dest_t) < sizeof(src_t);
470
471	return uneven_copy_impl(src_first, dest_first, dest_last,
472	std::integral_constant<bool, DEST_IS_SMALLER>{});
473	}
474
475	/ generate_to, fill in a fixed-size array of integral type using a SeedSeq*
476	* (actually works for any random-access iterator)
477	*/
478
479	template <size_t size, typename SeedSeq, typename DestIter>
480	inline void generate_to_impl(SeedSeq&& generator, DestIter dest,
481	std::true_type)
482	{
483	generator.generate(dest, dest+size);
484	}
485
486	template <size_t size, typename SeedSeq, typename DestIter>
487	void generate_to_impl(SeedSeq&& generator, DestIter dest,
488	std::false_type)
489	{
490	typedef typename std::iterator_traits<DestIter>::value_type dest_t;
491	constexpr auto DEST_SIZE = sizeof(dest_t);
492	constexpr auto GEN_SIZE = sizeof(uint32_t);
493
494	constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE;
495	constexpr size_t FROM_ELEMS =
496	GEN_IS_SMALLER
497	? size * ((DEST_SIZE+GEN_SIZE-`1`) / GEN_SIZE)
498	: (size + (GEN_SIZE / DEST_SIZE) - `1`)
499	/ ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER);
500	// this odd code ^^^^^^^^^^^^^^^^^ is work-around for
501	// a bug: http://llvm.org/bugs/show_bug.cgi?id=21287
502
503	if (FROM_ELEMS <= `1024`) {
504	uint32_t buffer[FROM_ELEMS];
505	generator.generate(buffer, buffer+FROM_ELEMS);
506	uneven_copy(buffer, dest, dest+size);
507	} else {
508	uint32_t* buffer = static_cast<uint32_t>(malloc(size: GEN_SIZE FROM_ELEMS));
509	generator.generate(buffer, buffer+FROM_ELEMS);
510	uneven_copy(buffer, dest, dest+size);
511	free(ptr: static_cast<void*>(buffer));
512	}
513	}
514
515	template <size_t size, typename SeedSeq, typename DestIter>
516	inline void generate_to(SeedSeq&& generator, DestIter dest)
517	{
518	typedef typename std::iterator_traits<DestIter>::value_type dest_t;
519	constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t);
520
521	generate_to_impl<size>(std::forward<SeedSeq>(generator), dest,
522	std::integral_constant<bool, IS_32BIT>{});
523	}
524
525	/ generate_one, produce a value of integral type using a SeedSeq*
526	* (optionally, we can have it produce more than one and pick which one
527	* we want)
528	*/
529
530	template <typename UInt, size_t i = `0UL`, size_t N = i+`1UL`, typename SeedSeq>
531	inline UInt generate_one(SeedSeq&& generator)
532	{
533	UInt result[N];
534	generate_to<N>(std::forward<SeedSeq>(generator), result);
535	return result[i];
536	}
537
538	template <typename RngType>
539	auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound)
540	-> typename RngType::result_type
541	{
542	typedef typename RngType::result_type rtype;
543	rtype threshold = (RngType::max() - RngType::min() + rtype(`1`) - upper_bound)
544	% upper_bound;
545	for (;;) {
546	rtype r = rng() - RngType::min();
547	if (r >= threshold)
548	return r % upper_bound;
549	}
550	}
551
552	template <typename Iter, typename RandType>
553	void shuffle(Iter from, Iter to, RandType&& rng)
554	{
555	typedef typename std::iterator_traits<Iter>::difference_type delta_t;
556	typedef typename std::remove_reference<RandType>::type::result_type result_t;
557	auto count = to - from;
558	while (count > `1`) {
559	delta_t chosen = delta_t(bounded_rand(rng, result_t(count)));
560	--count;
561	--to;
562	using std::swap;
563	swap((from + chosen), to);
564	}
565	}
566
567	/*
568	* Although std::seed_seq is useful, it isn't everything. Often we want to
569	* initialize a random-number generator some other way, such as from a random
570	* device.
571	*
572	* Technically, it does not meet the requirements of a SeedSequence because
573	* it lacks some of the rarely-used member functions (some of which would
574	* be impossible to provide). However the C++ standard is quite specific
575	* that actual engines only called the generate method, so it ought not to be
576	* a problem in practice.
577	*/
578
579	template <typename RngType>
580	class seed_seq_from {
581	private:
582	RngType rng_;
583
584	typedef uint_least32_t result_type;
585
586	public:
587	template<typename... Args>
588	seed_seq_from(Args&&... args) :
589	rng_(std::forward<Args>(args)...)
590	{
591	// Nothing (else) to do...
592	}
593
594	template<typename Iter>
595	void generate(Iter start, Iter finish)
596	{
597	for (auto i = start; i != finish; ++i)
598	*i = result_type(rng_());
599	}
600
601	constexpr size_t size() const
602	{
603	return (sizeof(typename RngType::result_type) > sizeof(result_type)
604	&& RngType::max() > ~size_t(`0UL`))
605	? ~size_t(`0UL`)
606	: size_t(RngType::max());
607	}
608	};
609
610	/*
611	* Sometimes you might want a distinct seed based on when the program
612	* was compiled. That way, a particular instance of the program will
613	* behave the same way, but when recompiled it'll produce a different
614	* value.
615	*/
616
617	template <typename IntType>
618	struct static_arbitrary_seed {
619	private:
620	static constexpr IntType fnv(IntType hash, const char* pos) {
621	return *pos == `'\0'`
622	? hash
623	: fnv(hash: (hash * IntType(`16777619U`)) ^ *pos, pos: (pos+`1`));
624	}
625
626	public:
627	static constexpr IntType value = fnv(hash: IntType(`2166136261U` ^ sizeof(IntType)),
628	__DATE__ __TIME__ __FILE__);
629	};
630
631	// Sometimes, when debugging or testing, it's handy to be able print the name
632	// of a (in human-readable form). This code allows the idiom:
633	//
634	// cout << printable_typename<my_foo_type_t>()
635	//
636	// to print out my_foo_type_t (or its concrete type if it is a synonym)
637
638	#if __cpp_rtti \|\| __GXX_RTTI
639
640	template <typename T>
641	struct printable_typename {};
642
643	template <typename T>
644	std::ostream& operator<<(std::ostream& out, printable_typename<T>) {
645	const char implementation_typename = typeid*(T).name();
646	#ifdef __GNUC__
647	int status;
648	char* pretty_name =
649	abi::__cxa_demangle(mangled_name: implementation_typename, output_buffer: nullptr, length: nullptr, status: &status);
650	if (status == `0`)
651	out << pretty_name;
652	free(ptr: static_cast<void*>(pretty_name));
653	if (status == `0`)
654	return out;
655	#endif
656	out << implementation_typename;
657	return out;
658	}
659
660	#endif // __cpp_rtti \|\| __GXX_RTTI
661
662	} // namespace pcg_extras
663
664	#endif // PCG_EXTRAS_HPP_INCLUDED

Browse the source code of Velox/build/_deps/duckdb-src/third_party/pcg/pcg_extras.hpp