pcg_random.hpp source code [ClickHouse/contrib/libpcg-random/include/pcg_random.hpp]

1	/*
2	* PCG Random Number Generation for C++
3	*
4	* Copyright 2014-2017 Melissa O'Neill <oneill@pcg-random.org>,
5	* and the PCG Project contributors.
6	*
7	* SPDX-License-Identifier: (Apache-2.0 OR MIT)
8	*
9	* Licensed under the Apache License, Version 2.0 (provided in
10	* LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0)
11	* or under the MIT license (provided in LICENSE-MIT.txt and at
12	* http://opensource.org/licenses/MIT), at your option. This file may not
13	* be copied, modified, or distributed except according to those terms.
14	*
15	* Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either
16	* express or implied. See your chosen license for details.
17	*
18	* For additional information about the PCG random number generation scheme,
19	* visit http://www.pcg-random.org/.
20	*/
21
22	/*
23	* This code provides the reference implementation of the PCG family of
24	* random number generators. The code is complex because it implements
25	*
26	* - several members of the PCG family, specifically members corresponding
27	* to the output functions:
28	* - XSH RR (good for 64-bit state, 32-bit output)
29	* - XSH RS (good for 64-bit state, 32-bit output)
30	* - XSL RR (good for 128-bit state, 64-bit output)
31	* - RXS M XS (statistically most powerful generator)
32	* - XSL RR RR (good for 128-bit state, 128-bit output)
33	* - and RXS, RXS M, XSH, XSL (mostly for testing)
34	* - at potentially arbitrary bit sizes
35	* - with four different techniques for random streams (MCG, one-stream
36	* LCG, settable-stream LCG, unique-stream LCG)
37	* - and the extended generation schemes allowing arbitrary periods
38	* - with all features of C++11 random number generation (and more),
39	* some of which are somewhat painful, including
40	* - initializing with a SeedSequence which writes 32-bit values
41	* to memory, even though the state of the generator may not
42	* use 32-bit values (it might use smaller or larger integers)
43	* - I/O for RNGs and a prescribed format, which needs to handle
44	* the issue that 8-bit and 128-bit integers don't have working
45	* I/O routines (e.g., normally 8-bit = char, not integer)
46	* - equality and inequality for RNGs
47	* - and a number of convenience typedefs to mask all the complexity
48	*
49	* The code employs a fairly heavy level of abstraction, and has to deal
50	* with various C++ minutiae. If you're looking to learn about how the PCG
51	* scheme works, you're probably best off starting with one of the other
52	* codebases (see www.pcg-random.org). But if you're curious about the
53	* constants for the various output functions used in those other, simpler,
54	* codebases, this code shows how they are calculated.
55	*
56	* On the positive side, at least there are convenience typedefs so that you
57	* can say
58	*
59	* pcg32 myRNG;
60	*
61	* rather than:
62	*
63	* pcg_detail::engine<
64	* uint32_t, // Output Type
65	* uint64_t, // State Type
66	* pcg_detail::xsh_rr_mixin<uint32_t, uint64_t>, true, // Output Func
67	* pcg_detail::specific_stream<uint64_t>, // Stream Kind
68	* pcg_detail::default_multiplier<uint64_t> // LCG Mult
69	* > myRNG;
70	*
71	*/
72
73	#ifndef PCG_RAND_HPP_INCLUDED
74	#define PCG_RAND_HPP_INCLUDED 1
75
76	#include <algorithm>
77	#include <cinttypes>
78	#include <cstddef>
79	#include <cstdlib>
80	#include <cstring>
81	#include <cassert>
82	#include <limits>
83	#include <iostream>
84	#include <iterator>
85	#include <type_traits>
86	#include <utility>
87	#include <locale>
88	#include <new>
89	#include <stdexcept>
90
91	#ifdef _MSC_VER
92	#pragma warning(disable:4146)
93	#endif
94
/*
 * PCG_ALWAYS_INLINE expands to the strongest inlining request the compiler
 * understands, falling back to plain `inline` elsewhere.
 */
#ifdef _MSC_VER
    // __forceinline is the documented spelling; the single-underscore form
    // is only a legacy synonym that MSVC drops in strict-conformance mode.
    #define PCG_ALWAYS_INLINE __forceinline
#elif defined(__GNUC__)
    #define PCG_ALWAYS_INLINE __attribute__((always_inline))
#else
    #define PCG_ALWAYS_INLINE inline
#endif
102
103	/*
104	* The pcg_extras namespace contains some support code that is likely to
105	* be useful for a variety of RNGs, including:
106	* - 128-bit int support for platforms where it isn't available natively
107	* - bit twiddling operations
108	* - I/O of 128-bit and 8-bit integers
109	* - Handling the evilness of SeedSeq
110	* - Support for efficiently producing random numbers less than a given
111	* bound
112	*/
113
114	#include "pcg_extras.hpp"
115
116	namespace pcg_detail {
117
118	using namespace pcg_extras;
119
120	/*
121	* The LCG generators need some constants to function. This code lets you
122	* look up the constant by type. For example
123	*
124	* default_multiplier<uint32_t>::multiplier()
125	*
126	* gives you the default multiplier for 32-bit integers. We use the name
127	* of the constant and not a generic word like value to allow these classes
128	* to be used as mixins.
129	*/
130
/*
 * Primary template for the default LCG multiplier lookup.  It is left empty
 * on purpose: only the explicit specializations provide multiplier().
 */
template <typename T>
struct default_multiplier {
    // Not defined for an arbitrary type
};
135
/*
 * Primary template for the default LCG increment lookup.  It is left empty
 * on purpose: only the explicit specializations provide increment().
 */
template <typename T>
struct default_increment {
    // Not defined for an arbitrary type
};
140
/*
 * PCG_DEFINE_CONSTANT(type, what, kind, constant)
 *
 * Emits the explicit specialization what_kind<type> whose only member is a
 * static constexpr function kind() returning `constant`.  The expansion
 * already ends in ';', so invocations are written without one.
 */
#define PCG_DEFINE_CONSTANT(type, what, kind, constant)               \
        template <>                                                   \
        struct what ## _ ## kind<type> {                              \
            static constexpr type kind() {                            \
                return constant;                                      \
            }                                                         \
        };
148
149	PCG_DEFINE_CONSTANT(uint8_t, default, multiplier, `141U`)
150	PCG_DEFINE_CONSTANT(uint8_t, default, increment, `77U`)
151
152	PCG_DEFINE_CONSTANT(uint16_t, default, multiplier, `12829U`)
153	PCG_DEFINE_CONSTANT(uint16_t, default, increment, `47989U`)
154
155	PCG_DEFINE_CONSTANT(uint32_t, default, multiplier, `747796405U`)
156	PCG_DEFINE_CONSTANT(uint32_t, default, increment, `2891336453U`)
157
158	PCG_DEFINE_CONSTANT(uint64_t, default, multiplier, `6364136223846793005ULL`)
159	PCG_DEFINE_CONSTANT(uint64_t, default, increment, `1442695040888963407ULL`)
160
161	PCG_DEFINE_CONSTANT(pcg128_t, default, multiplier,
162	PCG_128BIT_CONSTANT(`2549297995355413924ULL`,`4865540595714422341ULL`))
163	PCG_DEFINE_CONSTANT(pcg128_t, default, increment,
164	PCG_128BIT_CONSTANT(`6364136223846793005ULL`,`1442695040888963407ULL`))
165
166
167	/*
168	* Each PCG generator is available in four variants, based on how it applies
169	* the additive constant for its underlying LCG; the variations are:
170	*
171	* single stream - all instances use the same fixed constant, thus
172	* the RNG is always somewhere in the same sequence
173	* mcg - adds zero, resulting in a single stream and reduced
174	* period
175	* specific stream - the constant can be changed at any time, selecting
176	* a different random sequence
177	* unique stream - the constant is based on the memory address of the
178	* object, thus every RNG has its own unique sequence
179	*
180	* This variation is provided through mixin classes which define a function
181	* value called increment() that returns the necessary additive constant.
182	*/
183
184
185
186	/*
187	* unique stream
188	*/
189
190
/*
 * Stream mixin whose additive constant is computed from the address of the
 * object itself (forced odd), so it cannot be chosen by the caller.
 */
template <typename itype>
class unique_stream {
protected:
    static constexpr bool is_mcg = false;

    // Is never called, but is provided for symmetry with specific_stream
    void set_stream(...)
    {
        abort();
    }

public:
    typedef itype state_type;

    // The LCG increment: this object's address with the low bit set.
    constexpr itype increment() const {
        // uintptr_t is the integer type meant to hold a pointer value;
        // `unsigned long` is narrower than a pointer on LLP64 targets,
        // where the cast would not compile.
        return itype(reinterpret_cast<std::uintptr_t>(this) | 1);
    }

    // The stream id is the increment without its forced low bit.
    constexpr itype stream() const
    {
         return increment() >> 1;
    }

    static constexpr bool can_specify_stream = false;

    // log2 of the number of streams: limited by the narrower of the state
    // type and size_t, minus the bit that is always set.
    static constexpr size_t streams_pow2()
    {
        return (sizeof(itype) < sizeof(size_t) ? sizeof(itype)
                                               : sizeof(size_t))*8 - 1u;
    }

protected:
    constexpr unique_stream() = default;
};
225
226
227	/*
228	* no stream (mcg)
229	*/
230
/*
 * Stream mixin for the MCG variant: the additive constant is always zero and
 * there is nothing to select.
 */
template <typename itype>
class no_stream {
protected:
    static constexpr bool is_mcg = true;

    // Is never called, but is provided for symmetry with specific_stream
    void set_stream(...)
    {
        abort();
    }

public:
    typedef itype state_type;

    // An MCG adds nothing when it steps.
    static constexpr itype increment() {
        return 0;
    }

    static constexpr bool can_specify_stream = false;

    // log2 of the number of selectable streams (2^0 == 1).
    static constexpr size_t streams_pow2()
    {
        return 0u;
    }

protected:
    constexpr no_stream() = default;
};
259
260
261	/*
262	* single stream/sequence (oneseq)
263	*/
264
265	template <typename itype>
266	class oneseq_stream : public default_increment<itype> {
267	protected:
268	static constexpr bool is_mcg = false;
269
270	// Is never called, but is provided for symmetry with specific_stream
271	void set_stream(...)
272	{
273	abort();
274	}
275
276	public:
277	typedef itype state_type;
278
279	static constexpr itype stream()
280	{
281	return default_increment<itype>::increment() >> `1`;
282	}
283
284	static constexpr bool can_specify_stream = false;
285
286	static constexpr size_t streams_pow2()
287	{
288	return `0u`;
289	}
290
291	protected:
292	constexpr oneseq_stream() = default;
293	};
294
295
296	/*
297	* specific stream
298	*/
299
300	template <typename itype>
301	class specific_stream {
302	protected:
303	static constexpr bool is_mcg = false;
304
305	itype inc_ = default_increment<itype>::increment();
306
307	public:
308	typedef itype state_type;
309	typedef itype stream_state;
310
311	constexpr itype increment() const {
312	return inc_;
313	}
314
315	itype stream()
316	{
317	return inc_ >> `1`;
318	}
319
320	void set_stream(itype specific_seq)
321	{
322	inc_ = (specific_seq << `1`) \| `1`;
323	}
324
325	static constexpr bool can_specify_stream = true;
326
327	static constexpr size_t streams_pow2()
328	{
329	return (sizeof(itype)*`8`) - `1u`;
330	}
331
332	protected:
333	specific_stream() = default;
334
335	specific_stream(itype specific_seq)
336	: inc_(itype(specific_seq << `1`) \| itype(`1U`))
337	{
338	// Nothing (else) to do.
339	}
340	};
341
342
343	/*
344	* This is where it all comes together. This function joins together three
345	* mixin classes which define
346	* - the LCG additive constant (the stream)
347	* - the LCG multiplier
348	* - the output function
349	* in addition, we specify the type of the LCG state, and the result type,
350	* and whether to use the pre-advance version of the state for the output
351	* (increasing instruction-level parallelism) or the post-advance version
352	* (reducing register pressure).
353	*
354	* Given the high level of parameterization, the code has to use some
355	* template-metaprogramming tricks to handle some of the subtle variations
356	* involved.
357	*/
358
359	template <typename xtype, typename itype,
360	typename output_mixin,
361	bool output_previous = true,
362	typename stream_mixin = oneseq_stream<itype>,
363	typename multiplier_mixin = default_multiplier<itype> >
364	class engine : protected output_mixin,
365	public stream_mixin,
366	protected multiplier_mixin {
367	protected:
368	itype state_;
369
370	struct can_specify_stream_tag {};
371	struct no_specifiable_stream_tag {};
372
373	using stream_mixin::increment;
374	using multiplier_mixin::multiplier;
375
376	public:
377	typedef xtype result_type;
378	typedef itype state_type;
379
380	static constexpr size_t period_pow2()
381	{
382	return sizeof(state_type)`8` - `2`stream_mixin::is_mcg;
383	}
384
385	// It would be nice to use std::numeric_limits for these, but
386	// we can't be sure that it'd be defined for the 128-bit types.
387
388	static constexpr result_type min()
389	{
390	return result_type(`0UL`);
391	}
392
393	static constexpr result_type max()
394	{
395	return result_type(~result_type(`0UL`));
396	}
397
398	protected:
399	itype bump(itype state)
400	{
401	return state * multiplier() + increment();
402	}
403
404	itype base_generate()
405	{
406	return state_ = bump(state_);
407	}
408
409	itype base_generate0()
410	{
411	itype old_state = state_;
412	state_ = bump(state_);
413	return old_state;
414	}
415
416	public:
417	result_type operator()()
418	{
419	if (output_previous)
420	return this->output(base_generate0());
421	else
422	return this->output(base_generate());
423	}
424
425	result_type operator()(result_type upper_bound)
426	{
427	return bounded_rand(*this, upper_bound);
428	}
429
430	protected:
431	static itype advance(itype state, itype delta,
432	itype cur_mult, itype cur_plus);
433
434	static itype distance(itype cur_state, itype newstate, itype cur_mult,
435	itype cur_plus, itype mask = ~itype(`0U`));
436
437	itype distance(itype newstate, itype mask = itype(~itype(`0U`))) const
438	{
439	return distance(state_, newstate, multiplier(), increment(), mask);
440	}
441
442	public:
443	void advance(itype delta)
444	{
445	state_ = advance(state_, delta, this->multiplier(), this->increment());
446	}
447
448	void backstep(itype delta)
449	{
450	advance(-delta);
451	}
452
453	void discard(itype delta)
454	{
455	advance(delta);
456	}
457
458	bool wrapped()
459	{
460	if (stream_mixin::is_mcg) {
461	// For MCGs, the low order two bits never change. In this
462	// implementation, we keep them fixed at 3 to make this test
463	// easier.
464	return state_ == `3`;
465	} else {
466	return state_ == `0`;
467	}
468	}
469
470	engine(itype state = itype(`0xcafef00dd15ea5e5ULL`))
471	: state_(this->is_mcg ? state\|state_type(`3U`)
472	: bump(state + this->increment()))
473	{
474	// Nothing else to do.
475	}
476
477	// This function may or may not exist. It thus has to be a template
478	// to use SFINAE; users don't have to worry about its template-ness.
479
480	template <typename sm = stream_mixin>
481	engine(itype state, typename sm::stream_state stream_seed)
482	: stream_mixin(stream_seed),
483	state_(this->is_mcg ? state\|state_type(`3U`)
484	: bump(state + this->increment()))
485	{
486	// Nothing else to do.
487	}
488
489	template<typename SeedSeq>
490	engine(SeedSeq&& seedSeq, typename std::enable_if<
491	!stream_mixin::can_specify_stream
492	&& !std::is_convertible<SeedSeq, itype>::value
493	&& !std::is_convertible<SeedSeq, engine>::value,
494	no_specifiable_stream_tag>::type = {})
495	: engine(generate_one<itype>(std::forward<SeedSeq>(seedSeq)))
496	{
497	// Nothing else to do.
498	}
499
500	template<typename SeedSeq>
501	engine(SeedSeq&& seedSeq, typename std::enable_if<
502	stream_mixin::can_specify_stream
503	&& !std::is_convertible<SeedSeq, itype>::value
504	&& !std::is_convertible<SeedSeq, engine>::value,
505	can_specify_stream_tag>::type = {})
506	: engine(generate_one<itype,`1`,`2`>(seedSeq),
507	generate_one<itype,`0`,`2`>(seedSeq))
508	{
509	// Nothing else to do.
510	}
511
512
513	template<typename... Args>
514	void seed(Args&&... args)
515	{
516	new (this) engine(std::forward<Args>(args)...);
517	}
518
519	template <typename xtype1, typename itype1,
520	typename output_mixin1, bool output_previous1,
521	typename stream_mixin_lhs, typename multiplier_mixin_lhs,
522	typename stream_mixin_rhs, typename multiplier_mixin_rhs>
523	friend bool operator==(const engine<xtype1,itype1,
524	output_mixin1,output_previous1,
525	stream_mixin_lhs, multiplier_mixin_lhs>&,
526	const engine<xtype1,itype1,
527	output_mixin1,output_previous1,
528	stream_mixin_rhs, multiplier_mixin_rhs>&);
529
530	template <typename xtype1, typename itype1,
531	typename output_mixin1, bool output_previous1,
532	typename stream_mixin_lhs, typename multiplier_mixin_lhs,
533	typename stream_mixin_rhs, typename multiplier_mixin_rhs>
534	friend itype1 operator-(const engine<xtype1,itype1,
535	output_mixin1,output_previous1,
536	stream_mixin_lhs, multiplier_mixin_lhs>&,
537	const engine<xtype1,itype1,
538	output_mixin1,output_previous1,
539	stream_mixin_rhs, multiplier_mixin_rhs>&);
540
541	template <typename CharT, typename Traits,
542	typename xtype1, typename itype1,
543	typename output_mixin1, bool output_previous1,
544	typename stream_mixin1, typename multiplier_mixin1>
545	friend std::basic_ostream<CharT,Traits>&
546	operator<<(std::basic_ostream<CharT,Traits>& out,
547	const engine<xtype1,itype1,
548	output_mixin1,output_previous1,
549	stream_mixin1, multiplier_mixin1>&);
550
551	template <typename CharT, typename Traits,
552	typename xtype1, typename itype1,
553	typename output_mixin1, bool output_previous1,
554	typename stream_mixin1, typename multiplier_mixin1>
555	friend std::basic_istream<CharT,Traits>&
556	operator>>(std::basic_istream<CharT,Traits>& in,
557	engine<xtype1, itype1,
558	output_mixin1, output_previous1,
559	stream_mixin1, multiplier_mixin1>& rng);
560	};
561
562	template <typename CharT, typename Traits,
563	typename xtype, typename itype,
564	typename output_mixin, bool output_previous,
565	typename stream_mixin, typename multiplier_mixin>
566	std::basic_ostream<CharT,Traits>&
567	operator<<(std::basic_ostream<CharT,Traits>& out,
568	const engine<xtype,itype,
569	output_mixin,output_previous,
570	stream_mixin, multiplier_mixin>& rng)
571	{
572	auto orig_flags = out.flags(std::ios_base::dec \| std::ios_base::left);
573	auto space = out.widen(`' '`);
574	auto orig_fill = out.fill();
575
576	out << rng.multiplier() << space
577	<< rng.increment() << space
578	<< rng.state_;
579
580	out.flags(orig_flags);
581	out.fill(orig_fill);
582	return out;
583	}
584
585
586	template <typename CharT, typename Traits,
587	typename xtype, typename itype,
588	typename output_mixin, bool output_previous,
589	typename stream_mixin, typename multiplier_mixin>
590	std::basic_istream<CharT,Traits>&
591	operator>>(std::basic_istream<CharT,Traits>& in,
592	engine<xtype,itype,
593	output_mixin,output_previous,
594	stream_mixin, multiplier_mixin>& rng)
595	{
596	auto orig_flags = in.flags(std::ios_base::dec \| std::ios_base::skipws);
597
598	itype multiplier, increment, state;
599	in >> multiplier >> increment >> state;
600
601	if (!in.fail()) {
602	bool good = true;
603	if (multiplier != rng.multiplier()) {
604	good = false;
605	} else if (rng.can_specify_stream) {
606	rng.set_stream(increment >> `1`);
607	} else if (increment != rng.increment()) {
608	good = false;
609	}
610	if (good) {
611	rng.state_ = state;
612	} else {
613	in.clear(std::ios::failbit);
614	}
615	}
616
617	in.flags(orig_flags);
618	return in;
619	}
620
621
622	template <typename xtype, typename itype,
623	typename output_mixin, bool output_previous,
624	typename stream_mixin, typename multiplier_mixin>
625	itype engine<xtype,itype,output_mixin,output_previous,stream_mixin,
626	multiplier_mixin>::advance(
627	itype state, itype delta, itype cur_mult, itype cur_plus)
628	{
629	// The method used here is based on Brown, "Random Number Generation
630	// with Arbitrary Stride,", Transactions of the American Nuclear
631	// Society (Nov. 1994). The algorithm is very similar to fast
632	// exponentiation.
633	//
634	// Even though delta is an unsigned integer, we can pass a
635	// signed integer to go backwards, it just goes "the long way round".
636
637	constexpr itype ZERO = `0u`; // itype may be a non-trivial types, so
638	constexpr itype ONE = `1u`; // we define some ugly constants.
639	itype acc_mult = `1`;
640	itype acc_plus = `0`;
641	while (delta > ZERO) {
642	if (delta & ONE) {
643	acc_mult *= cur_mult;
644	acc_plus = acc_plus*cur_mult + cur_plus;
645	}
646	cur_plus = (cur_mult+ONE)*cur_plus;
647	cur_mult *= cur_mult;
648	delta >>= `1`;
649	}
650	return acc_mult * state + acc_plus;
651	}
652
653	template <typename xtype, typename itype,
654	typename output_mixin, bool output_previous,
655	typename stream_mixin, typename multiplier_mixin>
656	itype engine<xtype,itype,output_mixin,output_previous,stream_mixin,
657	multiplier_mixin>::distance(
658	itype cur_state, itype newstate, itype cur_mult, itype cur_plus, itype mask)
659	{
660	constexpr itype ONE = `1u`; // itype could be weird, so use constant
661	itype the_bit = stream_mixin::is_mcg ? itype(`4u`) : itype(`1u`);
662	itype distance = `0u`;
663	while ((cur_state & mask) != (newstate & mask)) {
664	if ((cur_state & the_bit) != (newstate & the_bit)) {
665	cur_state = cur_state * cur_mult + cur_plus;
666	distance \|= the_bit;
667	}
668	assert((cur_state & the_bit) == (newstate & the_bit));
669	the_bit <<= `1`;
670	cur_plus = (cur_mult+ONE)*cur_plus;
671	cur_mult *= cur_mult;
672	}
673	return stream_mixin::is_mcg ? distance >> `2` : distance;
674	}
675
676	template <typename xtype, typename itype,
677	typename output_mixin, bool output_previous,
678	typename stream_mixin_lhs, typename multiplier_mixin_lhs,
679	typename stream_mixin_rhs, typename multiplier_mixin_rhs>
680	itype operator-(const engine<xtype,itype,
681	output_mixin,output_previous,
682	stream_mixin_lhs, multiplier_mixin_lhs>& lhs,
683	const engine<xtype,itype,
684	output_mixin,output_previous,
685	stream_mixin_rhs, multiplier_mixin_rhs>& rhs)
686	{
687	static_assert(
688	std::is_same<stream_mixin_lhs, stream_mixin_rhs>::value &&
689	std::is_same<multiplier_mixin_lhs, multiplier_mixin_rhs>::value,
690	"Incomparable generators");
691	return rhs.distance(lhs.state_);
692	}
693
694
695	template <typename xtype, typename itype,
696	typename output_mixin, bool output_previous,
697	typename stream_mixin_lhs, typename multiplier_mixin_lhs,
698	typename stream_mixin_rhs, typename multiplier_mixin_rhs>
699	bool operator==(const engine<xtype,itype,
700	output_mixin,output_previous,
701	stream_mixin_lhs, multiplier_mixin_lhs>& lhs,
702	const engine<xtype,itype,
703	output_mixin,output_previous,
704	stream_mixin_rhs, multiplier_mixin_rhs>& rhs)
705	{
706	return (lhs.multiplier() == rhs.multiplier())
707	&& (lhs.increment() == rhs.increment())
708	&& (lhs.state_ == rhs.state_);
709	}
710
711	template <typename xtype, typename itype,
712	typename output_mixin, bool output_previous,
713	typename stream_mixin_lhs, typename multiplier_mixin_lhs,
714	typename stream_mixin_rhs, typename multiplier_mixin_rhs>
715	inline bool operator!=(const engine<xtype,itype,
716	output_mixin,output_previous,
717	stream_mixin_lhs, multiplier_mixin_lhs>& lhs,
718	const engine<xtype,itype,
719	output_mixin,output_previous,
720	stream_mixin_rhs, multiplier_mixin_rhs>& rhs)
721	{
722	return !operator==(lhs,rhs);
723	}
724
725
726	template <typename xtype, typename itype,
727	template<typename XT,typename IT> class output_mixin,
728	bool output_previous = (sizeof(itype) <= `8`)>
729	using oneseq_base = engine<xtype, itype,
730	output_mixin<xtype, itype>, output_previous,
731	oneseq_stream<itype> >;
732
733	template <typename xtype, typename itype,
734	template<typename XT,typename IT> class output_mixin,
735	bool output_previous = (sizeof(itype) <= `8`)>
736	using unique_base = engine<xtype, itype,
737	output_mixin<xtype, itype>, output_previous,
738	unique_stream<itype> >;
739
740	template <typename xtype, typename itype,
741	template<typename XT,typename IT> class output_mixin,
742	bool output_previous = (sizeof(itype) <= `8`)>
743	using setseq_base = engine<xtype, itype,
744	output_mixin<xtype, itype>, output_previous,
745	specific_stream<itype> >;
746
747	template <typename xtype, typename itype,
748	template<typename XT,typename IT> class output_mixin,
749	bool output_previous = (sizeof(itype) <= `8`)>
750	using mcg_base = engine<xtype, itype,
751	output_mixin<xtype, itype>, output_previous,
752	no_stream<itype> >;
753
754	/*
755	* OUTPUT FUNCTIONS.
756	*
757	* These are the core of the PCG generation scheme. They specify how to
758	* turn the base LCG's internal state into the output value of the final
759	* generator.
760	*
761	* They're implemented as mixin classes.
762	*
763	* All of the classes have code that is written to allow it to be applied
764	* at arbitrary bit sizes, although in practice they'll only be used at
765	* standard sizes supported by C++.
766	*/
767
768	/*
769	* XSH RS -- high xorshift, followed by a random shift
770	*
771	* Fast. A good performer.
772	*/
773
774	template <typename xtype, typename itype>
775	struct xsh_rs_mixin {
776	static xtype output(itype internal)
777	{
778	constexpr bitcount_t bits = bitcount_t(sizeof(itype) * `8`);
779	constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * `8`);
780	constexpr bitcount_t sparebits = bits - xtypebits;
781	constexpr bitcount_t opbits =
782	sparebits-`5` >= `64` ? `5`
783	: sparebits-`4` >= `32` ? `4`
784	: sparebits-`3` >= `16` ? `3`
785	: sparebits-`2` >= `4` ? `2`
786	: sparebits-`1` >= `1` ? `1`
787	: `0`;
788	constexpr bitcount_t mask = (`1` << opbits) - `1`;
789	constexpr bitcount_t maxrandshift = mask;
790	constexpr bitcount_t topspare = opbits;
791	constexpr bitcount_t bottomspare = sparebits - topspare;
792	constexpr bitcount_t xshift = topspare + (xtypebits+maxrandshift)/`2`;
793	bitcount_t rshift =
794	opbits ? bitcount_t(internal >> (bits - opbits)) & mask : `0`;
795	internal ^= internal >> xshift;
796	xtype result = xtype(internal >> (bottomspare - maxrandshift + rshift));
797	return result;
798	}
799	};
800
801	/*
802	* XSH RR -- high xorshift, followed by a random rotate
803	*
804	* Fast. A good performer. Slightly better statistically than XSH RS.
805	*/
806
807	template <typename xtype, typename itype>
808	struct xsh_rr_mixin {
809	static xtype output(itype internal)
810	{
811	constexpr bitcount_t bits = bitcount_t(sizeof(itype) * `8`);
812	constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype)*`8`);
813	constexpr bitcount_t sparebits = bits - xtypebits;
814	constexpr bitcount_t wantedopbits =
815	xtypebits >= `128` ? `7`
816	: xtypebits >= `64` ? `6`
817	: xtypebits >= `32` ? `5`
818	: xtypebits >= `16` ? `4`
819	: `3`;
820	constexpr bitcount_t opbits =
821	sparebits >= wantedopbits ? wantedopbits
822	: sparebits;
823	constexpr bitcount_t amplifier = wantedopbits - opbits;
824	constexpr bitcount_t mask = (`1` << opbits) - `1`;
825	constexpr bitcount_t topspare = opbits;
826	constexpr bitcount_t bottomspare = sparebits - topspare;
827	constexpr bitcount_t xshift = (topspare + xtypebits)/`2`;
828	bitcount_t rot = opbits ? bitcount_t(internal >> (bits - opbits)) & mask
829	: `0`;
830	bitcount_t amprot = (rot << amplifier) & mask;
831	internal ^= internal >> xshift;
832	xtype result = xtype(internal >> bottomspare);
833	result = rotr(result, amprot);
834	return result;
835	}
836	};
837
838	/*
839	* RXS -- random xorshift
840	*/
841
842	template <typename xtype, typename itype>
843	struct rxs_mixin {
844	static xtype output_rxs(itype internal)
845	{
846	constexpr bitcount_t bits = bitcount_t(sizeof(itype) * `8`);
847	constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype)*`8`);
848	constexpr bitcount_t shift = bits - xtypebits;
849	constexpr bitcount_t extrashift = (xtypebits - shift)/`2`;
850	bitcount_t rshift = shift > `64`+`8` ? (internal >> (bits - `6`)) & `63`
851	: shift > `32`+`4` ? (internal >> (bits - `5`)) & `31`
852	: shift > `16`+`2` ? (internal >> (bits - `4`)) & `15`
853	: shift > `8`+`1` ? (internal >> (bits - `3`)) & `7`
854	: shift > `4`+`1` ? (internal >> (bits - `2`)) & `3`
855	: shift > `2`+`1` ? (internal >> (bits - `1`)) & `1`
856	: `0`;
857	internal ^= internal >> (shift + extrashift - rshift);
858	xtype result = internal >> rshift;
859	return result;
860	}
861	};
862
863	/*
864	* RXS M XS -- random xorshift, mcg multiply, fixed xorshift
865	*
866	* The most statistically powerful generator, but all those steps
867	* make it slower than some of the others. We give it the rottenest jobs.
868	*
869	* Because it's usually used in contexts where the state type and the
870	* result type are the same, it is a permutation and is thus invertible.
871	* We thus provide a function to invert it. This function is used
872	* for the "inside out" generator used by the extended generator.
873	*/
874
875	/* Defined type-based concepts for the multiplication step. They're actually
876	* all derived by truncating the 128-bit constant, which was computed to be a
877	* good "universal" constant.
878	*/
879
/*
 * Primary template for the multiplier used by the RXS M output functions.
 * It is left empty on purpose: only the explicit specializations provide
 * multiplier().
 */
template <typename T>
struct mcg_multiplier {
    // Not defined for an arbitrary type
};
884
/*
 * Primary template for the constant that unoutput() multiplies by.  It is
 * left empty on purpose: only the explicit specializations provide
 * unmultiplier().
 */
template <typename T>
struct mcg_unmultiplier {
    // Not defined for an arbitrary type
};
889
890	PCG_DEFINE_CONSTANT(uint8_t, mcg, multiplier, `217U`)
891	PCG_DEFINE_CONSTANT(uint8_t, mcg, unmultiplier, `105U`)
892
893	PCG_DEFINE_CONSTANT(uint16_t, mcg, multiplier, `62169U`)
894	PCG_DEFINE_CONSTANT(uint16_t, mcg, unmultiplier, `28009U`)
895
896	PCG_DEFINE_CONSTANT(uint32_t, mcg, multiplier, `277803737U`)
897	PCG_DEFINE_CONSTANT(uint32_t, mcg, unmultiplier, `2897767785U`)
898
899	PCG_DEFINE_CONSTANT(uint64_t, mcg, multiplier, `12605985483714917081ULL`)
900	PCG_DEFINE_CONSTANT(uint64_t, mcg, unmultiplier, `15009553638781119849ULL`)
901
902	PCG_DEFINE_CONSTANT(pcg128_t, mcg, multiplier,
903	PCG_128BIT_CONSTANT(`17766728186571221404ULL`, `12605985483714917081ULL`))
904	PCG_DEFINE_CONSTANT(pcg128_t, mcg, unmultiplier,
905	PCG_128BIT_CONSTANT(`14422606686972528997ULL`, `15009553638781119849ULL`))
906
907
908	template <typename xtype, typename itype>
909	struct rxs_m_xs_mixin {
910	static xtype output(itype internal)
911	{
912	constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * `8`);
913	constexpr bitcount_t bits = bitcount_t(sizeof(itype) * `8`);
914	constexpr bitcount_t opbits = xtypebits >= `128` ? `6`
915	: xtypebits >= `64` ? `5`
916	: xtypebits >= `32` ? `4`
917	: xtypebits >= `16` ? `3`
918	: `2`;
919	constexpr bitcount_t shift = bits - xtypebits;
920	constexpr bitcount_t mask = (`1` << opbits) - `1`;
921	bitcount_t rshift =
922	opbits ? bitcount_t(internal >> (bits - opbits)) & mask : `0`;
923	internal ^= internal >> (opbits + rshift);
924	internal *= mcg_multiplier<itype>::multiplier();
925	xtype result = internal >> shift;
926	result ^= result >> ((`2U`*xtypebits+`2U`)/`3U`);
927	return result;
928	}
929
930	static itype unoutput(itype internal)
931	{
932	constexpr bitcount_t bits = bitcount_t(sizeof(itype) * `8`);
933	constexpr bitcount_t opbits = bits >= `128` ? `6`
934	: bits >= `64` ? `5`
935	: bits >= `32` ? `4`
936	: bits >= `16` ? `3`
937	: `2`;
938	constexpr bitcount_t mask = (`1` << opbits) - `1`;
939
940	internal = unxorshift(internal, bits, (`2U`*bits+`2U`)/`3U`);
941
942	internal *= mcg_unmultiplier<itype>::unmultiplier();
943
944	bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : `0`;
945	internal = unxorshift(internal, bits, opbits + rshift);
946
947	return internal;
948	}
949	};
950
951
952	/*
953	* RXS M -- random xorshift, mcg multiply
954	*/
955
956	template <typename xtype, typename itype>
957	struct rxs_m_mixin {
958	static xtype output(itype internal)
959	{
960	constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * `8`);
961	constexpr bitcount_t bits = bitcount_t(sizeof(itype) * `8`);
962	constexpr bitcount_t opbits = xtypebits >= `128` ? `6`
963	: xtypebits >= `64` ? `5`
964	: xtypebits >= `32` ? `4`
965	: xtypebits >= `16` ? `3`
966	: `2`;
967	constexpr bitcount_t shift = bits - xtypebits;
968	constexpr bitcount_t mask = (`1` << opbits) - `1`;
969	bitcount_t rshift = opbits ? (internal >> (bits - opbits)) & mask : `0`;
970	internal ^= internal >> (opbits + rshift);
971	internal *= mcg_multiplier<itype>::multiplier();
972	xtype result = internal >> shift;
973	return result;
974	}
975	};
976
977	/*
978	* XSL RR -- fixed xorshift (to low bits), random rotate
979	*
980	* Useful for 128-bit types that are split across two CPU registers.
981	*/
982
983	template <typename xtype, typename itype>
984	struct xsl_rr_mixin {
985	static xtype output(itype internal)
986	{
987	constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * `8`);
988	constexpr bitcount_t bits = bitcount_t(sizeof(itype) * `8`);
989	constexpr bitcount_t sparebits = bits - xtypebits;
990	constexpr bitcount_t wantedopbits = xtypebits >= `128` ? `7`
991	: xtypebits >= `64` ? `6`
992	: xtypebits >= `32` ? `5`
993	: xtypebits >= `16` ? `4`
994	: `3`;
995	constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits
996	: sparebits;
997	constexpr bitcount_t amplifier = wantedopbits - opbits;
998	constexpr bitcount_t mask = (`1` << opbits) - `1`;
999	constexpr bitcount_t topspare = sparebits;
1000	constexpr bitcount_t bottomspare = sparebits - topspare;
1001	constexpr bitcount_t xshift = (topspare + xtypebits) / `2`;
1002
1003	bitcount_t rot =
1004	opbits ? bitcount_t(internal >> (bits - opbits)) & mask : `0`;
1005	bitcount_t amprot = (rot << amplifier) & mask;
1006	internal ^= internal >> xshift;
1007	xtype result = xtype(internal >> bottomspare);
1008	result = rotr(result, amprot);
1009	return result;
1010	}
1011	};
1012
1013
1014	/*
 * XSL RR RR -- fixed xorshift (to low bits), random rotate (both parts)
 *
 * Useful for 128-bit types that are split across two CPU registers.
 * If you really want an invertible 128-bit RNG, I guess this is the one.
1019	*/
1020
1021	template <typename T> struct halfsize_trait {};
1022	template <> struct halfsize_trait<pcg128_t> { typedef uint64_t type; };
1023	template <> struct halfsize_trait<uint64_t> { typedef uint32_t type; };
1024	template <> struct halfsize_trait<uint32_t> { typedef uint16_t type; };
1025	template <> struct halfsize_trait<uint16_t> { typedef uint8_t type; };
1026
1027	template <typename xtype, typename itype>
1028	struct xsl_rr_rr_mixin {
1029	typedef typename halfsize_trait<itype>::type htype;
1030
1031	static itype output(itype internal)
1032	{
1033	constexpr bitcount_t htypebits = bitcount_t(sizeof(htype) * `8`);
1034	constexpr bitcount_t bits = bitcount_t(sizeof(itype) * `8`);
1035	constexpr bitcount_t sparebits = bits - htypebits;
1036	constexpr bitcount_t wantedopbits = htypebits >= `128` ? `7`
1037	: htypebits >= `64` ? `6`
1038	: htypebits >= `32` ? `5`
1039	: htypebits >= `16` ? `4`
1040	: `3`;
1041	constexpr bitcount_t opbits = sparebits >= wantedopbits ? wantedopbits
1042	: sparebits;
1043	constexpr bitcount_t amplifier = wantedopbits - opbits;
1044	constexpr bitcount_t mask = (`1` << opbits) - `1`;
1045	constexpr bitcount_t topspare = sparebits;
1046	constexpr bitcount_t xshift = (topspare + htypebits) / `2`;
1047
1048	bitcount_t rot =
1049	opbits ? bitcount_t(internal >> (bits - opbits)) & mask : `0`;
1050	bitcount_t amprot = (rot << amplifier) & mask;
1051	internal ^= internal >> xshift;
1052	htype lowbits = htype(internal);
1053	lowbits = rotr(lowbits, amprot);
1054	htype highbits = htype(internal >> topspare);
1055	bitcount_t rot2 = lowbits & mask;
1056	bitcount_t amprot2 = (rot2 << amplifier) & mask;
1057	highbits = rotr(highbits, amprot2);
1058	return (itype(highbits) << topspare) ^ itype(lowbits);
1059	}
1060	};
1061
1062
1063	/*
1064	* XSH -- fixed xorshift (to high bits)
1065	*
1066	* You shouldn't use this at 64-bits or less.
1067	*/
1068
1069	template <typename xtype, typename itype>
1070	struct xsh_mixin {
1071	static xtype output(itype internal)
1072	{
1073	constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * `8`);
1074	constexpr bitcount_t bits = bitcount_t(sizeof(itype) * `8`);
1075	constexpr bitcount_t sparebits = bits - xtypebits;
1076	constexpr bitcount_t topspare = `0`;
1077	constexpr bitcount_t bottomspare = sparebits - topspare;
1078	constexpr bitcount_t xshift = (topspare + xtypebits) / `2`;
1079
1080	internal ^= internal >> xshift;
1081	xtype result = internal >> bottomspare;
1082	return result;
1083	}
1084	};
1085
1086	/*
1087	* XSL -- fixed xorshift (to low bits)
1088	*
1089	* You shouldn't use this at 64-bits or less.
1090	*/
1091
1092	template <typename xtype, typename itype>
1093	struct xsl_mixin {
1094	inline xtype output(itype internal)
1095	{
1096	constexpr bitcount_t xtypebits = bitcount_t(sizeof(xtype) * `8`);
1097	constexpr bitcount_t bits = bitcount_t(sizeof(itype) * `8`);
1098	constexpr bitcount_t sparebits = bits - xtypebits;
1099	constexpr bitcount_t topspare = sparebits;
1100	constexpr bitcount_t bottomspare = sparebits - topspare;
1101	constexpr bitcount_t xshift = (topspare + xtypebits) / `2`;
1102
1103	internal ^= internal >> xshift;
1104	xtype result = internal >> bottomspare;
1105	return result;
1106	}
1107	};
1108
/* ---- End of Output Functions ---- */
1110
1111
/*
 * inside_out<baseclass> drives a generator "from the outside": it takes a
 * previously produced output value, recovers the state behind it with
 * baseclass::unoutput, moves that state, and writes the new output back.
 * This only works when output and state have the same size (enforced by
 * the static_assert below).  The class is never instantiated.
 */
template <typename baseclass>
struct inside_out : private baseclass {
    inside_out() = delete;

    using result_type = typename baseclass::result_type;
    using state_type  = typename baseclass::state_type;
    static_assert(sizeof(result_type) == sizeof(state_type),
                  "Require a RNG whose output function is a permutation");

    /*
     * Advance the value in `randval` by one step, using an increment that
     * is offset by 2*i.  Returns true when the new output equals the
     * "zero" value (the low two state bits for an MCG, 0 otherwise).
     */
    static bool external_step(result_type& randval, size_t i)
    {
        state_type next = baseclass::unoutput(randval);
        next = next * baseclass::multiplier() + baseclass::increment()
               + state_type(i*2);

        randval = baseclass::output(next);

        const state_type zero =
            baseclass::is_mcg ? next & state_type(3U) : state_type(0U);
        return randval == zero;
    }

    /*
     * Move the value in `randval` by `delta` steps (backwards when
     * `forwards` is false), using an increment offset by 2*i.  Returns
     * true when the move reaches or passes the "zero" state.
     */
    static bool external_advance(result_type& randval, size_t i,
                                 result_type delta, bool forwards = true)
    {
        state_type current = baseclass::unoutput(randval);
        const state_type mult = baseclass::multiplier();
        const state_type inc  = baseclass::increment() + state_type(i*2);
        const state_type zero =
            baseclass::is_mcg ? current & state_type(3U) : state_type(0U);

        const state_type dist_to_zero =
            baseclass::distance(current, zero, mult, inc);

        bool crosses_zero;
        if (forwards) {
            crosses_zero = dist_to_zero <= delta;
        } else {
            crosses_zero = (-dist_to_zero) <= delta;
            // Going backwards is advancing by the negated distance.
            delta = -delta;
        }

        current = baseclass::advance(current, delta, mult, inc);
        randval = baseclass::output(current);
        return crosses_zero;
    }
};
1152
1153
1154	template <bitcount_t table_pow2, bitcount_t advance_pow2, typename baseclass, typename extvalclass, bool kdd = true>
1155	class extended : public baseclass {
1156	public:
1157	typedef typename baseclass::state_type state_type;
1158	typedef typename baseclass::result_type result_type;
1159	typedef inside_out<extvalclass> insideout;
1160
1161	private:
1162	static constexpr bitcount_t rtypebits = sizeof(result_type)*`8`;
1163	static constexpr bitcount_t stypebits = sizeof(state_type)*`8`;
1164
1165	static constexpr bitcount_t tick_limit_pow2 = `64U`;
1166
1167	static constexpr size_t table_size = `1UL` << table_pow2;
1168	static constexpr size_t table_shift = stypebits - table_pow2;
1169	static constexpr state_type table_mask =
1170	(state_type(`1U`) << table_pow2) - state_type(`1U`);
1171
1172	static constexpr bool may_tick =
1173	(advance_pow2 < stypebits) && (advance_pow2 < tick_limit_pow2);
1174	static constexpr size_t tick_shift = stypebits - advance_pow2;
1175	static constexpr state_type tick_mask =
1176	may_tick ? state_type(
1177	(uint64_t(`1`) << (advance_pow2*may_tick)) - `1`)
1178	// ^-- stupidity to appease GCC warnings
1179	: ~state_type(`0U`);
1180
1181	static constexpr bool may_tock = stypebits < tick_limit_pow2;
1182
1183	result_type data_[table_size];
1184
1185	PCG_NOINLINE void advance_table();
1186
1187	PCG_NOINLINE void advance_table(state_type delta, bool isForwards = true);
1188
1189	result_type& get_extended_value()
1190	{
1191	state_type state = this->state_;
1192	if (kdd && baseclass::is_mcg) {
1193	// The low order bits of an MCG are constant, so drop them.
1194	state >>= `2`;
1195	}
1196	size_t index = kdd ? state & table_mask
1197	: state >> table_shift;
1198
1199	if (may_tick) {
1200	bool tick = kdd ? (state & tick_mask) == state_type(`0u`)
1201	: (state >> tick_shift) == state_type(`0u`);
1202	if (tick)
1203	advance_table();
1204	}
1205	if (may_tock) {
1206	bool tock = state == state_type(`0u`);
1207	if (tock)
1208	advance_table();
1209	}
1210	return data_[index];
1211	}
1212
1213	public:
1214	static constexpr size_t period_pow2()
1215	{
1216	return baseclass::period_pow2() + table_size*extvalclass::period_pow2();
1217	}
1218
1219	PCG_ALWAYS_INLINE result_type operator()()
1220	{
1221	result_type rhs = get_extended_value();
1222	result_type lhs = this->baseclass::operator()();
1223	return lhs ^ rhs;
1224	}
1225
1226	result_type operator()(result_type upper_bound)
1227	{
1228	return bounded_rand(*this, upper_bound);
1229	}
1230
1231	void set(result_type wanted)
1232	{
1233	result_type& rhs = get_extended_value();
1234	result_type lhs = this->baseclass::operator()();
1235	rhs = lhs ^ wanted;
1236	}
1237
1238	void advance(state_type distance, bool forwards = true);
1239
1240	void backstep(state_type distance)
1241	{
1242	advance(distance, false);
1243	}
1244
1245	extended(const result_type* data)
1246	: baseclass()
1247	{
1248	datainit(data);
1249	}
1250
1251	extended(const result_type* data, state_type seed)
1252	: baseclass(seed)
1253	{
1254	datainit(data);
1255	}
1256
1257	// This function may or may not exist. It thus has to be a template
1258	// to use SFINAE; users don't have to worry about its template-ness.
1259
1260	template <typename bc = baseclass>
1261	extended(const result_type* data, state_type seed,
1262	typename bc::stream_state stream_seed)
1263	: baseclass(seed, stream_seed)
1264	{
1265	datainit(data);
1266	}
1267
1268	extended()
1269	: baseclass()
1270	{
1271	selfinit();
1272	}
1273
1274	extended(state_type seed)
1275	: baseclass(seed)
1276	{
1277	selfinit();
1278	}
1279
1280	// This function may or may not exist. It thus has to be a template
1281	// to use SFINAE; users don't have to worry about its template-ness.
1282
1283	template <typename bc = baseclass>
1284	extended(state_type seed, typename bc::stream_state stream_seed)
1285	: baseclass(seed, stream_seed)
1286	{
1287	selfinit();
1288	}
1289
1290	private:
1291	void selfinit();
1292	void datainit(const result_type* data);
1293
1294	public:
1295
1296	template<typename SeedSeq, typename = typename std::enable_if<
1297	!std::is_convertible<SeedSeq, result_type>::value
1298	&& !std::is_convertible<SeedSeq, extended>::value>::type>
1299	extended(SeedSeq&& seedSeq)
1300	: baseclass(seedSeq)
1301	{
1302	generate_to<table_size>(seedSeq, data_);
1303	}
1304
1305	template<typename... Args>
1306	void seed(Args&&... args)
1307	{
1308	new (this) extended(std::forward<Args>(args)...);
1309	}
1310
1311	template <bitcount_t table_pow2_, bitcount_t advance_pow2_,
1312	typename baseclass_, typename extvalclass_, bool kdd_>
1313	friend bool operator==(const extended<table_pow2_, advance_pow2_,
1314	baseclass_, extvalclass_, kdd_>&,
1315	const extended<table_pow2_, advance_pow2_,
1316	baseclass_, extvalclass_, kdd_>&);
1317
1318	template <typename CharT, typename Traits,
1319	bitcount_t table_pow2_, bitcount_t advance_pow2_,
1320	typename baseclass_, typename extvalclass_, bool kdd_>
1321	friend std::basic_ostream<CharT,Traits>&
1322	operator<<(std::basic_ostream<CharT,Traits>& out,
1323	const extended<table_pow2_, advance_pow2_,
1324	baseclass_, extvalclass_, kdd_>&);
1325
1326	template <typename CharT, typename Traits,
1327	bitcount_t table_pow2_, bitcount_t advance_pow2_,
1328	typename baseclass_, typename extvalclass_, bool kdd_>
1329	friend std::basic_istream<CharT,Traits>&
1330	operator>>(std::basic_istream<CharT,Traits>& in,
1331	extended<table_pow2_, advance_pow2_,
1332	baseclass_, extvalclass_, kdd_>&);
1333
1334	};
1335
1336
1337	template <bitcount_t table_pow2, bitcount_t advance_pow2,
1338	typename baseclass, typename extvalclass, bool kdd>
1339	void extended<table_pow2,advance_pow2,baseclass,extvalclass,kdd>::datainit(
1340	const result_type* data)
1341	{
1342	for (size_t i = `0`; i < table_size; ++i)
1343	data_[i] = data[i];
1344	}
1345
1346	template <bitcount_t table_pow2, bitcount_t advance_pow2,
1347	typename baseclass, typename extvalclass, bool kdd>
1348	void extended<table_pow2,advance_pow2,baseclass,extvalclass,kdd>::selfinit()
1349	{
1350	// We need to fill the extended table with something, and we have
1351	// very little provided data, so we use the base generator to
1352	// produce values. Although not ideal (use a seed sequence, folks!),
1353	// unexpected correlations are mitigated by
1354	// - using XOR differences rather than the number directly
1355	// - the way the table is accessed, its values won't* be accessed*
1356	// in the same order the were written.
1357	// - any strange correlations would only be apparent if we
1358	// were to backstep the generator so that the base generator
1359	// was generating the same values again
1360	result_type xdiff = baseclass::operator()() - baseclass::operator()();
1361	for (size_t i = `0`; i < table_size; ++i) {
1362	data_[i] = baseclass::operator()() ^ xdiff;
1363	}
1364	}
1365
1366	template <bitcount_t table_pow2, bitcount_t advance_pow2,
1367	typename baseclass, typename extvalclass, bool kdd>
1368	bool operator==(const extended<table_pow2, advance_pow2,
1369	baseclass, extvalclass, kdd>& lhs,
1370	const extended<table_pow2, advance_pow2,
1371	baseclass, extvalclass, kdd>& rhs)
1372	{
1373	auto& base_lhs = static_cast<const baseclass&>(lhs);
1374	auto& base_rhs = static_cast<const baseclass&>(rhs);
1375	return base_lhs == base_rhs
1376	&& std::equal(
1377	std::begin(lhs.data_), std::end(lhs.data_),
1378	std::begin(rhs.data_)
1379	);
1380	}
1381
1382	template <bitcount_t table_pow2, bitcount_t advance_pow2,
1383	typename baseclass, typename extvalclass, bool kdd>
1384	inline bool operator!=(const extended<table_pow2, advance_pow2,
1385	baseclass, extvalclass, kdd>& lhs,
1386	const extended<table_pow2, advance_pow2,
1387	baseclass, extvalclass, kdd>& rhs)
1388	{
1389	return !operator==(lhs, rhs);
1390	}
1391
1392	template <typename CharT, typename Traits,
1393	bitcount_t table_pow2, bitcount_t advance_pow2,
1394	typename baseclass, typename extvalclass, bool kdd>
1395	std::basic_ostream<CharT,Traits>&
1396	operator<<(std::basic_ostream<CharT,Traits>& out,
1397	const extended<table_pow2, advance_pow2,
1398	baseclass, extvalclass, kdd>& rng)
1399	{
1400	auto orig_flags = out.flags(std::ios_base::dec \| std::ios_base::left);
1401	auto space = out.widen(`' '`);
1402	auto orig_fill = out.fill();
1403
1404	out << rng.multiplier() << space
1405	<< rng.increment() << space
1406	<< rng.state_;
1407
1408	for (const auto& datum : rng.data_)
1409	out << space << datum;
1410
1411	out.flags(orig_flags);
1412	out.fill(orig_fill);
1413	return out;
1414	}
1415
1416	template <typename CharT, typename Traits,
1417	bitcount_t table_pow2, bitcount_t advance_pow2,
1418	typename baseclass, typename extvalclass, bool kdd>
1419	std::basic_istream<CharT,Traits>&
1420	operator>>(std::basic_istream<CharT,Traits>& in,
1421	extended<table_pow2, advance_pow2,
1422	baseclass, extvalclass, kdd>& rng)
1423	{
1424	extended<table_pow2, advance_pow2, baseclass, extvalclass> new_rng;
1425	auto& base_rng = static_cast<baseclass&>(new_rng);
1426	in >> base_rng;
1427
1428	if (in.fail())
1429	return in;
1430
1431	auto orig_flags = in.flags(std::ios_base::dec \| std::ios_base::skipws);
1432
1433	for (auto& datum : new_rng.data_) {
1434	in >> datum;
1435	if (in.fail())
1436	goto bail;
1437	}
1438
1439	rng = new_rng;
1440
1441	bail:
1442	in.flags(orig_flags);
1443	return in;
1444	}
1445
1446
1447
1448	template <bitcount_t table_pow2, bitcount_t advance_pow2,
1449	typename baseclass, typename extvalclass, bool kdd>
1450	void
1451	extended<table_pow2,advance_pow2,baseclass,extvalclass,kdd>::advance_table()
1452	{
1453	bool carry = false;
1454	for (size_t i = `0`; i < table_size; ++i) {
1455	if (carry) {
1456	carry = insideout::external_step(data_[i],i+`1`);
1457	}
1458	bool carry2 = insideout::external_step(data_[i],i+`1`);
1459	carry = carry \|\| carry2;
1460	}
1461	}
1462
1463	template <bitcount_t table_pow2, bitcount_t advance_pow2,
1464	typename baseclass, typename extvalclass, bool kdd>
1465	void
1466	extended<table_pow2,advance_pow2,baseclass,extvalclass,kdd>::advance_table(
1467	state_type delta, bool isForwards)
1468	{
1469	typedef typename baseclass::state_type base_state_t;
1470	typedef typename extvalclass::state_type ext_state_t;
1471	constexpr bitcount_t basebits = sizeof(base_state_t)*`8`;
1472	constexpr bitcount_t extbits = sizeof(ext_state_t)*`8`;
1473	static_assert(basebits <= extbits \|\| advance_pow2 > `0`,
1474	"Current implementation might overflow its carry");
1475
1476	base_state_t carry = `0`;
1477	for (size_t i = `0`; i < table_size; ++i) {
1478	base_state_t total_delta = carry + delta;
1479	ext_state_t trunc_delta = ext_state_t(total_delta);
1480	if (basebits > extbits) {
1481	carry = total_delta >> extbits;
1482	} else {
1483	carry = `0`;
1484	}
1485	carry +=
1486	insideout::external_advance(data_[i],i+`1`, trunc_delta, isForwards);
1487	}
1488	}
1489
1490	template <bitcount_t table_pow2, bitcount_t advance_pow2,
1491	typename baseclass, typename extvalclass, bool kdd>
1492	void extended<table_pow2,advance_pow2,baseclass,extvalclass,kdd>::advance(
1493	state_type distance, bool forwards)
1494	{
1495	static_assert(kdd,
1496	"Efficient advance is too hard for non-kdd extension. "
1497	"For a weak advance, cast to base class");
1498	state_type zero =
1499	baseclass::is_mcg ? this->state_ & state_type(`3U`) : state_type(`0U`);
1500	if (may_tick) {
1501	state_type ticks = distance >> (advance_pow2*may_tick);
1502	// ^-- stupidity to appease GCC
1503	// warnings
1504	state_type adv_mask =
1505	baseclass::is_mcg ? tick_mask << `2` : tick_mask;
1506	state_type next_advance_distance = this->distance(zero, adv_mask);
1507	if (!forwards)
1508	next_advance_distance = (-next_advance_distance) & tick_mask;
1509	if (next_advance_distance < (distance & tick_mask)) {
1510	++ticks;
1511	}
1512	if (ticks)
1513	advance_table(ticks, forwards);
1514	}
1515	if (forwards) {
1516	if (may_tock && this->distance(zero) <= distance)
1517	advance_table();
1518	baseclass::advance(distance);
1519	} else {
1520	if (may_tock && -(this->distance(zero)) <= distance)
1521	advance_table(state_type(`1U`), false);
1522	baseclass::advance(-distance);
1523	}
1524	}
1525
1526	} // namespace pcg_detail
1527
1528	namespace pcg_engines {
1529
1530	using namespace pcg_detail;
1531
/* Predefined types for XSH RS */
1533
1534	typedef oneseq_base<uint8_t, uint16_t, xsh_rs_mixin> oneseq_xsh_rs_16_8;
1535	typedef oneseq_base<uint16_t, uint32_t, xsh_rs_mixin> oneseq_xsh_rs_32_16;
1536	typedef oneseq_base<uint32_t, uint64_t, xsh_rs_mixin> oneseq_xsh_rs_64_32;
1537	typedef oneseq_base<uint64_t, pcg128_t, xsh_rs_mixin> oneseq_xsh_rs_128_64;
1538
1539	typedef unique_base<uint8_t, uint16_t, xsh_rs_mixin> unique_xsh_rs_16_8;
1540	typedef unique_base<uint16_t, uint32_t, xsh_rs_mixin> unique_xsh_rs_32_16;
1541	typedef unique_base<uint32_t, uint64_t, xsh_rs_mixin> unique_xsh_rs_64_32;
1542	typedef unique_base<uint64_t, pcg128_t, xsh_rs_mixin> unique_xsh_rs_128_64;
1543
1544	typedef setseq_base<uint8_t, uint16_t, xsh_rs_mixin> setseq_xsh_rs_16_8;
1545	typedef setseq_base<uint16_t, uint32_t, xsh_rs_mixin> setseq_xsh_rs_32_16;
1546	typedef setseq_base<uint32_t, uint64_t, xsh_rs_mixin> setseq_xsh_rs_64_32;
1547	typedef setseq_base<uint64_t, pcg128_t, xsh_rs_mixin> setseq_xsh_rs_128_64;
1548
1549	typedef mcg_base<uint8_t, uint16_t, xsh_rs_mixin> mcg_xsh_rs_16_8;
1550	typedef mcg_base<uint16_t, uint32_t, xsh_rs_mixin> mcg_xsh_rs_32_16;
1551	typedef mcg_base<uint32_t, uint64_t, xsh_rs_mixin> mcg_xsh_rs_64_32;
1552	typedef mcg_base<uint64_t, pcg128_t, xsh_rs_mixin> mcg_xsh_rs_128_64;
1553
/* Predefined types for XSH RR */
1555
1556	typedef oneseq_base<uint8_t, uint16_t, xsh_rr_mixin> oneseq_xsh_rr_16_8;
1557	typedef oneseq_base<uint16_t, uint32_t, xsh_rr_mixin> oneseq_xsh_rr_32_16;
1558	typedef oneseq_base<uint32_t, uint64_t, xsh_rr_mixin> oneseq_xsh_rr_64_32;
1559	typedef oneseq_base<uint64_t, pcg128_t, xsh_rr_mixin> oneseq_xsh_rr_128_64;
1560
1561	typedef unique_base<uint8_t, uint16_t, xsh_rr_mixin> unique_xsh_rr_16_8;
1562	typedef unique_base<uint16_t, uint32_t, xsh_rr_mixin> unique_xsh_rr_32_16;
1563	typedef unique_base<uint32_t, uint64_t, xsh_rr_mixin> unique_xsh_rr_64_32;
1564	typedef unique_base<uint64_t, pcg128_t, xsh_rr_mixin> unique_xsh_rr_128_64;
1565
1566	typedef setseq_base<uint8_t, uint16_t, xsh_rr_mixin> setseq_xsh_rr_16_8;
1567	typedef setseq_base<uint16_t, uint32_t, xsh_rr_mixin> setseq_xsh_rr_32_16;
1568	typedef setseq_base<uint32_t, uint64_t, xsh_rr_mixin> setseq_xsh_rr_64_32;
1569	typedef setseq_base<uint64_t, pcg128_t, xsh_rr_mixin> setseq_xsh_rr_128_64;
1570
1571	typedef mcg_base<uint8_t, uint16_t, xsh_rr_mixin> mcg_xsh_rr_16_8;
1572	typedef mcg_base<uint16_t, uint32_t, xsh_rr_mixin> mcg_xsh_rr_32_16;
1573	typedef mcg_base<uint32_t, uint64_t, xsh_rr_mixin> mcg_xsh_rr_64_32;
1574	typedef mcg_base<uint64_t, pcg128_t, xsh_rr_mixin> mcg_xsh_rr_128_64;
1575
1576
/* Predefined types for RXS M XS */
1578
1579	typedef oneseq_base<uint8_t, uint8_t, rxs_m_xs_mixin> oneseq_rxs_m_xs_8_8;
1580	typedef oneseq_base<uint16_t, uint16_t, rxs_m_xs_mixin> oneseq_rxs_m_xs_16_16;
1581	typedef oneseq_base<uint32_t, uint32_t, rxs_m_xs_mixin> oneseq_rxs_m_xs_32_32;
1582	typedef oneseq_base<uint64_t, uint64_t, rxs_m_xs_mixin> oneseq_rxs_m_xs_64_64;
1583	typedef oneseq_base<pcg128_t, pcg128_t, rxs_m_xs_mixin> oneseq_rxs_m_xs_128_128;
1584
1585	typedef unique_base<uint8_t, uint8_t, rxs_m_xs_mixin> unique_rxs_m_xs_8_8;
1586	typedef unique_base<uint16_t, uint16_t, rxs_m_xs_mixin> unique_rxs_m_xs_16_16;
1587	typedef unique_base<uint32_t, uint32_t, rxs_m_xs_mixin> unique_rxs_m_xs_32_32;
1588	typedef unique_base<uint64_t, uint64_t, rxs_m_xs_mixin> unique_rxs_m_xs_64_64;
1589	typedef unique_base<pcg128_t, pcg128_t, rxs_m_xs_mixin> unique_rxs_m_xs_128_128;
1590
1591	typedef setseq_base<uint8_t, uint8_t, rxs_m_xs_mixin> setseq_rxs_m_xs_8_8;
1592	typedef setseq_base<uint16_t, uint16_t, rxs_m_xs_mixin> setseq_rxs_m_xs_16_16;
1593	typedef setseq_base<uint32_t, uint32_t, rxs_m_xs_mixin> setseq_rxs_m_xs_32_32;
1594	typedef setseq_base<uint64_t, uint64_t, rxs_m_xs_mixin> setseq_rxs_m_xs_64_64;
1595	typedef setseq_base<pcg128_t, pcg128_t, rxs_m_xs_mixin> setseq_rxs_m_xs_128_128;
1596
1597	// MCG versions don't make sense here, so aren't defined.
1598
/* Predefined types for XSL RR (only defined for "large" types) */
1600
1601	typedef oneseq_base<uint32_t, uint64_t, xsl_rr_mixin> oneseq_xsl_rr_64_32;
1602	typedef oneseq_base<uint64_t, pcg128_t, xsl_rr_mixin> oneseq_xsl_rr_128_64;
1603
1604	typedef unique_base<uint32_t, uint64_t, xsl_rr_mixin> unique_xsl_rr_64_32;
1605	typedef unique_base<uint64_t, pcg128_t, xsl_rr_mixin> unique_xsl_rr_128_64;
1606
1607	typedef setseq_base<uint32_t, uint64_t, xsl_rr_mixin> setseq_xsl_rr_64_32;
1608	typedef setseq_base<uint64_t, pcg128_t, xsl_rr_mixin> setseq_xsl_rr_128_64;
1609
1610	typedef mcg_base<uint32_t, uint64_t, xsl_rr_mixin> mcg_xsl_rr_64_32;
1611	typedef mcg_base<uint64_t, pcg128_t, xsl_rr_mixin> mcg_xsl_rr_128_64;
1612
1613
/* Predefined types for XSL RR RR (only defined for "large" types) */
1615
1616	typedef oneseq_base<uint64_t, uint64_t, xsl_rr_rr_mixin>
1617	oneseq_xsl_rr_rr_64_64;
1618	typedef oneseq_base<pcg128_t, pcg128_t, xsl_rr_rr_mixin>
1619	oneseq_xsl_rr_rr_128_128;
1620
1621	typedef unique_base<uint64_t, uint64_t, xsl_rr_rr_mixin>
1622	unique_xsl_rr_rr_64_64;
1623	typedef unique_base<pcg128_t, pcg128_t, xsl_rr_rr_mixin>
1624	unique_xsl_rr_rr_128_128;
1625
1626	typedef setseq_base<uint64_t, uint64_t, xsl_rr_rr_mixin>
1627	setseq_xsl_rr_rr_64_64;
1628	typedef setseq_base<pcg128_t, pcg128_t, xsl_rr_rr_mixin>
1629	setseq_xsl_rr_rr_128_128;
1630
1631	// MCG versions don't make sense here, so aren't defined.
1632
/* Extended generators */
1634
1635	template <bitcount_t table_pow2, bitcount_t advance_pow2,
1636	typename BaseRNG, bool kdd = true>
1637	using ext_std8 = extended<table_pow2, advance_pow2, BaseRNG,
1638	oneseq_rxs_m_xs_8_8, kdd>;
1639
1640	template <bitcount_t table_pow2, bitcount_t advance_pow2,
1641	typename BaseRNG, bool kdd = true>
1642	using ext_std16 = extended<table_pow2, advance_pow2, BaseRNG,
1643	oneseq_rxs_m_xs_16_16, kdd>;
1644
1645	template <bitcount_t table_pow2, bitcount_t advance_pow2,
1646	typename BaseRNG, bool kdd = true>
1647	using ext_std32 = extended<table_pow2, advance_pow2, BaseRNG,
1648	oneseq_rxs_m_xs_32_32, kdd>;
1649
1650	template <bitcount_t table_pow2, bitcount_t advance_pow2,
1651	typename BaseRNG, bool kdd = true>
1652	using ext_std64 = extended<table_pow2, advance_pow2, BaseRNG,
1653	oneseq_rxs_m_xs_64_64, kdd>;
1654
1655
1656	template <bitcount_t table_pow2, bitcount_t advance_pow2, bool kdd = true>
1657	using ext_oneseq_rxs_m_xs_32_32 =
1658	ext_std32<table_pow2, advance_pow2, oneseq_rxs_m_xs_32_32, kdd>;
1659
1660	template <bitcount_t table_pow2, bitcount_t advance_pow2, bool kdd = true>
1661	using ext_mcg_xsh_rs_64_32 =
1662	ext_std32<table_pow2, advance_pow2, mcg_xsh_rs_64_32, kdd>;
1663
1664	template <bitcount_t table_pow2, bitcount_t advance_pow2, bool kdd = true>
1665	using ext_oneseq_xsh_rs_64_32 =
1666	ext_std32<table_pow2, advance_pow2, oneseq_xsh_rs_64_32, kdd>;
1667
1668	template <bitcount_t table_pow2, bitcount_t advance_pow2, bool kdd = true>
1669	using ext_setseq_xsh_rr_64_32 =
1670	ext_std32<table_pow2, advance_pow2, setseq_xsh_rr_64_32, kdd>;
1671
1672	template <bitcount_t table_pow2, bitcount_t advance_pow2, bool kdd = true>
1673	using ext_mcg_xsl_rr_128_64 =
1674	ext_std64<table_pow2, advance_pow2, mcg_xsl_rr_128_64, kdd>;
1675
1676	template <bitcount_t table_pow2, bitcount_t advance_pow2, bool kdd = true>
1677	using ext_oneseq_xsl_rr_128_64 =
1678	ext_std64<table_pow2, advance_pow2, oneseq_xsl_rr_128_64, kdd>;
1679
1680	template <bitcount_t table_pow2, bitcount_t advance_pow2, bool kdd = true>
1681	using ext_setseq_xsl_rr_128_64 =
1682	ext_std64<table_pow2, advance_pow2, setseq_xsl_rr_128_64, kdd>;
1683
1684	} // namespace pcg_engines
1685
1686	typedef pcg_engines::setseq_xsh_rr_64_32 pcg32;
1687	typedef pcg_engines::oneseq_xsh_rr_64_32 pcg32_oneseq;
1688	typedef pcg_engines::unique_xsh_rr_64_32 pcg32_unique;
1689	typedef pcg_engines::mcg_xsh_rs_64_32 pcg32_fast;
1690
1691	typedef pcg_engines::setseq_xsl_rr_128_64 pcg64;
1692	typedef pcg_engines::oneseq_xsl_rr_128_64 pcg64_oneseq;
1693	typedef pcg_engines::unique_xsl_rr_128_64 pcg64_unique;
1694	typedef pcg_engines::mcg_xsl_rr_128_64 pcg64_fast;
1695
1696	typedef pcg_engines::setseq_rxs_m_xs_8_8 pcg8_once_insecure;
1697	typedef pcg_engines::setseq_rxs_m_xs_16_16 pcg16_once_insecure;
1698	typedef pcg_engines::setseq_rxs_m_xs_32_32 pcg32_once_insecure;
1699	typedef pcg_engines::setseq_rxs_m_xs_64_64 pcg64_once_insecure;
1700	typedef pcg_engines::setseq_xsl_rr_rr_128_128 pcg128_once_insecure;
1701
1702	typedef pcg_engines::oneseq_rxs_m_xs_8_8 pcg8_oneseq_once_insecure;
1703	typedef pcg_engines::oneseq_rxs_m_xs_16_16 pcg16_oneseq_once_insecure;
1704	typedef pcg_engines::oneseq_rxs_m_xs_32_32 pcg32_oneseq_once_insecure;
1705	typedef pcg_engines::oneseq_rxs_m_xs_64_64 pcg64_oneseq_once_insecure;
1706	typedef pcg_engines::oneseq_xsl_rr_rr_128_128 pcg128_oneseq_once_insecure;
1707
1708
// These two extended RNGs provide two-dimensionally equidistributed
// 32-bit generators.  pcg32_k2_fast occupies the same space as pcg64,
// and can be called twice to generate 64 bits, but does not require
// 128-bit math; on 32-bit systems, it's faster than pcg64 as well.
1713
1714	typedef pcg_engines::ext_setseq_xsh_rr_64_32<`1`,`16`,true> pcg32_k2;
1715	typedef pcg_engines::ext_oneseq_xsh_rs_64_32<`1`,`32`,true> pcg32_k2_fast;
1716
// These twelve extended RNGs have about as much state as arc4random
//
//  - the k variants are k-dimensionally equidistributed
//  - the c variants offer better cryptographic security
//
// (just how good the cryptographic security is remains an open question)
1723
1724	typedef pcg_engines::ext_setseq_xsh_rr_64_32<`6`,`16`,true> pcg32_k64;
1725	typedef pcg_engines::ext_mcg_xsh_rs_64_32<`6`,`32`,true> pcg32_k64_oneseq;
1726	typedef pcg_engines::ext_oneseq_xsh_rs_64_32<`6`,`32`,true> pcg32_k64_fast;
1727
1728	typedef pcg_engines::ext_setseq_xsh_rr_64_32<`6`,`16`,false> pcg32_c64;
1729	typedef pcg_engines::ext_oneseq_xsh_rs_64_32<`6`,`32`,false> pcg32_c64_oneseq;
1730	typedef pcg_engines::ext_mcg_xsh_rs_64_32<`6`,`32`,false> pcg32_c64_fast;
1731
1732	typedef pcg_engines::ext_setseq_xsl_rr_128_64<`5`,`16`,true> pcg64_k32;
1733	typedef pcg_engines::ext_oneseq_xsl_rr_128_64<`5`,`128`,true> pcg64_k32_oneseq;
1734	typedef pcg_engines::ext_mcg_xsl_rr_128_64<`5`,`128`,true> pcg64_k32_fast;
1735
1736	typedef pcg_engines::ext_setseq_xsl_rr_128_64<`5`,`16`,false> pcg64_c32;
1737	typedef pcg_engines::ext_oneseq_xsl_rr_128_64<`5`,`128`,false> pcg64_c32_oneseq;
1738	typedef pcg_engines::ext_mcg_xsl_rr_128_64<`5`,`128`,false> pcg64_c32_fast;
1739
// These eight extended RNGs have more state than the Mersenne twister
//
//  - the k variants are k-dimensionally equidistributed
//  - the c variants offer better cryptographic security
//
// (just how good the cryptographic security is remains an open question)
1746
1747	typedef pcg_engines::ext_setseq_xsh_rr_64_32<`10`,`16`,true> pcg32_k1024;
1748	typedef pcg_engines::ext_oneseq_xsh_rs_64_32<`10`,`32`,true> pcg32_k1024_fast;
1749
1750	typedef pcg_engines::ext_setseq_xsh_rr_64_32<`10`,`16`,false> pcg32_c1024;
1751	typedef pcg_engines::ext_oneseq_xsh_rs_64_32<`10`,`32`,false> pcg32_c1024_fast;
1752
1753	typedef pcg_engines::ext_setseq_xsl_rr_128_64<`10`,`16`,true> pcg64_k1024;
1754	typedef pcg_engines::ext_oneseq_xsl_rr_128_64<`10`,`128`,true> pcg64_k1024_fast;
1755
1756	typedef pcg_engines::ext_setseq_xsl_rr_128_64<`10`,`16`,false> pcg64_c1024;
1757	typedef pcg_engines::ext_oneseq_xsl_rr_128_64<`10`,`128`,false> pcg64_c1024_fast;
1758
// These generators have an insanely huge period (2^524352), and are suitable
// for silly party tricks, such as dumping out 64 KB ZIP files at an arbitrary
// point in the future.   [Actually, over the full period of the generator, it
// will produce every 64 KB ZIP file 2^64 times!]
1763
1764	typedef pcg_engines::ext_setseq_xsh_rr_64_32<`14`,`16`,true> pcg32_k16384;
1765	typedef pcg_engines::ext_oneseq_xsh_rs_64_32<`14`,`32`,true> pcg32_k16384_fast;
1766
1767	#ifdef _MSC_VER
1768	#pragma warning(default:4146)
1769	#endif
1770
1771	#endif // PCG_RAND_HPP_INCLUDED
1772

Browse the source code of ClickHouse/contrib/libpcg-random/include/pcg_random.hpp