d2s_intrinsics.h source code [PostgreSQL/src/common/d2s_intrinsics.h]

/*---------------------------------------------------------------------------
2	*
3	* Ryu floating-point output for double precision.
4	*
5	* Portions Copyright (c) 2018-2019, PostgreSQL Global Development Group
6	*
7	* IDENTIFICATION
8	* src/common/d2s_intrinsics.h
9	*
10	* This is a modification of code taken from github.com/ulfjack/ryu under the
11	* terms of the Boost license (not the Apache license). The original copyright
12	* notice follows:
13	*
14	* Copyright 2018 Ulf Adams
15	*
16	* The contents of this file may be used under the terms of the Apache
17	* License, Version 2.0.
18	*
19	* (See accompanying file LICENSE-Apache or copy at
20	* http://www.apache.org/licenses/LICENSE-2.0)
21	*
22	* Alternatively, the contents of this file may be used under the terms of the
23	* Boost Software License, Version 1.0.
24	*
25	* (See accompanying file LICENSE-Boost or copy at
26	* https://www.boost.org/LICENSE_1_0.txt)
27	*
28	* Unless required by applicable law or agreed to in writing, this software is
29	* distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
30	* KIND, either express or implied.
31	*
32	*---------------------------------------------------------------------------
33	*/
34	#ifndef RYU_D2S_INTRINSICS_H
35	#define RYU_D2S_INTRINSICS_H
36
37	#if defined(HAS_64_BIT_INTRINSICS)
38
39	#include <intrin.h>
40
41	static inline uint64
42	umul128(const uint64 a, const uint64 b, uint64 *const productHi)
43	{
44	return _umul128(a, b, productHi);
45	}
46
47	static inline uint64
48	shiftright128(const uint64 lo, const uint64 hi, const uint32 dist)
49	{
50	/*
51	* For the __shiftright128 intrinsic, the shift value is always modulo 64.
52	* In the current implementation of the double-precision version of Ryu,
53	* the shift value is always < 64. (In the case RYU_OPTIMIZE_SIZE == 0,
54	* the shift value is in the range [49, 58]. Otherwise in the range [2,
55	* 59].) Check this here in case a future change requires larger shift
56	* values. In this case this function needs to be adjusted.
57	*/
58	Assert(dist < `64`);
59	return __shiftright128(lo, hi, (unsigned char) dist);
60	}
61
62	#else /* defined(HAS_64_BIT_INTRINSICS) */
63
64	static inline uint64
65	umul128(const uint64 a, const uint64 b, uint64 *const productHi)
66	{
67	/*
68	* The casts here help MSVC to avoid calls to the __allmul library
69	* function.
70	*/
71	const uint32 aLo = (uint32) a;
72	const uint32 aHi = (uint32) (a >> `32`);
73	const uint32 bLo = (uint32) b;
74	const uint32 bHi = (uint32) (b >> `32`);
75
76	const uint64 b00 = (uint64) aLo * bLo;
77	const uint64 b01 = (uint64) aLo * bHi;
78	const uint64 b10 = (uint64) aHi * bLo;
79	const uint64 b11 = (uint64) aHi * bHi;
80
81	const uint32 b00Lo = (uint32) b00;
82	const uint32 b00Hi = (uint32) (b00 >> `32`);
83
84	const uint64 mid1 = b10 + b00Hi;
85	const uint32 mid1Lo = (uint32) (mid1);
86	const uint32 mid1Hi = (uint32) (mid1 >> `32`);
87
88	const uint64 mid2 = b01 + mid1Lo;
89	const uint32 mid2Lo = (uint32) (mid2);
90	const uint32 mid2Hi = (uint32) (mid2 >> `32`);
91
92	const uint64 pHi = b11 + mid1Hi + mid2Hi;
93	const uint64 pLo = ((uint64) mid2Lo << `32`) + b00Lo;
94
95	*productHi = pHi;
96	return pLo;
97	}
98
99	static inline uint64
100	shiftright128(const uint64 lo, const uint64 hi, const uint32 dist)
101	{
102	/ We don't need to handle the case dist >= 64 here (see above). /
103	Assert(dist < `64`);
104	#if !defined(RYU_32_BIT_PLATFORM)
105	Assert(dist > `0`);
106	return (hi << (`64` - dist)) \| (lo >> dist);
107	#else
108	/ Avoid a 64-bit shift by taking advantage of the range of shift values. /
109	Assert(dist >= `32`);
110	return (hi << (`64` - dist)) \| ((uint32) (lo >> `32`) >> (dist - `32`));
111	#endif
112	}
113
#endif /* defined(HAS_64_BIT_INTRINSICS) */
115
116	#ifdef RYU_32_BIT_PLATFORM
117
118	/ Returns the high 64 bits of the 128-bit product of a and b. /
119	static inline uint64
120	umulh(const uint64 a, const uint64 b)
121	{
122	/*
123	* Reuse the umul128 implementation. Optimizers will likely eliminate the
124	* instructions used to compute the low part of the product.
125	*/
126	uint64 hi;
127
128	umul128(a, b, &hi);
129	return hi;
130	}
131
/*----
133	* On 32-bit platforms, compilers typically generate calls to library
134	* functions for 64-bit divisions, even if the divisor is a constant.
135	*
136	* E.g.:
137	* https://bugs.llvm.org/show_bug.cgi?id=37932
138	* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=17958
139	* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37443
140	*
141	* The functions here perform division-by-constant using multiplications
142	* in the same way as 64-bit compilers would do.
143	*
144	* NB:
145	* The multipliers and shift values are the ones generated by clang x64
146	* for expressions like x/5, x/10, etc.
147	*----
148	*/
149
150	static inline uint64
151	div5(const uint64 x)
152	{
153	return umulh(x, UINT64CONST(`0xCCCCCCCCCCCCCCCD`)) >> `2`;
154	}
155
156	static inline uint64
157	div10(const uint64 x)
158	{
159	return umulh(x, UINT64CONST(`0xCCCCCCCCCCCCCCCD`)) >> `3`;
160	}
161
162	static inline uint64
163	div100(const uint64 x)
164	{
165	return umulh(x >> `2`, UINT64CONST(`0x28F5C28F5C28F5C3`)) >> `2`;
166	}
167
168	static inline uint64
169	div1e8(const uint64 x)
170	{
171	return umulh(x, UINT64CONST(`0xABCC77118461CEFD`)) >> `26`;
172	}
173
174	#else /* RYU_32_BIT_PLATFORM */
175
176	static inline uint64
177	div5(const uint64 x)
178	{
179	return x / `5`;
180	}
181
182	static inline uint64
183	div10(const uint64 x)
184	{
185	return x / `10`;
186	}
187
188	static inline uint64
189	div100(const uint64 x)
190	{
191	return x / `100`;
192	}
193
194	static inline uint64
195	div1e8(const uint64 x)
196	{
197	return x / `100000000`;
198	}
199
200	#endif /* RYU_32_BIT_PLATFORM */
201
202	#endif /* RYU_D2S_INTRINSICS_H */
203

Browse the source code of PostgreSQL/src/common/d2s_intrinsics.h