1 | /*--------------------------------------------------------------------------- |
2 | * |
3 | * Ryu floating-point output for double precision. |
4 | * |
5 | * Portions Copyright (c) 2018-2019, PostgreSQL Global Development Group |
6 | * |
7 | * IDENTIFICATION |
8 | * src/common/d2s_intrinsics.h |
9 | * |
10 | * This is a modification of code taken from github.com/ulfjack/ryu under the |
11 | * terms of the Boost license (not the Apache license). The original copyright |
12 | * notice follows: |
13 | * |
14 | * Copyright 2018 Ulf Adams |
15 | * |
16 | * The contents of this file may be used under the terms of the Apache |
17 | * License, Version 2.0. |
18 | * |
19 | * (See accompanying file LICENSE-Apache or copy at |
20 | * http://www.apache.org/licenses/LICENSE-2.0) |
21 | * |
22 | * Alternatively, the contents of this file may be used under the terms of the |
23 | * Boost Software License, Version 1.0. |
24 | * |
25 | * (See accompanying file LICENSE-Boost or copy at |
26 | * https://www.boost.org/LICENSE_1_0.txt) |
27 | * |
28 | * Unless required by applicable law or agreed to in writing, this software is |
29 | * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
30 | * KIND, either express or implied. |
31 | * |
32 | *--------------------------------------------------------------------------- |
33 | */ |
34 | #ifndef RYU_D2S_INTRINSICS_H |
35 | #define RYU_D2S_INTRINSICS_H |
36 | |
37 | #if defined(HAS_64_BIT_INTRINSICS) |
38 | |
39 | #include <intrin.h> |
40 | |
41 | static inline uint64 |
42 | umul128(const uint64 a, const uint64 b, uint64 *const productHi) |
43 | { |
44 | return _umul128(a, b, productHi); |
45 | } |
46 | |
47 | static inline uint64 |
48 | shiftright128(const uint64 lo, const uint64 hi, const uint32 dist) |
49 | { |
50 | /* |
51 | * For the __shiftright128 intrinsic, the shift value is always modulo 64. |
52 | * In the current implementation of the double-precision version of Ryu, |
53 | * the shift value is always < 64. (In the case RYU_OPTIMIZE_SIZE == 0, |
54 | * the shift value is in the range [49, 58]. Otherwise in the range [2, |
55 | * 59].) Check this here in case a future change requires larger shift |
56 | * values. In this case this function needs to be adjusted. |
57 | */ |
58 | Assert(dist < 64); |
59 | return __shiftright128(lo, hi, (unsigned char) dist); |
60 | } |
61 | |
62 | #else /* defined(HAS_64_BIT_INTRINSICS) */ |
63 | |
64 | static inline uint64 |
65 | umul128(const uint64 a, const uint64 b, uint64 *const productHi) |
66 | { |
67 | /* |
68 | * The casts here help MSVC to avoid calls to the __allmul library |
69 | * function. |
70 | */ |
71 | const uint32 aLo = (uint32) a; |
72 | const uint32 aHi = (uint32) (a >> 32); |
73 | const uint32 bLo = (uint32) b; |
74 | const uint32 bHi = (uint32) (b >> 32); |
75 | |
76 | const uint64 b00 = (uint64) aLo * bLo; |
77 | const uint64 b01 = (uint64) aLo * bHi; |
78 | const uint64 b10 = (uint64) aHi * bLo; |
79 | const uint64 b11 = (uint64) aHi * bHi; |
80 | |
81 | const uint32 b00Lo = (uint32) b00; |
82 | const uint32 b00Hi = (uint32) (b00 >> 32); |
83 | |
84 | const uint64 mid1 = b10 + b00Hi; |
85 | const uint32 mid1Lo = (uint32) (mid1); |
86 | const uint32 mid1Hi = (uint32) (mid1 >> 32); |
87 | |
88 | const uint64 mid2 = b01 + mid1Lo; |
89 | const uint32 mid2Lo = (uint32) (mid2); |
90 | const uint32 mid2Hi = (uint32) (mid2 >> 32); |
91 | |
92 | const uint64 pHi = b11 + mid1Hi + mid2Hi; |
93 | const uint64 pLo = ((uint64) mid2Lo << 32) + b00Lo; |
94 | |
95 | *productHi = pHi; |
96 | return pLo; |
97 | } |
98 | |
99 | static inline uint64 |
100 | shiftright128(const uint64 lo, const uint64 hi, const uint32 dist) |
101 | { |
102 | /* We don't need to handle the case dist >= 64 here (see above). */ |
103 | Assert(dist < 64); |
104 | #if !defined(RYU_32_BIT_PLATFORM) |
105 | Assert(dist > 0); |
106 | return (hi << (64 - dist)) | (lo >> dist); |
107 | #else |
108 | /* Avoid a 64-bit shift by taking advantage of the range of shift values. */ |
109 | Assert(dist >= 32); |
110 | return (hi << (64 - dist)) | ((uint32) (lo >> 32) >> (dist - 32)); |
111 | #endif |
112 | } |
113 | |
#endif /* defined(HAS_64_BIT_INTRINSICS) */
115 | |
116 | #ifdef RYU_32_BIT_PLATFORM |
117 | |
118 | /* Returns the high 64 bits of the 128-bit product of a and b. */ |
119 | static inline uint64 |
120 | umulh(const uint64 a, const uint64 b) |
121 | { |
122 | /* |
123 | * Reuse the umul128 implementation. Optimizers will likely eliminate the |
124 | * instructions used to compute the low part of the product. |
125 | */ |
126 | uint64 hi; |
127 | |
128 | umul128(a, b, &hi); |
129 | return hi; |
130 | } |
131 | |
132 | /*---- |
133 | * On 32-bit platforms, compilers typically generate calls to library |
134 | * functions for 64-bit divisions, even if the divisor is a constant. |
135 | * |
136 | * E.g.: |
137 | * https://bugs.llvm.org/show_bug.cgi?id=37932 |
138 | * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=17958 |
139 | * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37443 |
140 | * |
141 | * The functions here perform division-by-constant using multiplications |
142 | * in the same way as 64-bit compilers would do. |
143 | * |
144 | * NB: |
145 | * The multipliers and shift values are the ones generated by clang x64 |
146 | * for expressions like x/5, x/10, etc. |
147 | *---- |
148 | */ |
149 | |
150 | static inline uint64 |
151 | div5(const uint64 x) |
152 | { |
153 | return umulh(x, UINT64CONST(0xCCCCCCCCCCCCCCCD)) >> 2; |
154 | } |
155 | |
156 | static inline uint64 |
157 | div10(const uint64 x) |
158 | { |
159 | return umulh(x, UINT64CONST(0xCCCCCCCCCCCCCCCD)) >> 3; |
160 | } |
161 | |
162 | static inline uint64 |
163 | div100(const uint64 x) |
164 | { |
165 | return umulh(x >> 2, UINT64CONST(0x28F5C28F5C28F5C3)) >> 2; |
166 | } |
167 | |
168 | static inline uint64 |
169 | div1e8(const uint64 x) |
170 | { |
171 | return umulh(x, UINT64CONST(0xABCC77118461CEFD)) >> 26; |
172 | } |
173 | |
174 | #else /* RYU_32_BIT_PLATFORM */ |
175 | |
176 | static inline uint64 |
177 | div5(const uint64 x) |
178 | { |
179 | return x / 5; |
180 | } |
181 | |
182 | static inline uint64 |
183 | div10(const uint64 x) |
184 | { |
185 | return x / 10; |
186 | } |
187 | |
188 | static inline uint64 |
189 | div100(const uint64 x) |
190 | { |
191 | return x / 100; |
192 | } |
193 | |
194 | static inline uint64 |
195 | div1e8(const uint64 x) |
196 | { |
197 | return x / 100000000; |
198 | } |
199 | |
200 | #endif /* RYU_32_BIT_PLATFORM */ |
201 | |
202 | #endif /* RYU_D2S_INTRINSICS_H */ |
203 | |