| 1 | /*--------------------------------------------------------------------------- |
| 2 | * |
| 3 | * Ryu floating-point output for double precision. |
| 4 | * |
| 5 | * Portions Copyright (c) 2018-2019, PostgreSQL Global Development Group |
| 6 | * |
| 7 | * IDENTIFICATION |
| 8 | * src/common/d2s_intrinsics.h |
| 9 | * |
| 10 | * This is a modification of code taken from github.com/ulfjack/ryu under the |
| 11 | * terms of the Boost license (not the Apache license). The original copyright |
| 12 | * notice follows: |
| 13 | * |
| 14 | * Copyright 2018 Ulf Adams |
| 15 | * |
| 16 | * The contents of this file may be used under the terms of the Apache |
| 17 | * License, Version 2.0. |
| 18 | * |
| 19 | * (See accompanying file LICENSE-Apache or copy at |
| 20 | * http://www.apache.org/licenses/LICENSE-2.0) |
| 21 | * |
| 22 | * Alternatively, the contents of this file may be used under the terms of the |
| 23 | * Boost Software License, Version 1.0. |
| 24 | * |
| 25 | * (See accompanying file LICENSE-Boost or copy at |
| 26 | * https://www.boost.org/LICENSE_1_0.txt) |
| 27 | * |
| 28 | * Unless required by applicable law or agreed to in writing, this software is |
| 29 | * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 30 | * KIND, either express or implied. |
| 31 | * |
| 32 | *--------------------------------------------------------------------------- |
| 33 | */ |
| 34 | #ifndef RYU_D2S_INTRINSICS_H |
| 35 | #define RYU_D2S_INTRINSICS_H |
| 36 | |
| 37 | #if defined(HAS_64_BIT_INTRINSICS) |
| 38 | |
| 39 | #include <intrin.h> |
| 40 | |
| 41 | static inline uint64 |
| 42 | umul128(const uint64 a, const uint64 b, uint64 *const productHi) |
| 43 | { |
| 44 | return _umul128(a, b, productHi); |
| 45 | } |
| 46 | |
| 47 | static inline uint64 |
| 48 | shiftright128(const uint64 lo, const uint64 hi, const uint32 dist) |
| 49 | { |
| 50 | /* |
| 51 | * For the __shiftright128 intrinsic, the shift value is always modulo 64. |
| 52 | * In the current implementation of the double-precision version of Ryu, |
| 53 | * the shift value is always < 64. (In the case RYU_OPTIMIZE_SIZE == 0, |
| 54 | * the shift value is in the range [49, 58]. Otherwise in the range [2, |
| 55 | * 59].) Check this here in case a future change requires larger shift |
| 56 | * values. In this case this function needs to be adjusted. |
| 57 | */ |
| 58 | Assert(dist < 64); |
| 59 | return __shiftright128(lo, hi, (unsigned char) dist); |
| 60 | } |
| 61 | |
| 62 | #else /* defined(HAS_64_BIT_INTRINSICS) */ |
| 63 | |
| 64 | static inline uint64 |
| 65 | umul128(const uint64 a, const uint64 b, uint64 *const productHi) |
| 66 | { |
| 67 | /* |
| 68 | * The casts here help MSVC to avoid calls to the __allmul library |
| 69 | * function. |
| 70 | */ |
| 71 | const uint32 aLo = (uint32) a; |
| 72 | const uint32 aHi = (uint32) (a >> 32); |
| 73 | const uint32 bLo = (uint32) b; |
| 74 | const uint32 bHi = (uint32) (b >> 32); |
| 75 | |
| 76 | const uint64 b00 = (uint64) aLo * bLo; |
| 77 | const uint64 b01 = (uint64) aLo * bHi; |
| 78 | const uint64 b10 = (uint64) aHi * bLo; |
| 79 | const uint64 b11 = (uint64) aHi * bHi; |
| 80 | |
| 81 | const uint32 b00Lo = (uint32) b00; |
| 82 | const uint32 b00Hi = (uint32) (b00 >> 32); |
| 83 | |
| 84 | const uint64 mid1 = b10 + b00Hi; |
| 85 | const uint32 mid1Lo = (uint32) (mid1); |
| 86 | const uint32 mid1Hi = (uint32) (mid1 >> 32); |
| 87 | |
| 88 | const uint64 mid2 = b01 + mid1Lo; |
| 89 | const uint32 mid2Lo = (uint32) (mid2); |
| 90 | const uint32 mid2Hi = (uint32) (mid2 >> 32); |
| 91 | |
| 92 | const uint64 pHi = b11 + mid1Hi + mid2Hi; |
| 93 | const uint64 pLo = ((uint64) mid2Lo << 32) + b00Lo; |
| 94 | |
| 95 | *productHi = pHi; |
| 96 | return pLo; |
| 97 | } |
| 98 | |
| 99 | static inline uint64 |
| 100 | shiftright128(const uint64 lo, const uint64 hi, const uint32 dist) |
| 101 | { |
| 102 | /* We don't need to handle the case dist >= 64 here (see above). */ |
| 103 | Assert(dist < 64); |
| 104 | #if !defined(RYU_32_BIT_PLATFORM) |
| 105 | Assert(dist > 0); |
| 106 | return (hi << (64 - dist)) | (lo >> dist); |
| 107 | #else |
| 108 | /* Avoid a 64-bit shift by taking advantage of the range of shift values. */ |
| 109 | Assert(dist >= 32); |
| 110 | return (hi << (64 - dist)) | ((uint32) (lo >> 32) >> (dist - 32)); |
| 111 | #endif |
| 112 | } |
| 113 | |
#endif							/* defined(HAS_64_BIT_INTRINSICS) */
| 115 | |
| 116 | #ifdef RYU_32_BIT_PLATFORM |
| 117 | |
| 118 | /* Returns the high 64 bits of the 128-bit product of a and b. */ |
| 119 | static inline uint64 |
| 120 | umulh(const uint64 a, const uint64 b) |
| 121 | { |
| 122 | /* |
| 123 | * Reuse the umul128 implementation. Optimizers will likely eliminate the |
| 124 | * instructions used to compute the low part of the product. |
| 125 | */ |
| 126 | uint64 hi; |
| 127 | |
| 128 | umul128(a, b, &hi); |
| 129 | return hi; |
| 130 | } |
| 131 | |
| 132 | /*---- |
| 133 | * On 32-bit platforms, compilers typically generate calls to library |
| 134 | * functions for 64-bit divisions, even if the divisor is a constant. |
| 135 | * |
| 136 | * E.g.: |
| 137 | * https://bugs.llvm.org/show_bug.cgi?id=37932 |
| 138 | * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=17958 |
| 139 | * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=37443 |
| 140 | * |
| 141 | * The functions here perform division-by-constant using multiplications |
| 142 | * in the same way as 64-bit compilers would do. |
| 143 | * |
| 144 | * NB: |
| 145 | * The multipliers and shift values are the ones generated by clang x64 |
| 146 | * for expressions like x/5, x/10, etc. |
| 147 | *---- |
| 148 | */ |
| 149 | |
| 150 | static inline uint64 |
| 151 | div5(const uint64 x) |
| 152 | { |
| 153 | return umulh(x, UINT64CONST(0xCCCCCCCCCCCCCCCD)) >> 2; |
| 154 | } |
| 155 | |
| 156 | static inline uint64 |
| 157 | div10(const uint64 x) |
| 158 | { |
| 159 | return umulh(x, UINT64CONST(0xCCCCCCCCCCCCCCCD)) >> 3; |
| 160 | } |
| 161 | |
| 162 | static inline uint64 |
| 163 | div100(const uint64 x) |
| 164 | { |
| 165 | return umulh(x >> 2, UINT64CONST(0x28F5C28F5C28F5C3)) >> 2; |
| 166 | } |
| 167 | |
| 168 | static inline uint64 |
| 169 | div1e8(const uint64 x) |
| 170 | { |
| 171 | return umulh(x, UINT64CONST(0xABCC77118461CEFD)) >> 26; |
| 172 | } |
| 173 | |
| 174 | #else /* RYU_32_BIT_PLATFORM */ |
| 175 | |
| 176 | static inline uint64 |
| 177 | div5(const uint64 x) |
| 178 | { |
| 179 | return x / 5; |
| 180 | } |
| 181 | |
| 182 | static inline uint64 |
| 183 | div10(const uint64 x) |
| 184 | { |
| 185 | return x / 10; |
| 186 | } |
| 187 | |
| 188 | static inline uint64 |
| 189 | div100(const uint64 x) |
| 190 | { |
| 191 | return x / 100; |
| 192 | } |
| 193 | |
| 194 | static inline uint64 |
| 195 | div1e8(const uint64 x) |
| 196 | { |
| 197 | return x / 100000000; |
| 198 | } |
| 199 | |
| 200 | #endif /* RYU_32_BIT_PLATFORM */ |
| 201 | |
| 202 | #endif /* RYU_D2S_INTRINSICS_H */ |
| 203 | |