1 | #pragma once |
2 | |
3 | // Based on https://github.com/amdn/itoa and combined with our optimizations |
4 | // |
5 | //=== itoa.h - Fast integer to ascii conversion --*- C++ -*-// |
6 | // |
7 | // The MIT License (MIT) |
8 | // Copyright (c) 2016 Arturo Martin-de-Nicolas |
9 | // |
10 | // Permission is hereby granted, free of charge, to any person obtaining a copy |
11 | // of this software and associated documentation files (the "Software"), to deal |
12 | // in the Software without restriction, including without limitation the rights |
13 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
14 | // copies of the Software, and to permit persons to whom the Software is |
15 | // furnished to do so, subject to the following conditions: |
16 | // |
17 | // The above copyright notice and this permission notice shall be included |
18 | // in all copies or substantial portions of the Software. |
19 | // |
20 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
21 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
22 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
23 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
24 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
25 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
26 | // SOFTWARE. |
27 | //===----------------------------------------------------------------------===// |
28 | |
29 | #include <cstdint> |
30 | #include <cstddef> |
31 | #include <cstring> |
32 | #include <type_traits> |
33 | #include "likely.h" |
34 | |
35 | using int128_t = __int128; |
36 | using uint128_t = unsigned __int128; |
37 | |
38 | namespace impl |
39 | { |
40 | |
41 | template <typename T> |
42 | static constexpr T pow10(size_t x) |
43 | { |
44 | return x ? 10 * pow10<T>(x - 1) : 1; |
45 | } |
46 | |
47 | // Division by a power of 10 is implemented using a multiplicative inverse. |
48 | // This strength reduction is also done by optimizing compilers, but |
49 | // presently the fastest results are produced by using the values |
50 | // for the multiplication and the shift as given by the algorithm |
51 | // described by Agner Fog in "Optimizing Subroutines in Assembly Language" |
52 | // |
53 | // http://www.agner.org/optimize/optimizing_assembly.pdf |
54 | // |
55 | // "Integer division by a constant (all processors) |
56 | // A floating point number can be divided by a constant by multiplying |
57 | // with the reciprocal. If we want to do the same with integers, we have |
58 | // to scale the reciprocal by 2n and then shift the product to the right |
59 | // by n. There are various algorithms for finding a suitable value of n |
60 | // and compensating for rounding errors. The algorithm described below |
61 | // was invented by Terje Mathisen, Norway, and not published elsewhere." |
62 | |
63 | /// Division by constant is performed by: |
64 | /// 1. Adding 1 if needed; |
65 | /// 2. Multiplying by another constant; |
66 | /// 3. Shifting right by another constant. |
67 | template <typename UInt, bool add_, UInt multiplier_, unsigned shift_> |
68 | struct Division |
69 | { |
70 | static constexpr bool add{add_}; |
71 | static constexpr UInt multiplier{multiplier_}; |
72 | static constexpr unsigned shift{shift_}; |
73 | }; |
74 | |
75 | /// Select a type with appropriate number of bytes from the list of types. |
76 | /// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes. |
77 | /// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t. |
78 | template <size_t N, typename T, typename... Ts> |
79 | struct SelectType |
80 | { |
81 | using Result = typename SelectType<N / 2, Ts...>::Result; |
82 | }; |
83 | |
84 | template <typename T, typename... Ts> |
85 | struct SelectType<1, T, Ts...> |
86 | { |
87 | using Result = T; |
88 | }; |
89 | |
90 | |
91 | /// Division by 10^N where N is the size of the type. |
92 | template <size_t N> |
93 | using DivisionBy10PowN = typename SelectType |
94 | < |
95 | N, |
96 | Division<uint8_t, 0, 205U, 11>, /// divide by 10 |
97 | Division<uint16_t, 1, 41943U, 22>, /// divide by 100 |
98 | Division<uint32_t, 0, 3518437209U, 45>, /// divide by 10000 |
99 | Division<uint64_t, 0, 12379400392853802749ULL, 90> /// divide by 100000000 |
100 | >::Result; |
101 | |
102 | template <size_t N> |
103 | using UnsignedOfSize = typename SelectType |
104 | < |
105 | N, |
106 | uint8_t, |
107 | uint16_t, |
108 | uint32_t, |
109 | uint64_t, |
110 | uint128_t |
111 | >::Result; |
112 | |
113 | /// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in |
114 | template <size_t N> |
115 | struct QuotientAndRemainder |
116 | { |
117 | UnsignedOfSize<N> quotient; // quotient with fewer than 2*N decimal digits |
118 | UnsignedOfSize<N / 2> remainder; // remainder with at most N decimal digits |
119 | }; |
120 | |
121 | template <size_t N> |
122 | QuotientAndRemainder<N> static inline split(UnsignedOfSize<N> value) |
123 | { |
124 | constexpr DivisionBy10PowN<N> division; |
125 | |
126 | UnsignedOfSize<N> quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift; |
127 | UnsignedOfSize<N / 2> remainder = value - quotient * pow10<UnsignedOfSize<N / 2>>(N); |
128 | |
129 | return {quotient, remainder}; |
130 | } |
131 | |
132 | |
133 | static inline char * outDigit(char * p, uint8_t value) |
134 | { |
135 | *p = '0' + value; |
136 | ++p; |
137 | return p; |
138 | } |
139 | |
140 | // Using a lookup table to convert binary numbers from 0 to 99 |
141 | // into ascii characters as described by Andrei Alexandrescu in |
142 | // https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/ |
143 | |
144 | static const char digits[201] = "00010203040506070809" |
145 | "10111213141516171819" |
146 | "20212223242526272829" |
147 | "30313233343536373839" |
148 | "40414243444546474849" |
149 | "50515253545556575859" |
150 | "60616263646566676869" |
151 | "70717273747576777879" |
152 | "80818283848586878889" |
153 | "90919293949596979899" ; |
154 | |
155 | static inline char * outTwoDigits(char * p, uint8_t value) |
156 | { |
157 | memcpy(p, &digits[value * 2], 2); |
158 | p += 2; |
159 | return p; |
160 | } |
161 | |
162 | |
163 | namespace convert |
164 | { |
165 | template <typename UInt, size_t N = sizeof(UInt)> static char * head(char * p, UInt u); |
166 | template <typename UInt, size_t N = sizeof(UInt)> static char * tail(char * p, UInt u); |
167 | |
168 | //===----------------------------------------------------------===// |
169 | // head: find most significant digit, skip leading zeros |
170 | //===----------------------------------------------------------===// |
171 | |
172 | // "x" contains quotient and remainder after division by 10^N |
173 | // quotient is less than 10^N |
174 | template <size_t N> |
175 | static inline char * head(char * p, QuotientAndRemainder<N> x) |
176 | { |
177 | p = head(p, UnsignedOfSize<N / 2>(x.quotient)); |
178 | p = tail(p, x.remainder); |
179 | return p; |
180 | } |
181 | |
182 | // "u" is less than 10^2*N |
183 | template <typename UInt, size_t N> |
184 | static inline char * head(char * p, UInt u) |
185 | { |
186 | return u < pow10<UnsignedOfSize<N>>(N) |
187 | ? head(p, UnsignedOfSize<N / 2>(u)) |
188 | : head<N>(p, split<N>(u)); |
189 | } |
190 | |
191 | // recursion base case, selected when "u" is one byte |
192 | template <> |
193 | inline char * head<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u) |
194 | { |
195 | return u < 10 |
196 | ? outDigit(p, u) |
197 | : outTwoDigits(p, u); |
198 | } |
199 | |
200 | //===----------------------------------------------------------===// |
201 | // tail: produce all digits including leading zeros |
202 | //===----------------------------------------------------------===// |
203 | |
204 | // recursive step, "u" is less than 10^2*N |
205 | template <typename UInt, size_t N> |
206 | static inline char * tail(char * p, UInt u) |
207 | { |
208 | QuotientAndRemainder<N> x = split<N>(u); |
209 | p = tail(p, UnsignedOfSize<N / 2>(x.quotient)); |
210 | p = tail(p, x.remainder); |
211 | return p; |
212 | } |
213 | |
214 | // recursion base case, selected when "u" is one byte |
215 | template <> |
216 | inline char * tail<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u) |
217 | { |
218 | return outTwoDigits(p, u); |
219 | } |
220 | |
221 | //===----------------------------------------------------------===// |
222 | // large values are >= 10^2*N |
223 | // where x contains quotient and remainder after division by 10^N |
224 | //===----------------------------------------------------------===// |
225 | |
226 | template <size_t N> |
227 | static inline char * large(char * p, QuotientAndRemainder<N> x) |
228 | { |
229 | QuotientAndRemainder<N> y = split<N>(x.quotient); |
230 | p = head(p, UnsignedOfSize<N / 2>(y.quotient)); |
231 | p = tail(p, y.remainder); |
232 | p = tail(p, x.remainder); |
233 | return p; |
234 | } |
235 | |
236 | //===----------------------------------------------------------===// |
237 | // handle values of "u" that might be >= 10^2*N |
238 | // where N is the size of "u" in bytes |
239 | //===----------------------------------------------------------===// |
240 | |
241 | template <typename UInt, size_t N = sizeof(UInt)> |
242 | static inline char * uitoa(char * p, UInt u) |
243 | { |
244 | if (u < pow10<UnsignedOfSize<N>>(N)) |
245 | return head(p, UnsignedOfSize<N / 2>(u)); |
246 | QuotientAndRemainder<N> x = split<N>(u); |
247 | |
248 | return u < pow10<UnsignedOfSize<N>>(2 * N) |
249 | ? head<N>(p, x) |
250 | : large<N>(p, x); |
251 | } |
252 | |
253 | // selected when "u" is one byte |
254 | template <> |
255 | inline char * uitoa<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u) |
256 | { |
257 | if (u < 10) |
258 | return outDigit(p, u); |
259 | else if (u < 100) |
260 | return outTwoDigits(p, u); |
261 | else |
262 | { |
263 | p = outDigit(p, u / 100); |
264 | p = outTwoDigits(p, u % 100); |
265 | return p; |
266 | } |
267 | } |
268 | |
269 | //===----------------------------------------------------------===// |
270 | // handle unsigned and signed integral operands |
271 | //===----------------------------------------------------------===// |
272 | |
273 | // itoa: handle unsigned integral operands (selected by SFINAE) |
274 | template <typename U, std::enable_if_t<!std::is_signed_v<U> && std::is_integral_v<U>> * = nullptr> |
275 | static inline char * itoa(U u, char * p) |
276 | { |
277 | return convert::uitoa(p, u); |
278 | } |
279 | |
280 | // itoa: handle signed integral operands (selected by SFINAE) |
281 | template <typename I, size_t N = sizeof(I), std::enable_if_t<std::is_signed_v<I> && std::is_integral_v<I>> * = nullptr> |
282 | static inline char * itoa(I i, char * p) |
283 | { |
284 | // Need "mask" to be filled with a copy of the sign bit. |
285 | // If "i" is a negative value, then the result of "operator >>" |
286 | // is implementation-defined, though usually it is an arithmetic |
287 | // right shift that replicates the sign bit. |
288 | // Use a conditional expression to be portable, |
289 | // a good optimizing compiler generates an arithmetic right shift |
290 | // and avoids the conditional branch. |
291 | UnsignedOfSize<N> mask = i < 0 ? ~UnsignedOfSize<N>(0) : 0; |
292 | // Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize<N>. |
293 | // Cannot use std::abs() because the result is undefined |
294 | // in 2's complement systems for the most-negative value. |
295 | // Want to avoid conditional branch for performance reasons since |
296 | // CPU branch prediction will be ineffective when negative values |
297 | // occur randomly. |
298 | // Let "u" be "i" cast to unsigned type UnsignedOfSize<N>. |
299 | // Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative. |
300 | // This yields the absolute value with the desired type without |
301 | // using a conditional branch and without invoking undefined or |
302 | // implementation defined behavior: |
303 | UnsignedOfSize<N> u = ((2 * UnsignedOfSize<N>(i)) & ~mask) - UnsignedOfSize<N>(i); |
304 | // Unconditionally store a minus sign when producing digits |
305 | // in a forward direction and increment the pointer only if |
306 | // the value is in fact negative. |
307 | // This avoids a conditional branch and is safe because we will |
308 | // always produce at least one digit and it will overwrite the |
309 | // minus sign when the value is not negative. |
310 | *p = '-'; |
311 | p += (mask & 1); |
312 | p = convert::uitoa(p, u); |
313 | return p; |
314 | } |
315 | } |
316 | |
317 | static inline int digits10(uint128_t x) |
318 | { |
319 | if (x < 10ULL) |
320 | return 1; |
321 | if (x < 100ULL) |
322 | return 2; |
323 | if (x < 1000ULL) |
324 | return 3; |
325 | |
326 | if (x < 1000000000000ULL) |
327 | { |
328 | if (x < 100000000ULL) |
329 | { |
330 | if (x < 1000000ULL) |
331 | { |
332 | if (x < 10000ULL) |
333 | return 4; |
334 | else |
335 | return 5 + (x >= 100000ULL); |
336 | } |
337 | |
338 | return 7 + (x >= 10000000ULL); |
339 | } |
340 | |
341 | if (x < 10000000000ULL) |
342 | return 9 + (x >= 1000000000ULL); |
343 | |
344 | return 11 + (x >= 100000000000ULL); |
345 | } |
346 | |
347 | return 12 + digits10(x / 1000000000000ULL); |
348 | } |
349 | |
350 | static inline char * writeUIntText(uint128_t x, char * p) |
351 | { |
352 | int len = digits10(x); |
353 | auto pp = p + len; |
354 | while (x >= 100) |
355 | { |
356 | const auto i = x % 100; |
357 | x /= 100; |
358 | pp -= 2; |
359 | outTwoDigits(pp, i); |
360 | } |
361 | if (x < 10) |
362 | *p = '0' + x; |
363 | else |
364 | outTwoDigits(p, x); |
365 | return p + len; |
366 | } |
367 | |
368 | static inline char * writeLeadingMinus(char * pos) |
369 | { |
370 | *pos = '-'; |
371 | return pos + 1; |
372 | } |
373 | |
374 | static inline char * writeSIntText(int128_t x, char * pos) |
375 | { |
376 | static const int128_t min_int128 = int128_t(0x8000000000000000ll) << 64; |
377 | |
378 | if (unlikely(x == min_int128)) |
379 | { |
380 | memcpy(pos, "-170141183460469231731687303715884105728" , 40); |
381 | return pos + 40; |
382 | } |
383 | |
384 | if (x < 0) |
385 | { |
386 | x = -x; |
387 | pos = writeLeadingMinus(pos); |
388 | } |
389 | return writeUIntText(static_cast<uint128_t>(x), pos); |
390 | } |
391 | |
392 | } |
393 | |
394 | template <typename I> |
395 | char * itoa(I i, char * p) |
396 | { |
397 | return impl::convert::itoa(i, p); |
398 | } |
399 | |
400 | template <> |
401 | inline char * itoa<uint128_t>(uint128_t i, char * p) |
402 | { |
403 | return impl::writeUIntText(i, p); |
404 | } |
405 | |
406 | template <> |
407 | inline char * itoa<int128_t>(int128_t i, char * p) |
408 | { |
409 | return impl::writeSIntText(i, p); |
410 | } |
411 | |