1#pragma once
2
3// Based on https://github.com/amdn/itoa and combined with our optimizations
4//
5//=== itoa.h - Fast integer to ascii conversion --*- C++ -*-//
6//
7// The MIT License (MIT)
8// Copyright (c) 2016 Arturo Martin-de-Nicolas
9//
10// Permission is hereby granted, free of charge, to any person obtaining a copy
11// of this software and associated documentation files (the "Software"), to deal
12// in the Software without restriction, including without limitation the rights
13// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14// copies of the Software, and to permit persons to whom the Software is
15// furnished to do so, subject to the following conditions:
16//
17// The above copyright notice and this permission notice shall be included
18// in all copies or substantial portions of the Software.
19//
20// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26// SOFTWARE.
27//===----------------------------------------------------------------------===//
28
29#include <cstdint>
30#include <cstddef>
31#include <cstring>
32#include <type_traits>
33#include "likely.h"
34
35using int128_t = __int128;
36using uint128_t = unsigned __int128;
37
38namespace impl
39{
40
41template <typename T>
42static constexpr T pow10(size_t x)
43{
44 return x ? 10 * pow10<T>(x - 1) : 1;
45}
46
47// Division by a power of 10 is implemented using a multiplicative inverse.
48// This strength reduction is also done by optimizing compilers, but
49// presently the fastest results are produced by using the values
50// for the multiplication and the shift as given by the algorithm
51// described by Agner Fog in "Optimizing Subroutines in Assembly Language"
52//
53// http://www.agner.org/optimize/optimizing_assembly.pdf
54//
55// "Integer division by a constant (all processors)
56// A floating point number can be divided by a constant by multiplying
57// with the reciprocal. If we want to do the same with integers, we have
58// to scale the reciprocal by 2n and then shift the product to the right
59// by n. There are various algorithms for finding a suitable value of n
60// and compensating for rounding errors. The algorithm described below
61// was invented by Terje Mathisen, Norway, and not published elsewhere."
62
63/// Division by constant is performed by:
64/// 1. Adding 1 if needed;
65/// 2. Multiplying by another constant;
66/// 3. Shifting right by another constant.
67template <typename UInt, bool add_, UInt multiplier_, unsigned shift_>
68struct Division
69{
70 static constexpr bool add{add_};
71 static constexpr UInt multiplier{multiplier_};
72 static constexpr unsigned shift{shift_};
73};
74
75/// Select a type with appropriate number of bytes from the list of types.
76/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes.
77/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t.
78template <size_t N, typename T, typename... Ts>
79struct SelectType
80{
81 using Result = typename SelectType<N / 2, Ts...>::Result;
82};
83
84template <typename T, typename... Ts>
85struct SelectType<1, T, Ts...>
86{
87 using Result = T;
88};
89
90
91/// Division by 10^N where N is the size of the type.
92template <size_t N>
93using DivisionBy10PowN = typename SelectType
94<
95 N,
96 Division<uint8_t, 0, 205U, 11>, /// divide by 10
97 Division<uint16_t, 1, 41943U, 22>, /// divide by 100
98 Division<uint32_t, 0, 3518437209U, 45>, /// divide by 10000
99 Division<uint64_t, 0, 12379400392853802749ULL, 90> /// divide by 100000000
100>::Result;
101
102template <size_t N>
103using UnsignedOfSize = typename SelectType
104<
105 N,
106 uint8_t,
107 uint16_t,
108 uint32_t,
109 uint64_t,
110 uint128_t
111>::Result;
112
113/// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in
114template <size_t N>
115struct QuotientAndRemainder
116{
117 UnsignedOfSize<N> quotient; // quotient with fewer than 2*N decimal digits
118 UnsignedOfSize<N / 2> remainder; // remainder with at most N decimal digits
119};
120
121template <size_t N>
122QuotientAndRemainder<N> static inline split(UnsignedOfSize<N> value)
123{
124 constexpr DivisionBy10PowN<N> division;
125
126 UnsignedOfSize<N> quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift;
127 UnsignedOfSize<N / 2> remainder = value - quotient * pow10<UnsignedOfSize<N / 2>>(N);
128
129 return {quotient, remainder};
130}
131
132
133static inline char * outDigit(char * p, uint8_t value)
134{
135 *p = '0' + value;
136 ++p;
137 return p;
138}
139
140// Using a lookup table to convert binary numbers from 0 to 99
141// into ascii characters as described by Andrei Alexandrescu in
142// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
143
144static const char digits[201] = "00010203040506070809"
145 "10111213141516171819"
146 "20212223242526272829"
147 "30313233343536373839"
148 "40414243444546474849"
149 "50515253545556575859"
150 "60616263646566676869"
151 "70717273747576777879"
152 "80818283848586878889"
153 "90919293949596979899";
154
155static inline char * outTwoDigits(char * p, uint8_t value)
156{
157 memcpy(p, &digits[value * 2], 2);
158 p += 2;
159 return p;
160}
161
162
163namespace convert
164{
165 template <typename UInt, size_t N = sizeof(UInt)> static char * head(char * p, UInt u);
166 template <typename UInt, size_t N = sizeof(UInt)> static char * tail(char * p, UInt u);
167
168 //===----------------------------------------------------------===//
169 // head: find most significant digit, skip leading zeros
170 //===----------------------------------------------------------===//
171
172 // "x" contains quotient and remainder after division by 10^N
173 // quotient is less than 10^N
174 template <size_t N>
175 static inline char * head(char * p, QuotientAndRemainder<N> x)
176 {
177 p = head(p, UnsignedOfSize<N / 2>(x.quotient));
178 p = tail(p, x.remainder);
179 return p;
180 }
181
182 // "u" is less than 10^2*N
183 template <typename UInt, size_t N>
184 static inline char * head(char * p, UInt u)
185 {
186 return u < pow10<UnsignedOfSize<N>>(N)
187 ? head(p, UnsignedOfSize<N / 2>(u))
188 : head<N>(p, split<N>(u));
189 }
190
191 // recursion base case, selected when "u" is one byte
192 template <>
193 inline char * head<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
194 {
195 return u < 10
196 ? outDigit(p, u)
197 : outTwoDigits(p, u);
198 }
199
200 //===----------------------------------------------------------===//
201 // tail: produce all digits including leading zeros
202 //===----------------------------------------------------------===//
203
204 // recursive step, "u" is less than 10^2*N
205 template <typename UInt, size_t N>
206 static inline char * tail(char * p, UInt u)
207 {
208 QuotientAndRemainder<N> x = split<N>(u);
209 p = tail(p, UnsignedOfSize<N / 2>(x.quotient));
210 p = tail(p, x.remainder);
211 return p;
212 }
213
214 // recursion base case, selected when "u" is one byte
215 template <>
216 inline char * tail<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
217 {
218 return outTwoDigits(p, u);
219 }
220
221 //===----------------------------------------------------------===//
222 // large values are >= 10^2*N
223 // where x contains quotient and remainder after division by 10^N
224 //===----------------------------------------------------------===//
225
226 template <size_t N>
227 static inline char * large(char * p, QuotientAndRemainder<N> x)
228 {
229 QuotientAndRemainder<N> y = split<N>(x.quotient);
230 p = head(p, UnsignedOfSize<N / 2>(y.quotient));
231 p = tail(p, y.remainder);
232 p = tail(p, x.remainder);
233 return p;
234 }
235
236 //===----------------------------------------------------------===//
237 // handle values of "u" that might be >= 10^2*N
238 // where N is the size of "u" in bytes
239 //===----------------------------------------------------------===//
240
241 template <typename UInt, size_t N = sizeof(UInt)>
242 static inline char * uitoa(char * p, UInt u)
243 {
244 if (u < pow10<UnsignedOfSize<N>>(N))
245 return head(p, UnsignedOfSize<N / 2>(u));
246 QuotientAndRemainder<N> x = split<N>(u);
247
248 return u < pow10<UnsignedOfSize<N>>(2 * N)
249 ? head<N>(p, x)
250 : large<N>(p, x);
251 }
252
253 // selected when "u" is one byte
254 template <>
255 inline char * uitoa<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
256 {
257 if (u < 10)
258 return outDigit(p, u);
259 else if (u < 100)
260 return outTwoDigits(p, u);
261 else
262 {
263 p = outDigit(p, u / 100);
264 p = outTwoDigits(p, u % 100);
265 return p;
266 }
267 }
268
269 //===----------------------------------------------------------===//
270 // handle unsigned and signed integral operands
271 //===----------------------------------------------------------===//
272
273 // itoa: handle unsigned integral operands (selected by SFINAE)
274 template <typename U, std::enable_if_t<!std::is_signed_v<U> && std::is_integral_v<U>> * = nullptr>
275 static inline char * itoa(U u, char * p)
276 {
277 return convert::uitoa(p, u);
278 }
279
280 // itoa: handle signed integral operands (selected by SFINAE)
281 template <typename I, size_t N = sizeof(I), std::enable_if_t<std::is_signed_v<I> && std::is_integral_v<I>> * = nullptr>
282 static inline char * itoa(I i, char * p)
283 {
284 // Need "mask" to be filled with a copy of the sign bit.
285 // If "i" is a negative value, then the result of "operator >>"
286 // is implementation-defined, though usually it is an arithmetic
287 // right shift that replicates the sign bit.
288 // Use a conditional expression to be portable,
289 // a good optimizing compiler generates an arithmetic right shift
290 // and avoids the conditional branch.
291 UnsignedOfSize<N> mask = i < 0 ? ~UnsignedOfSize<N>(0) : 0;
292 // Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize<N>.
293 // Cannot use std::abs() because the result is undefined
294 // in 2's complement systems for the most-negative value.
295 // Want to avoid conditional branch for performance reasons since
296 // CPU branch prediction will be ineffective when negative values
297 // occur randomly.
298 // Let "u" be "i" cast to unsigned type UnsignedOfSize<N>.
299 // Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative.
300 // This yields the absolute value with the desired type without
301 // using a conditional branch and without invoking undefined or
302 // implementation defined behavior:
303 UnsignedOfSize<N> u = ((2 * UnsignedOfSize<N>(i)) & ~mask) - UnsignedOfSize<N>(i);
304 // Unconditionally store a minus sign when producing digits
305 // in a forward direction and increment the pointer only if
306 // the value is in fact negative.
307 // This avoids a conditional branch and is safe because we will
308 // always produce at least one digit and it will overwrite the
309 // minus sign when the value is not negative.
310 *p = '-';
311 p += (mask & 1);
312 p = convert::uitoa(p, u);
313 return p;
314 }
315}
316
317static inline int digits10(uint128_t x)
318{
319 if (x < 10ULL)
320 return 1;
321 if (x < 100ULL)
322 return 2;
323 if (x < 1000ULL)
324 return 3;
325
326 if (x < 1000000000000ULL)
327 {
328 if (x < 100000000ULL)
329 {
330 if (x < 1000000ULL)
331 {
332 if (x < 10000ULL)
333 return 4;
334 else
335 return 5 + (x >= 100000ULL);
336 }
337
338 return 7 + (x >= 10000000ULL);
339 }
340
341 if (x < 10000000000ULL)
342 return 9 + (x >= 1000000000ULL);
343
344 return 11 + (x >= 100000000000ULL);
345 }
346
347 return 12 + digits10(x / 1000000000000ULL);
348}
349
350static inline char * writeUIntText(uint128_t x, char * p)
351{
352 int len = digits10(x);
353 auto pp = p + len;
354 while (x >= 100)
355 {
356 const auto i = x % 100;
357 x /= 100;
358 pp -= 2;
359 outTwoDigits(pp, i);
360 }
361 if (x < 10)
362 *p = '0' + x;
363 else
364 outTwoDigits(p, x);
365 return p + len;
366}
367
368static inline char * writeLeadingMinus(char * pos)
369{
370 *pos = '-';
371 return pos + 1;
372}
373
374static inline char * writeSIntText(int128_t x, char * pos)
375{
376 static const int128_t min_int128 = int128_t(0x8000000000000000ll) << 64;
377
378 if (unlikely(x == min_int128))
379 {
380 memcpy(pos, "-170141183460469231731687303715884105728", 40);
381 return pos + 40;
382 }
383
384 if (x < 0)
385 {
386 x = -x;
387 pos = writeLeadingMinus(pos);
388 }
389 return writeUIntText(static_cast<uint128_t>(x), pos);
390}
391
392}
393
394template <typename I>
395char * itoa(I i, char * p)
396{
397 return impl::convert::itoa(i, p);
398}
399
400template <>
401inline char * itoa<uint128_t>(uint128_t i, char * p)
402{
403 return impl::writeUIntText(i, p);
404}
405
406template <>
407inline char * itoa<int128_t>(int128_t i, char * p)
408{
409 return impl::writeSIntText(i, p);
410}
411