| 1 | #pragma once | 
|---|
| 2 |  | 
|---|
| 3 | // Based on https://github.com/amdn/itoa and combined with our optimizations | 
|---|
| 4 | // | 
|---|
| 5 | //=== itoa.h - Fast integer to ascii conversion                   --*- C++ -*-// | 
|---|
| 6 | // | 
|---|
| 7 | // The MIT License (MIT) | 
|---|
| 8 | // Copyright (c) 2016 Arturo Martin-de-Nicolas | 
|---|
| 9 | // | 
|---|
| 10 | // Permission is hereby granted, free of charge, to any person obtaining a copy | 
|---|
| 11 | // of this software and associated documentation files (the "Software"), to deal | 
|---|
| 12 | // in the Software without restriction, including without limitation the rights | 
|---|
| 13 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 
|---|
| 14 | // copies of the Software, and to permit persons to whom the Software is | 
|---|
| 15 | // furnished to do so, subject to the following conditions: | 
|---|
| 16 | // | 
|---|
| 17 | //     The above copyright notice and this permission notice shall be included | 
|---|
| 18 | //     in all copies or substantial portions of the Software. | 
|---|
| 19 | // | 
|---|
| 20 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
|---|
| 21 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
|---|
| 22 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 
|---|
| 23 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
|---|
| 24 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 
|---|
| 25 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | 
|---|
| 26 | // SOFTWARE. | 
|---|
| 27 | //===----------------------------------------------------------------------===// | 
|---|
| 28 |  | 
|---|
| 29 | #include <cstdint> | 
|---|
| 30 | #include <cstddef> | 
|---|
| 31 | #include <cstring> | 
|---|
| 32 | #include <type_traits> | 
|---|
| 33 | #include "likely.h" | 
|---|
| 34 |  | 
|---|
| 35 | using int128_t = __int128; | 
|---|
| 36 | using uint128_t = unsigned __int128; | 
|---|
| 37 |  | 
|---|
| 38 | namespace impl | 
|---|
| 39 | { | 
|---|
| 40 |  | 
|---|
| 41 | template <typename T> | 
|---|
| 42 | static constexpr T pow10(size_t x) | 
|---|
| 43 | { | 
|---|
| 44 | return x ? 10 * pow10<T>(x - 1) : 1; | 
|---|
| 45 | } | 
|---|
| 46 |  | 
|---|
| 47 | // Division by a power of 10 is implemented using a multiplicative inverse. | 
|---|
| 48 | // This strength reduction is also done by optimizing compilers, but | 
|---|
| 49 | // presently the fastest results are produced by using the values | 
|---|
| 50 | // for the multiplication and the shift as given by the algorithm | 
|---|
| 51 | // described by Agner Fog in "Optimizing Subroutines in Assembly Language" | 
|---|
| 52 | // | 
|---|
| 53 | // http://www.agner.org/optimize/optimizing_assembly.pdf | 
|---|
| 54 | // | 
|---|
| 55 | // "Integer division by a constant (all processors) | 
|---|
| 56 | // A floating point number can be divided by a constant by multiplying | 
|---|
| 57 | // with the reciprocal. If we want to do the same with integers, we have | 
|---|
| 58 | // to scale the reciprocal by 2n and then shift the product to the right | 
|---|
| 59 | // by n. There are various algorithms for finding a suitable value of n | 
|---|
| 60 | // and compensating for rounding errors. The algorithm described below | 
|---|
| 61 | // was invented by Terje Mathisen, Norway, and not published elsewhere." | 
|---|
| 62 |  | 
|---|
| 63 | /// Division by constant is performed by: | 
|---|
| 64 | /// 1. Adding 1 if needed; | 
|---|
| 65 | /// 2. Multiplying by another constant; | 
|---|
| 66 | /// 3. Shifting right by another constant. | 
|---|
| 67 | template <typename UInt, bool add_, UInt multiplier_, unsigned shift_> | 
|---|
| 68 | struct Division | 
|---|
| 69 | { | 
|---|
| 70 | static constexpr bool add{add_}; | 
|---|
| 71 | static constexpr UInt multiplier{multiplier_}; | 
|---|
| 72 | static constexpr unsigned shift{shift_}; | 
|---|
| 73 | }; | 
|---|
| 74 |  | 
|---|
| 75 | /// Select a type with appropriate number of bytes from the list of types. | 
|---|
| 76 | /// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes. | 
|---|
| 77 | /// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t. | 
|---|
| 78 | template <size_t N, typename T, typename... Ts> | 
|---|
| 79 | struct SelectType | 
|---|
| 80 | { | 
|---|
| 81 | using Result = typename SelectType<N / 2, Ts...>::Result; | 
|---|
| 82 | }; | 
|---|
| 83 |  | 
|---|
| 84 | template <typename T, typename... Ts> | 
|---|
| 85 | struct SelectType<1, T, Ts...> | 
|---|
| 86 | { | 
|---|
| 87 | using Result = T; | 
|---|
| 88 | }; | 
|---|
| 89 |  | 
|---|
| 90 |  | 
|---|
| 91 | /// Division by 10^N where N is the size of the type. | 
|---|
| 92 | template <size_t N> | 
|---|
| 93 | using DivisionBy10PowN = typename SelectType | 
|---|
| 94 | < | 
|---|
| 95 | N, | 
|---|
| 96 | Division<uint8_t, 0, 205U, 11>,                           /// divide by 10 | 
|---|
| 97 | Division<uint16_t, 1, 41943U, 22>,                        /// divide by 100 | 
|---|
| 98 | Division<uint32_t, 0, 3518437209U, 45>,                   /// divide by 10000 | 
|---|
| 99 | Division<uint64_t, 0, 12379400392853802749ULL, 90>        /// divide by 100000000 | 
|---|
| 100 | >::Result; | 
|---|
| 101 |  | 
|---|
| 102 | template <size_t N> | 
|---|
| 103 | using UnsignedOfSize = typename SelectType | 
|---|
| 104 | < | 
|---|
| 105 | N, | 
|---|
| 106 | uint8_t, | 
|---|
| 107 | uint16_t, | 
|---|
| 108 | uint32_t, | 
|---|
| 109 | uint64_t, | 
|---|
| 110 | uint128_t | 
|---|
| 111 | >::Result; | 
|---|
| 112 |  | 
|---|
| 113 | /// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in | 
|---|
| 114 | template <size_t N> | 
|---|
| 115 | struct QuotientAndRemainder | 
|---|
| 116 | { | 
|---|
| 117 | UnsignedOfSize<N> quotient; // quotient with fewer than 2*N decimal digits | 
|---|
| 118 | UnsignedOfSize<N / 2> remainder; // remainder with at most N decimal digits | 
|---|
| 119 | }; | 
|---|
| 120 |  | 
|---|
| 121 | template <size_t N> | 
|---|
| 122 | QuotientAndRemainder<N> static inline split(UnsignedOfSize<N> value) | 
|---|
| 123 | { | 
|---|
| 124 | constexpr DivisionBy10PowN<N> division; | 
|---|
| 125 |  | 
|---|
| 126 | UnsignedOfSize<N> quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift; | 
|---|
| 127 | UnsignedOfSize<N / 2> remainder = value - quotient * pow10<UnsignedOfSize<N / 2>>(N); | 
|---|
| 128 |  | 
|---|
| 129 | return {quotient, remainder}; | 
|---|
| 130 | } | 
|---|
| 131 |  | 
|---|
| 132 |  | 
|---|
| 133 | static inline char * outDigit(char * p, uint8_t value) | 
|---|
| 134 | { | 
|---|
| 135 | *p = '0' + value; | 
|---|
| 136 | ++p; | 
|---|
| 137 | return p; | 
|---|
| 138 | } | 
|---|
| 139 |  | 
|---|
| 140 | // Using a lookup table to convert binary numbers from 0 to 99 | 
|---|
| 141 | // into ascii characters as described by Andrei Alexandrescu in | 
|---|
| 142 | // https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/ | 
|---|
| 143 |  | 
|---|
| 144 | static const char digits[201] = "00010203040506070809" | 
|---|
| 145 | "10111213141516171819" | 
|---|
| 146 | "20212223242526272829" | 
|---|
| 147 | "30313233343536373839" | 
|---|
| 148 | "40414243444546474849" | 
|---|
| 149 | "50515253545556575859" | 
|---|
| 150 | "60616263646566676869" | 
|---|
| 151 | "70717273747576777879" | 
|---|
| 152 | "80818283848586878889" | 
|---|
| 153 | "90919293949596979899"; | 
|---|
| 154 |  | 
|---|
| 155 | static inline char * outTwoDigits(char * p, uint8_t value) | 
|---|
| 156 | { | 
|---|
| 157 | memcpy(p, &digits[value * 2], 2); | 
|---|
| 158 | p += 2; | 
|---|
| 159 | return p; | 
|---|
| 160 | } | 
|---|
| 161 |  | 
|---|
| 162 |  | 
|---|
| 163 | namespace convert | 
|---|
| 164 | { | 
|---|
| 165 | template <typename UInt, size_t N = sizeof(UInt)> static char * head(char * p, UInt u); | 
|---|
| 166 | template <typename UInt, size_t N = sizeof(UInt)> static char * tail(char * p, UInt u); | 
|---|
| 167 |  | 
|---|
| 168 | //===----------------------------------------------------------===// | 
|---|
| 169 | //     head: find most significant digit, skip leading zeros | 
|---|
| 170 | //===----------------------------------------------------------===// | 
|---|
| 171 |  | 
|---|
| 172 | // "x" contains quotient and remainder after division by 10^N | 
|---|
| 173 | // quotient is less than 10^N | 
|---|
| 174 | template <size_t N> | 
|---|
| 175 | static inline char * head(char * p, QuotientAndRemainder<N> x) | 
|---|
| 176 | { | 
|---|
| 177 | p = head(p, UnsignedOfSize<N / 2>(x.quotient)); | 
|---|
| 178 | p = tail(p, x.remainder); | 
|---|
| 179 | return p; | 
|---|
| 180 | } | 
|---|
| 181 |  | 
|---|
| 182 | // "u" is less than 10^2*N | 
|---|
| 183 | template <typename UInt, size_t N> | 
|---|
| 184 | static inline char * head(char * p, UInt u) | 
|---|
| 185 | { | 
|---|
| 186 | return u < pow10<UnsignedOfSize<N>>(N) | 
|---|
| 187 | ? head(p, UnsignedOfSize<N / 2>(u)) | 
|---|
| 188 | : head<N>(p, split<N>(u)); | 
|---|
| 189 | } | 
|---|
| 190 |  | 
|---|
| 191 | // recursion base case, selected when "u" is one byte | 
|---|
| 192 | template <> | 
|---|
| 193 | inline char * head<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u) | 
|---|
| 194 | { | 
|---|
| 195 | return u < 10 | 
|---|
| 196 | ? outDigit(p, u) | 
|---|
| 197 | : outTwoDigits(p, u); | 
|---|
| 198 | } | 
|---|
| 199 |  | 
|---|
| 200 | //===----------------------------------------------------------===// | 
|---|
| 201 | //     tail: produce all digits including leading zeros | 
|---|
| 202 | //===----------------------------------------------------------===// | 
|---|
| 203 |  | 
|---|
| 204 | // recursive step, "u" is less than 10^2*N | 
|---|
| 205 | template <typename UInt, size_t N> | 
|---|
| 206 | static inline char * tail(char * p, UInt u) | 
|---|
| 207 | { | 
|---|
| 208 | QuotientAndRemainder<N> x = split<N>(u); | 
|---|
| 209 | p = tail(p, UnsignedOfSize<N / 2>(x.quotient)); | 
|---|
| 210 | p = tail(p, x.remainder); | 
|---|
| 211 | return p; | 
|---|
| 212 | } | 
|---|
| 213 |  | 
|---|
| 214 | // recursion base case, selected when "u" is one byte | 
|---|
| 215 | template <> | 
|---|
| 216 | inline char * tail<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u) | 
|---|
| 217 | { | 
|---|
| 218 | return outTwoDigits(p, u); | 
|---|
| 219 | } | 
|---|
| 220 |  | 
|---|
| 221 | //===----------------------------------------------------------===// | 
|---|
| 222 | // large values are >= 10^2*N | 
|---|
| 223 | // where x contains quotient and remainder after division by 10^N | 
|---|
| 224 | //===----------------------------------------------------------===// | 
|---|
| 225 |  | 
|---|
| 226 | template <size_t N> | 
|---|
| 227 | static inline char * large(char * p, QuotientAndRemainder<N> x) | 
|---|
| 228 | { | 
|---|
| 229 | QuotientAndRemainder<N> y = split<N>(x.quotient); | 
|---|
| 230 | p = head(p, UnsignedOfSize<N / 2>(y.quotient)); | 
|---|
| 231 | p = tail(p, y.remainder); | 
|---|
| 232 | p = tail(p, x.remainder); | 
|---|
| 233 | return p; | 
|---|
| 234 | } | 
|---|
| 235 |  | 
|---|
| 236 | //===----------------------------------------------------------===// | 
|---|
| 237 | // handle values of "u" that might be >= 10^2*N | 
|---|
| 238 | // where N is the size of "u" in bytes | 
|---|
| 239 | //===----------------------------------------------------------===// | 
|---|
| 240 |  | 
|---|
| 241 | template <typename UInt, size_t N = sizeof(UInt)> | 
|---|
| 242 | static inline char * uitoa(char * p, UInt u) | 
|---|
| 243 | { | 
|---|
| 244 | if (u < pow10<UnsignedOfSize<N>>(N)) | 
|---|
| 245 | return head(p, UnsignedOfSize<N / 2>(u)); | 
|---|
| 246 | QuotientAndRemainder<N> x = split<N>(u); | 
|---|
| 247 |  | 
|---|
| 248 | return u < pow10<UnsignedOfSize<N>>(2 * N) | 
|---|
| 249 | ? head<N>(p, x) | 
|---|
| 250 | : large<N>(p, x); | 
|---|
| 251 | } | 
|---|
| 252 |  | 
|---|
| 253 | // selected when "u" is one byte | 
|---|
| 254 | template <> | 
|---|
| 255 | inline char * uitoa<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u) | 
|---|
| 256 | { | 
|---|
| 257 | if (u < 10) | 
|---|
| 258 | return outDigit(p, u); | 
|---|
| 259 | else if (u < 100) | 
|---|
| 260 | return outTwoDigits(p, u); | 
|---|
| 261 | else | 
|---|
| 262 | { | 
|---|
| 263 | p = outDigit(p, u / 100); | 
|---|
| 264 | p = outTwoDigits(p, u % 100); | 
|---|
| 265 | return p; | 
|---|
| 266 | } | 
|---|
| 267 | } | 
|---|
| 268 |  | 
|---|
| 269 | //===----------------------------------------------------------===// | 
|---|
| 270 | //     handle unsigned and signed integral operands | 
|---|
| 271 | //===----------------------------------------------------------===// | 
|---|
| 272 |  | 
|---|
| 273 | // itoa: handle unsigned integral operands (selected by SFINAE) | 
|---|
| 274 | template <typename U, std::enable_if_t<!std::is_signed_v<U> && std::is_integral_v<U>> * = nullptr> | 
|---|
| 275 | static inline char * itoa(U u, char * p) | 
|---|
| 276 | { | 
|---|
| 277 | return convert::uitoa(p, u); | 
|---|
| 278 | } | 
|---|
| 279 |  | 
|---|
| 280 | // itoa: handle signed integral operands (selected by SFINAE) | 
|---|
| 281 | template <typename I, size_t N = sizeof(I), std::enable_if_t<std::is_signed_v<I> && std::is_integral_v<I>> * = nullptr> | 
|---|
| 282 | static inline char * itoa(I i, char * p) | 
|---|
| 283 | { | 
|---|
| 284 | // Need "mask" to be filled with a copy of the sign bit. | 
|---|
| 285 | // If "i" is a negative value, then the result of "operator >>" | 
|---|
| 286 | // is implementation-defined, though usually it is an arithmetic | 
|---|
| 287 | // right shift that replicates the sign bit. | 
|---|
| 288 | // Use a conditional expression to be portable, | 
|---|
| 289 | // a good optimizing compiler generates an arithmetic right shift | 
|---|
| 290 | // and avoids the conditional branch. | 
|---|
| 291 | UnsignedOfSize<N> mask = i < 0 ? ~UnsignedOfSize<N>(0) : 0; | 
|---|
| 292 | // Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize<N>. | 
|---|
| 293 | // Cannot use std::abs() because the result is undefined | 
|---|
| 294 | // in 2's complement systems for the most-negative value. | 
|---|
| 295 | // Want to avoid conditional branch for performance reasons since | 
|---|
| 296 | // CPU branch prediction will be ineffective when negative values | 
|---|
| 297 | // occur randomly. | 
|---|
| 298 | // Let "u" be "i" cast to unsigned type UnsignedOfSize<N>. | 
|---|
| 299 | // Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative. | 
|---|
| 300 | // This yields the absolute value with the desired type without | 
|---|
| 301 | // using a conditional branch and without invoking undefined or | 
|---|
| 302 | // implementation defined behavior: | 
|---|
| 303 | UnsignedOfSize<N> u = ((2 * UnsignedOfSize<N>(i)) & ~mask) - UnsignedOfSize<N>(i); | 
|---|
| 304 | // Unconditionally store a minus sign when producing digits | 
|---|
| 305 | // in a forward direction and increment the pointer only if | 
|---|
| 306 | // the value is in fact negative. | 
|---|
| 307 | // This avoids a conditional branch and is safe because we will | 
|---|
| 308 | // always produce at least one digit and it will overwrite the | 
|---|
| 309 | // minus sign when the value is not negative. | 
|---|
| 310 | *p = '-'; | 
|---|
| 311 | p += (mask & 1); | 
|---|
| 312 | p = convert::uitoa(p, u); | 
|---|
| 313 | return p; | 
|---|
| 314 | } | 
|---|
| 315 | } | 
|---|
| 316 |  | 
|---|
| 317 | static inline int digits10(uint128_t x) | 
|---|
| 318 | { | 
|---|
| 319 | if (x < 10ULL) | 
|---|
| 320 | return 1; | 
|---|
| 321 | if (x < 100ULL) | 
|---|
| 322 | return 2; | 
|---|
| 323 | if (x < 1000ULL) | 
|---|
| 324 | return 3; | 
|---|
| 325 |  | 
|---|
| 326 | if (x < 1000000000000ULL) | 
|---|
| 327 | { | 
|---|
| 328 | if (x < 100000000ULL) | 
|---|
| 329 | { | 
|---|
| 330 | if (x < 1000000ULL) | 
|---|
| 331 | { | 
|---|
| 332 | if (x < 10000ULL) | 
|---|
| 333 | return 4; | 
|---|
| 334 | else | 
|---|
| 335 | return 5 + (x >= 100000ULL); | 
|---|
| 336 | } | 
|---|
| 337 |  | 
|---|
| 338 | return 7 + (x >= 10000000ULL); | 
|---|
| 339 | } | 
|---|
| 340 |  | 
|---|
| 341 | if (x < 10000000000ULL) | 
|---|
| 342 | return 9 + (x >= 1000000000ULL); | 
|---|
| 343 |  | 
|---|
| 344 | return 11 + (x >= 100000000000ULL); | 
|---|
| 345 | } | 
|---|
| 346 |  | 
|---|
| 347 | return 12 + digits10(x / 1000000000000ULL); | 
|---|
| 348 | } | 
|---|
| 349 |  | 
|---|
| 350 | static inline char * writeUIntText(uint128_t x, char * p) | 
|---|
| 351 | { | 
|---|
| 352 | int len = digits10(x); | 
|---|
| 353 | auto pp = p + len; | 
|---|
| 354 | while (x >= 100) | 
|---|
| 355 | { | 
|---|
| 356 | const auto i = x % 100; | 
|---|
| 357 | x /= 100; | 
|---|
| 358 | pp -= 2; | 
|---|
| 359 | outTwoDigits(pp, i); | 
|---|
| 360 | } | 
|---|
| 361 | if (x < 10) | 
|---|
| 362 | *p = '0' + x; | 
|---|
| 363 | else | 
|---|
| 364 | outTwoDigits(p, x); | 
|---|
| 365 | return p + len; | 
|---|
| 366 | } | 
|---|
| 367 |  | 
|---|
| 368 | static inline char * writeLeadingMinus(char * pos) | 
|---|
| 369 | { | 
|---|
| 370 | *pos = '-'; | 
|---|
| 371 | return pos + 1; | 
|---|
| 372 | } | 
|---|
| 373 |  | 
|---|
| 374 | static inline char * writeSIntText(int128_t x, char * pos) | 
|---|
| 375 | { | 
|---|
| 376 | static const int128_t min_int128 = int128_t(0x8000000000000000ll) << 64; | 
|---|
| 377 |  | 
|---|
| 378 | if (unlikely(x == min_int128)) | 
|---|
| 379 | { | 
|---|
| 380 | memcpy(pos, "-170141183460469231731687303715884105728", 40); | 
|---|
| 381 | return pos + 40; | 
|---|
| 382 | } | 
|---|
| 383 |  | 
|---|
| 384 | if (x < 0) | 
|---|
| 385 | { | 
|---|
| 386 | x = -x; | 
|---|
| 387 | pos = writeLeadingMinus(pos); | 
|---|
| 388 | } | 
|---|
| 389 | return writeUIntText(static_cast<uint128_t>(x), pos); | 
|---|
| 390 | } | 
|---|
| 391 |  | 
|---|
| 392 | } | 
|---|
| 393 |  | 
|---|
| 394 | template <typename I> | 
|---|
| 395 | char * itoa(I i, char * p) | 
|---|
| 396 | { | 
|---|
| 397 | return impl::convert::itoa(i, p); | 
|---|
| 398 | } | 
|---|
| 399 |  | 
|---|
| 400 | template <> | 
|---|
| 401 | inline char * itoa<uint128_t>(uint128_t i, char * p) | 
|---|
| 402 | { | 
|---|
| 403 | return impl::writeUIntText(i, p); | 
|---|
| 404 | } | 
|---|
| 405 |  | 
|---|
| 406 | template <> | 
|---|
| 407 | inline char * itoa<int128_t>(int128_t i, char * p) | 
|---|
| 408 | { | 
|---|
| 409 | return impl::writeSIntText(i, p); | 
|---|
| 410 | } | 
|---|
| 411 |  | 
|---|