| 1 | #include "simdjson/internal/jsoncharutils_tables.h" |
| 2 | #include "simdjson/internal/numberparsing_tables.h" |
| 3 | |
| 4 | namespace simdjson { |
| 5 | namespace SIMDJSON_IMPLEMENTATION { |
| 6 | namespace { |
| 7 | namespace jsoncharutils { |
| 8 | |
| 9 | // return non-zero if not a structural or whitespace char |
| 10 | // zero otherwise |
| 11 | simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) { |
| 12 | return internal::structural_or_whitespace_negated[c]; |
| 13 | } |
| 14 | |
| 15 | simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) { |
| 16 | return internal::structural_or_whitespace[c]; |
| 17 | } |
| 18 | |
| 19 | // returns a value with the high 16 bits set if not valid |
| 20 | // otherwise returns the conversion of the 4 hex digits at src into the bottom |
| 21 | // 16 bits of the 32-bit return register |
| 22 | // |
| 23 | // see |
| 24 | // https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/ |
| 25 | static inline uint32_t hex_to_u32_nocheck( |
| 26 | const uint8_t *src) { // strictly speaking, static inline is a C-ism |
| 27 | uint32_t v1 = internal::digit_to_val32[630 + src[0]]; |
| 28 | uint32_t v2 = internal::digit_to_val32[420 + src[1]]; |
| 29 | uint32_t v3 = internal::digit_to_val32[210 + src[2]]; |
| 30 | uint32_t v4 = internal::digit_to_val32[0 + src[3]]; |
| 31 | return v1 | v2 | v3 | v4; |
| 32 | } |
| 33 | |
// Encodes the code point `cp` as UTF-8 into the buffer `c` and returns the
// number of bytes written (1 to 4). A return value of 0 means the code point
// was too large (above 0x10FFFF) and nothing was written.
//
// The caller must provide room for up to four bytes at `c`.
//
// Surrogates (0xD800-0xDFFF) are NOT rejected here: they are assumed to be
// handled separately by the caller and are encoded like any other value in
// the three-byte range.
//
// A pdep/clz/table-lookup version could be faster, but escaped code points
// are rare in JSON documents and this straightforward version is cheap.
simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
  // One byte: plain ASCII, 0xxxxxxx.
  if (cp < 0x80) {
    c[0] = uint8_t(cp);
    return 1;
  }
  // Two bytes: 110xxxxx 10xxxxxx.
  if (cp < 0x800) {
    c[0] = uint8_t((cp >> 6) | 0xC0);
    c[1] = uint8_t((cp & 0x3F) | 0x80);
    return 2;
  }
  // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
  if (cp < 0x10000) {
    c[0] = uint8_t((cp >> 12) | 0xE0);
    c[1] = uint8_t(((cp >> 6) & 0x3F) | 0x80);
    c[2] = uint8_t((cp & 0x3F) | 0x80);
    return 3;
  }
  // Beyond the last valid code point. This check is redundant when the caller
  // already knows the code point is valid.
  if (cp > 0x10FFFF) {
    return 0;
  }
  // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
  c[0] = uint8_t((cp >> 18) | 0xF0);
  c[1] = uint8_t(((cp >> 12) & 0x3F) | 0x80);
  c[2] = uint8_t(((cp >> 6) & 0x3F) | 0x80);
  c[3] = uint8_t((cp & 0x3F) | 0x80);
  return 4;
}
| 74 | |
| 75 | #if SIMDJSON_IS_32BITS // _umul128 for x86, arm |
// Slow software emulation of the wide-multiply intrinsics, used on 32-bit
// targets (x86, arm) where they are not provided.
//
// __emulu: widening multiply, 32 bits x 32 bits -> 64 bits.
static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) {
  return static_cast<uint64_t>(x) * y;
}
// _umul128: 64 x 64 -> 128-bit unsigned multiply.
// Returns the low 64 bits of ab * cd and stores the high 64 bits in *hi.
//
// Writing ab = a * 2^32 + b and cd = c * 2^32 + d, the product is
//   a*c * 2^64 + (a*d + b*c) * 2^32 + b*d
// and each of the four partial products fits in 64 bits.
static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
  const uint32_t a = static_cast<uint32_t>(ab >> 32);
  const uint32_t b = static_cast<uint32_t>(ab);
  const uint32_t c = static_cast<uint32_t>(cd >> 32);
  const uint32_t d = static_cast<uint32_t>(cd);

  const uint64_t a_times_d = __emulu(a, d);
  const uint64_t b_times_d = __emulu(b, d);
  // Sum of the two middle terms; it may wrap around 2^64, which is detected
  // by the sum being smaller than one of its operands.
  const uint64_t middle = a_times_d + __emulu(b, c);
  const uint64_t middle_carry = (middle < a_times_d) ? 1 : 0;
  // The low half of the middle sum lands in the upper 32 bits of the low word;
  // that addition can overflow into the high word as well.
  const uint64_t low = b_times_d + (middle << 32);
  const uint64_t low_carry = (low < b_times_d) ? 1 : 0;

  *hi = __emulu(a, c) + (middle >> 32) + (middle_carry << 32) + low_carry;
  return low;
}
| 91 | #endif |
| 92 | |
| 93 | using internal::value128; |
| 94 | |
| 95 | simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) { |
| 96 | value128 answer; |
| 97 | #if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS |
| 98 | #ifdef _M_ARM64 |
| 99 | // ARM64 has native support for 64-bit multiplications, no need to emultate |
| 100 | answer.high = __umulh(value1, value2); |
| 101 | answer.low = value1 * value2; |
| 102 | #else |
| 103 | answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64 |
| 104 | #endif // _M_ARM64 |
| 105 | #else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS |
| 106 | __uint128_t r = (static_cast<__uint128_t>(value1)) * value2; |
| 107 | answer.low = uint64_t(r); |
| 108 | answer.high = uint64_t(r >> 64); |
| 109 | #endif |
| 110 | return answer; |
| 111 | } |
| 112 | |
| 113 | } // namespace jsoncharutils |
| 114 | } // unnamed namespace |
| 115 | } // namespace SIMDJSON_IMPLEMENTATION |
| 116 | } // namespace simdjson |
| 117 | |