1#include "simdjson/internal/jsoncharutils_tables.h"
2#include "simdjson/internal/numberparsing_tables.h"
3
4namespace simdjson {
5namespace SIMDJSON_IMPLEMENTATION {
6namespace {
7namespace jsoncharutils {
8
9// return non-zero if not a structural or whitespace char
10// zero otherwise
11simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) {
12 return internal::structural_or_whitespace_negated[c];
13}
14
15simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) {
16 return internal::structural_or_whitespace[c];
17}
18
19// returns a value with the high 16 bits set if not valid
20// otherwise returns the conversion of the 4 hex digits at src into the bottom
21// 16 bits of the 32-bit return register
22//
23// see
24// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/
25static inline uint32_t hex_to_u32_nocheck(
26 const uint8_t *src) { // strictly speaking, static inline is a C-ism
27 uint32_t v1 = internal::digit_to_val32[630 + src[0]];
28 uint32_t v2 = internal::digit_to_val32[420 + src[1]];
29 uint32_t v3 = internal::digit_to_val32[210 + src[2]];
30 uint32_t v4 = internal::digit_to_val32[0 + src[3]];
31 return v1 | v2 | v3 | v4;
32}
33
// Encodes the code point cp as UTF-8 into the buffer c and returns the number
// of bytes written (1 to 4). A return value of 0 means cp is larger than
// 0x10FFFF and nothing was written.
//
// The buffer c must have room for the bytes produced (up to 4).
//
// Note: surrogates (0xD800..0xDFFF) are NOT rejected here; they are assumed
// to be handled separately by the caller and would simply be emitted as a
// 3-byte sequence.
//
// This could possibly be made faster using pdep, clz and table lookups, but
// JSON documents contain few escaped code points and this routine is cheap.
simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
  // 1 byte: 0xxxxxxx (ASCII)
  if (cp < 0x80) {
    c[0] = static_cast<uint8_t>(cp);
    return 1;
  }
  // 2 bytes: 110xxxxx 10xxxxxx
  if (cp < 0x800) {
    c[0] = static_cast<uint8_t>(0xC0 | (cp >> 6));
    c[1] = static_cast<uint8_t>(0x80 | (cp & 0x3F));
    return 2;
  }
  // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
  if (cp < 0x10000) {
    c[0] = static_cast<uint8_t>(0xE0 | (cp >> 12));
    c[1] = static_cast<uint8_t>(0x80 | ((cp >> 6) & 0x3F));
    c[2] = static_cast<uint8_t>(0x80 | (cp & 0x3F));
    return 3;
  }
  // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  // (this range check is redundant if cp is already known to be valid)
  if (cp < 0x110000) {
    c[0] = static_cast<uint8_t>(0xF0 | (cp >> 18));
    c[1] = static_cast<uint8_t>(0x80 | ((cp >> 12) & 0x3F));
    c[2] = static_cast<uint8_t>(0x80 | ((cp >> 6) & 0x3F));
    c[3] = static_cast<uint8_t>(0x80 | (cp & 0x3F));
    return 4;
  }
  // The code point is too large to be encoded.
  return 0;
}
74
75#if SIMDJSON_IS_32BITS // _umul128 for x86, arm
// Slow emulation routine used on 32-bit targets: multiplies two unsigned
// 32-bit integers and returns the full, non-truncated 64-bit product.
static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) {
  // Widen before multiplying so the product is computed in 64 bits.
  const uint64_t wide_x = static_cast<uint64_t>(x);
  const uint64_t wide_y = static_cast<uint64_t>(y);
  return wide_x * wide_y;
}
81static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
82 uint64_t ad = __emulu((uint32_t)(ab >> 32), (uint32_t)cd);
83 uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd);
84 uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> 32));
85 uint64_t adbc_carry = !!(adbc < ad);
86 uint64_t lo = bd + (adbc << 32);
87 *hi = __emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) +
88 (adbc_carry << 32) + !!(lo < bd);
89 return lo;
90}
91#endif
92
93using internal::value128;
94
95simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) {
96 value128 answer;
97#if SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS
98#ifdef _M_ARM64
99 // ARM64 has native support for 64-bit multiplications, no need to emultate
100 answer.high = __umulh(value1, value2);
101 answer.low = value1 * value2;
102#else
103 answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64
104#endif // _M_ARM64
105#else // SIMDJSON_REGULAR_VISUAL_STUDIO || SIMDJSON_IS_32BITS
106 __uint128_t r = (static_cast<__uint128_t>(value1)) * value2;
107 answer.low = uint64_t(r);
108 answer.high = uint64_t(r >> 64);
109#endif
110 return answer;
111}
112
113} // namespace jsoncharutils
114} // unnamed namespace
115} // namespace SIMDJSON_IMPLEMENTATION
116} // namespace simdjson
117