jsoncharutils.h source code [Velox/build/_deps/simdjson-src/include/simdjson/generic/jsoncharutils.h]

1	#include "simdjson/internal/jsoncharutils_tables.h"
2	#include "simdjson/internal/numberparsing_tables.h"
3
4	namespace simdjson {
5	namespace SIMDJSON_IMPLEMENTATION {
6	namespace {
7	namespace jsoncharutils {
8
9	// return non-zero if not a structural or whitespace char
10	// zero otherwise
11	simdjson_inline uint32_t is_not_structural_or_whitespace(uint8_t c) {
12	return internal::structural_or_whitespace_negated[c];
13	}
14
15	simdjson_inline uint32_t is_structural_or_whitespace(uint8_t c) {
16	return internal::structural_or_whitespace[c];
17	}
18
19	// returns a value with the high 16 bits set if not valid
20	// otherwise returns the conversion of the 4 hex digits at src into the bottom
21	// 16 bits of the 32-bit return register
22	//
23	// see
24	// https://lemire.me/blog/2019/04/17/parsing-short-hexadecimal-strings-efficiently/
25	static inline uint32_t hex_to_u32_nocheck(
26	const uint8_t src) { // strictly speaking, static inline is a C-ism*
27	uint32_t v1 = internal::digit_to_val32[`630` + src[`0`]];
28	uint32_t v2 = internal::digit_to_val32[`420` + src[`1`]];
29	uint32_t v3 = internal::digit_to_val32[`210` + src[`2`]];
30	uint32_t v4 = internal::digit_to_val32[`0` + src[`3`]];
31	return v1 \| v2 \| v3 \| v4;
32	}
33
34	// given a code point cp, writes to c
35	// the utf-8 code, outputting the length in
36	// bytes, if the length is zero, the code point
37	// is invalid
38	//
39	// This can possibly be made faster using pdep
40	// and clz and table lookups, but JSON documents
41	// have few escaped code points, and the following
42	// function looks cheap.
43	//
44	// Note: we assume that surrogates are treated separately
45	//
46	simdjson_inline size_t codepoint_to_utf8(uint32_t cp, uint8_t *c) {
47	if (cp <= `0x7F`) {
48	c[`0`] = uint8_t(cp);
49	return `1`; // ascii
50	}
51	if (cp <= `0x7FF`) {
52	c[`0`] = uint8_t((cp >> `6`) + `192`);
53	c[`1`] = uint8_t((cp & `63`) + `128`);
54	return `2`; // universal plane
55	// Surrogates are treated elsewhere...
56	//} //else if (0xd800 <= cp && cp <= 0xdfff) {
57	// return 0; // surrogates // could put assert here
58	} else if (cp <= `0xFFFF`) {
59	c[`0`] = uint8_t((cp >> `12`) + `224`);
60	c[`1`] = uint8_t(((cp >> `6`) & `63`) + `128`);
61	c[`2`] = uint8_t((cp & `63`) + `128`);
62	return `3`;
63	} else if (cp <= `0x10FFFF`) { // if you know you have a valid code point, this
64	// is not needed
65	c[`0`] = uint8_t((cp >> `18`) + `240`);
66	c[`1`] = uint8_t(((cp >> `12`) & `63`) + `128`);
67	c[`2`] = uint8_t(((cp >> `6`) & `63`) + `128`);
68	c[`3`] = uint8_t((cp & `63`) + `128`);
69	return `4`;
70	}
71	// will return 0 when the code point was too large.
72	return `0`; // bad r
73	}
74
75	#if SIMDJSON_IS_32BITS // _umul128 for x86, arm
76	// this is a slow emulation routine for 32-bit
77	//
78	static simdjson_inline uint64_t __emulu(uint32_t x, uint32_t y) {
79	return x * (uint64_t)y;
80	}
81	static simdjson_inline uint64_t _umul128(uint64_t ab, uint64_t cd, uint64_t *hi) {
82	uint64_t ad = __emulu((uint32_t)(ab >> `32`), (uint32_t)cd);
83	uint64_t bd = __emulu((uint32_t)ab, (uint32_t)cd);
84	uint64_t adbc = ad + __emulu((uint32_t)ab, (uint32_t)(cd >> `32`));
85	uint64_t adbc_carry = !!(adbc < ad);
86	uint64_t lo = bd + (adbc << `32`);
87	*hi = __emulu((uint32_t)(ab >> `32`), (uint32_t)(cd >> `32`)) + (adbc >> `32`) +
88	(adbc_carry << `32`) + !!(lo < bd);
89	return lo;
90	}
91	#endif
92
93	using internal::value128;
94
95	simdjson_inline value128 full_multiplication(uint64_t value1, uint64_t value2) {
96	value128 answer;
97	#if SIMDJSON_REGULAR_VISUAL_STUDIO \|\| SIMDJSON_IS_32BITS
98	#ifdef _M_ARM64
99	// ARM64 has native support for 64-bit multiplications, no need to emultate
100	answer.high = __umulh(value1, value2);
101	answer.low = value1 * value2;
102	#else
103	answer.low = _umul128(value1, value2, &answer.high); // _umul128 not available on ARM64
104	#endif // _M_ARM64
105	#else // SIMDJSON_REGULAR_VISUAL_STUDIO \|\| SIMDJSON_IS_32BITS
106	__uint128_t r = (static_cast<__uint128_t>(value1)) * value2;
107	answer.low = uint64_t(r);
108	answer.high = uint64_t(r >> `64`);
109	#endif
110	return answer;
111	}
112
113	} // namespace jsoncharutils
114	} // unnamed namespace
115	} // namespace SIMDJSON_IMPLEMENTATION
116	} // namespace simdjson
117

Browse the source code of Velox/build/_deps/simdjson-src/include/simdjson/generic/jsoncharutils.h