1 | /* |
2 | * Copyright 2018 Google Inc. |
3 | * |
4 | * Use of this source code is governed by a BSD-style license that can be |
5 | * found in the LICENSE file. |
6 | */ |
7 | |
8 | #include "src/utils/SkJSON.h" |
9 | |
10 | #include "include/core/SkStream.h" |
11 | #include "include/core/SkString.h" |
12 | #include "include/private/SkMalloc.h" |
13 | #include "include/utils/SkParse.h" |
14 | #include "src/utils/SkUTF.h" |
15 | |
16 | #include <cmath> |
17 | #include <tuple> |
18 | #include <vector> |
19 | |
20 | namespace skjson { |
21 | |
22 | // #define SK_JSON_REPORT_ERRORS |
23 | |
// Layout invariants the rest of this file relies on: a Value record is exactly
// 8 bytes wide and 8-byte aligned.
static_assert( sizeof(Value) == 8, "");
static_assert(alignof(Value) == 8, "");

// Alignment requested from the arena for externally allocated vector storage
// (used by MakeVector below).
static constexpr size_t kRecAlign = alignof(Value);
28 | |
29 | void Value::init_tagged(Tag t) { |
30 | memset(fData8, 0, sizeof(fData8)); |
31 | fData8[0] = SkTo<uint8_t>(t); |
32 | SkASSERT(this->getTag() == t); |
33 | } |
34 | |
35 | // Pointer values store a type (in the lower kTagBits bits) and a pointer. |
36 | void Value::init_tagged_pointer(Tag t, void* p) { |
37 | if (sizeof(Value) == sizeof(uintptr_t)) { |
38 | *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p); |
39 | // For 64-bit, we rely on the pointer lower bits being zero. |
40 | SkASSERT(!(fData8[0] & kTagMask)); |
41 | fData8[0] |= SkTo<uint8_t>(t); |
42 | } else { |
43 | // For 32-bit, we store the pointer in the upper word |
44 | SkASSERT(sizeof(Value) == sizeof(uintptr_t) * 2); |
45 | this->init_tagged(t); |
46 | *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p); |
47 | } |
48 | |
49 | SkASSERT(this->getTag() == t); |
50 | SkASSERT(this->ptr<void>() == p); |
51 | } |
52 | |
53 | NullValue::NullValue() { |
54 | this->init_tagged(Tag::kNull); |
55 | SkASSERT(this->getTag() == Tag::kNull); |
56 | } |
57 | |
58 | BoolValue::BoolValue(bool b) { |
59 | this->init_tagged(Tag::kBool); |
60 | *this->cast<bool>() = b; |
61 | SkASSERT(this->getTag() == Tag::kBool); |
62 | } |
63 | |
64 | NumberValue::NumberValue(int32_t i) { |
65 | this->init_tagged(Tag::kInt); |
66 | *this->cast<int32_t>() = i; |
67 | SkASSERT(this->getTag() == Tag::kInt); |
68 | } |
69 | |
70 | NumberValue::NumberValue(float f) { |
71 | this->init_tagged(Tag::kFloat); |
72 | *this->cast<float>() = f; |
73 | SkASSERT(this->getTag() == Tag::kFloat); |
74 | } |
75 | |
76 | // Vector recs point to externally allocated slabs with the following layout: |
77 | // |
78 | // [size_t n] [REC_0] ... [REC_n-1] [optional extra trailing storage] |
79 | // |
80 | // Long strings use extra_alloc_size == 1 to store the \0 terminator. |
81 | // |
82 | template <typename T, size_t extra_alloc_size = 0> |
83 | static void* MakeVector(const void* src, size_t size, SkArenaAlloc& alloc) { |
84 | // The Ts are already in memory, so their size should be safe. |
85 | const auto total_size = sizeof(size_t) + size * sizeof(T) + extra_alloc_size; |
86 | auto* size_ptr = reinterpret_cast<size_t*>(alloc.makeBytesAlignedTo(total_size, kRecAlign)); |
87 | |
88 | *size_ptr = size; |
89 | sk_careful_memcpy(size_ptr + 1, src, size * sizeof(T)); |
90 | |
91 | return size_ptr; |
92 | } |
93 | |
94 | ArrayValue::ArrayValue(const Value* src, size_t size, SkArenaAlloc& alloc) { |
95 | this->init_tagged_pointer(Tag::kArray, MakeVector<Value>(src, size, alloc)); |
96 | SkASSERT(this->getTag() == Tag::kArray); |
97 | } |
98 | |
// Strings have two flavors:
//
// -- short strings (len <= 6) -> these are stored inline, in the record
//    (one byte reserved for the type tag, one for the null terminator):
//
//        [tag] [str] [\0 ...]
//
//    The tag occupies the first byte and the chars follow it.  All unused trailing
//    bytes are zero, so the string is always null-terminated, even when
//    size == max_len.  Because kShortString == 0, the tag byte is itself zero and
//    never needs to be set explicitly.
//
// -- long strings (len > 6) -> these are externally allocated vectors
//    (VectorValue<char, Value::Type::kString>).
//
// The string data plus a null-char terminator are copied over.
//
113 | namespace { |
114 | |
115 | // An internal string builder with a fast 8 byte short string load path |
116 | // (for the common case where the string is not at the end of the stream). |
117 | class FastString final : public Value { |
118 | public: |
119 | FastString(const char* src, size_t size, const char* eos, SkArenaAlloc& alloc) { |
120 | SkASSERT(src <= eos); |
121 | |
122 | if (size > kMaxInlineStringSize) { |
123 | this->initLongString(src, size, alloc); |
124 | SkASSERT(this->getTag() == Tag::kString); |
125 | return; |
126 | } |
127 | |
128 | // initFastShortString is faster (doh), but requires access to 6 chars past src. |
129 | if (src && src + 6 <= eos) { |
130 | this->initFastShortString(src, size); |
131 | } else { |
132 | this->initShortString(src, size); |
133 | } |
134 | |
135 | SkASSERT(this->getTag() == Tag::kShortString); |
136 | } |
137 | |
138 | private: |
139 | // first byte reserved for tagging, \0 terminator => 6 usable chars |
140 | static constexpr size_t kMaxInlineStringSize = sizeof(Value) - 2; |
141 | |
142 | void initLongString(const char* src, size_t size, SkArenaAlloc& alloc) { |
143 | SkASSERT(size > kMaxInlineStringSize); |
144 | |
145 | this->init_tagged_pointer(Tag::kString, MakeVector<char, 1>(src, size, alloc)); |
146 | |
147 | auto* data = this->cast<VectorValue<char, Value::Type::kString>>()->begin(); |
148 | const_cast<char*>(data)[size] = '\0'; |
149 | } |
150 | |
151 | void initShortString(const char* src, size_t size) { |
152 | SkASSERT(size <= kMaxInlineStringSize); |
153 | |
154 | this->init_tagged(Tag::kShortString); |
155 | sk_careful_memcpy(this->cast<char>(), src, size); |
156 | // Null terminator provided by init_tagged() above (fData8 is zero-initialized). |
157 | } |
158 | |
159 | void initFastShortString(const char* src, size_t size) { |
160 | SkASSERT(size <= kMaxInlineStringSize); |
161 | |
162 | uint64_t* s64 = this->cast<uint64_t>(); |
163 | |
164 | // Load 8 chars and mask out the tag and \0 terminator. |
165 | // Note: we picked kShortString == 0 to avoid setting explicitly below. |
166 | static_assert(SkToU8(Tag::kShortString) == 0, "please don't break this" ); |
167 | |
168 | // Since the first byte is occupied by the tag, we want the string chars [0..5] to land |
169 | // on bytes [1..6] => the fastest way is to read8 @(src - 1) (always safe, because the |
170 | // string requires a " prefix at the very least). |
171 | memcpy(s64, src - 1, 8); |
172 | |
173 | #if defined(SK_CPU_LENDIAN) |
174 | // The mask for a max-length string (6), with a leading tag and trailing \0 is |
175 | // 0x00ffffffffffff00. Accounting for the final left-shift, this becomes |
176 | // 0x0000ffffffffffff. |
177 | *s64 &= (0x0000ffffffffffffULL >> ((kMaxInlineStringSize - size) * 8)) // trailing \0s |
178 | << 8; // tag byte |
179 | #else |
180 | static_assert(false, "Big-endian builds are not supported at this time." ); |
181 | #endif |
182 | } |
183 | }; |
184 | |
185 | } // namespace |
186 | |
187 | StringValue::StringValue(const char* src, size_t size, SkArenaAlloc& alloc) { |
188 | new (this) FastString(src, size, src, alloc); |
189 | } |
190 | |
191 | ObjectValue::ObjectValue(const Member* src, size_t size, SkArenaAlloc& alloc) { |
192 | this->init_tagged_pointer(Tag::kObject, MakeVector<Member>(src, size, alloc)); |
193 | SkASSERT(this->getTag() == Tag::kObject); |
194 | } |
195 | |
196 | |
197 | // Boring public Value glue. |
198 | |
// Equality-only string compare: returns 0 when the two null-terminated strings
// are identical and 1 otherwise (there is no less-than / greater-than result).
static int inline_strcmp(const char a[], const char b[]) {
    // Walk both strings in lockstep until |a| runs out or a char differs.
    while (*a != '\0') {
        if (*a++ != *b++) {
            return 1;
        }
    }

    // |a| is exhausted; the strings match only if |b| ends here too.
    return (*b == '\0') ? 0 : 1;
}
211 | |
212 | const Value& ObjectValue::operator[](const char* key) const { |
213 | // Reverse search for duplicates resolution (policy: return last). |
214 | const auto* begin = this->begin(); |
215 | const auto* member = this->end(); |
216 | |
217 | while (member > begin) { |
218 | --member; |
219 | if (0 == inline_strcmp(key, member->fKey.as<StringValue>().begin())) { |
220 | return member->fValue; |
221 | } |
222 | } |
223 | |
224 | static const Value g_null = NullValue(); |
225 | return g_null; |
226 | } |
227 | |
228 | namespace { |
229 | |
230 | // Lexer/parser inspired by rapidjson [1], sajson [2] and pjson [3]. |
231 | // |
232 | // [1] https://github.com/Tencent/rapidjson/ |
233 | // [2] https://github.com/chadaustin/sajson |
234 | // [3] https://pastebin.com/hnhSTL3h |
235 | |
236 | |
// Per-character classification table, indexed by the unsigned char value.
//
// bit 0 (0x01) - plain ASCII string character
// bit 1 (0x02) - whitespace
// bit 2 (0x04) - string terminator (" \\ \0 [control chars] **AND } ]** <- see matchString notes)
// bit 3 (0x08) - 0-9
// bit 4 (0x10) - 0-9 e E .
// bit 5 (0x20) - scope terminator (} ])
static constexpr uint8_t g_token_flags[256] = {
 // 0    1    2    3    4    5    6    7      8    9    A    B    C    D    E    F
    4,   4,   4,   4,   4,   4,   4,   4,     4,   6,   6,   4,   4,   6,   4,   4, // 0
    4,   4,   4,   4,   4,   4,   4,   4,     4,   4,   4,   4,   4,   4,   4,   4, // 1
    3,   1,   4,   1,   1,   1,   1,   1,     1,   1,   1,   1,   1,   1,   0x11,1, // 2
 0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,  0x19,0x19,   1,   1,   1,   1,   1,   1, // 3
    1,   1,   1,   1,   1,   0x11,1,   1,     1,   1,   1,   1,   1,   1,   1,   1, // 4
    1,   1,   1,   1,   1,   1,   1,   1,     1,   1,   1,   1,   4,0x25,   1,   1, // 5
    1,   1,   1,   1,   1,   0x11,1,   1,     1,   1,   1,   1,   1,   1,   1,   1, // 6
    1,   1,   1,   1,   1,   1,   1,   1,     1,   1,   1,   1,   1,0x25,   1,   1, // 7

 // 128-255
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0
};

// Fetches the flag byte for |c|.  The cast keeps chars with the high bit set
// from turning into a negative index.
static inline uint8_t token_flags(char c) {
    return g_token_flags[static_cast<uint8_t>(c)];
}

// One predicate per flag bit of the table above.
static inline bool is_ws(char c)       { return (token_flags(c) & 0x02) != 0; }
static inline bool is_eostring(char c) { return (token_flags(c) & 0x04) != 0; }
static inline bool is_digit(char c)    { return (token_flags(c) & 0x08) != 0; }
static inline bool is_numeric(char c)  { return (token_flags(c) & 0x10) != 0; }
static inline bool is_eoscope(char c)  { return (token_flags(c) & 0x20) != 0; }

// Returns the first position at or after |p| that is not whitespace.
// There is no bounds check: the scan stops only at a non-whitespace char
// (\0 counts as one).
static inline const char* skip_ws(const char* p) {
    for (; is_ws(*p); ++p) {}
    return p;
}
271 | |
272 | static inline float pow10(int32_t exp) { |
273 | static constexpr float g_pow10_table[63] = |
274 | { |
275 | 1.e-031f, 1.e-030f, 1.e-029f, 1.e-028f, 1.e-027f, 1.e-026f, 1.e-025f, 1.e-024f, |
276 | 1.e-023f, 1.e-022f, 1.e-021f, 1.e-020f, 1.e-019f, 1.e-018f, 1.e-017f, 1.e-016f, |
277 | 1.e-015f, 1.e-014f, 1.e-013f, 1.e-012f, 1.e-011f, 1.e-010f, 1.e-009f, 1.e-008f, |
278 | 1.e-007f, 1.e-006f, 1.e-005f, 1.e-004f, 1.e-003f, 1.e-002f, 1.e-001f, 1.e+000f, |
279 | 1.e+001f, 1.e+002f, 1.e+003f, 1.e+004f, 1.e+005f, 1.e+006f, 1.e+007f, 1.e+008f, |
280 | 1.e+009f, 1.e+010f, 1.e+011f, 1.e+012f, 1.e+013f, 1.e+014f, 1.e+015f, 1.e+016f, |
281 | 1.e+017f, 1.e+018f, 1.e+019f, 1.e+020f, 1.e+021f, 1.e+022f, 1.e+023f, 1.e+024f, |
282 | 1.e+025f, 1.e+026f, 1.e+027f, 1.e+028f, 1.e+029f, 1.e+030f, 1.e+031f |
283 | }; |
284 | |
285 | static constexpr int32_t k_exp_offset = SK_ARRAY_COUNT(g_pow10_table) / 2; |
286 | |
287 | // We only support negative exponents for now. |
288 | SkASSERT(exp <= 0); |
289 | |
290 | return (exp >= -k_exp_offset) ? g_pow10_table[exp + k_exp_offset] |
291 | : std::pow(10.0f, static_cast<float>(exp)); |
292 | } |
293 | |
294 | class DOMParser { |
295 | public: |
296 | explicit DOMParser(SkArenaAlloc& alloc) |
297 | : fAlloc(alloc) { |
298 | fValueStack.reserve(kValueStackReserve); |
299 | fUnescapeBuffer.reserve(kUnescapeBufferReserve); |
300 | } |
301 | |
302 | Value parse(const char* p, size_t size) { |
303 | if (!size) { |
304 | return this->error(NullValue(), p, "invalid empty input" ); |
305 | } |
306 | |
307 | const char* p_stop = p + size - 1; |
308 | |
309 | // We're only checking for end-of-stream on object/array close('}',']'), |
310 | // so we must trim any whitespace from the buffer tail. |
311 | while (p_stop > p && is_ws(*p_stop)) --p_stop; |
312 | |
313 | SkASSERT(p_stop >= p && p_stop < p + size); |
314 | if (!is_eoscope(*p_stop)) { |
315 | return this->error(NullValue(), p_stop, "invalid top-level value" ); |
316 | } |
317 | |
318 | p = skip_ws(p); |
319 | |
320 | switch (*p) { |
321 | case '{': |
322 | goto match_object; |
323 | case '[': |
324 | goto match_array; |
325 | default: |
326 | return this->error(NullValue(), p, "invalid top-level value" ); |
327 | } |
328 | |
329 | match_object: |
330 | SkASSERT(*p == '{'); |
331 | p = skip_ws(p + 1); |
332 | |
333 | this->pushObjectScope(); |
334 | |
335 | if (*p == '}') goto pop_object; |
336 | |
337 | // goto match_object_key; |
338 | match_object_key: |
339 | p = skip_ws(p); |
340 | if (*p != '"') return this->error(NullValue(), p, "expected object key" ); |
341 | |
342 | p = this->matchString(p, p_stop, [this](const char* key, size_t size, const char* eos) { |
343 | this->pushObjectKey(key, size, eos); |
344 | }); |
345 | if (!p) return NullValue(); |
346 | |
347 | p = skip_ws(p); |
348 | if (*p != ':') return this->error(NullValue(), p, "expected ':' separator" ); |
349 | |
350 | ++p; |
351 | |
352 | // goto match_value; |
353 | match_value: |
354 | p = skip_ws(p); |
355 | |
356 | switch (*p) { |
357 | case '\0': |
358 | return this->error(NullValue(), p, "unexpected input end" ); |
359 | case '"': |
360 | p = this->matchString(p, p_stop, [this](const char* str, size_t size, const char* eos) { |
361 | this->pushString(str, size, eos); |
362 | }); |
363 | break; |
364 | case '[': |
365 | goto match_array; |
366 | case 'f': |
367 | p = this->matchFalse(p); |
368 | break; |
369 | case 'n': |
370 | p = this->matchNull(p); |
371 | break; |
372 | case 't': |
373 | p = this->matchTrue(p); |
374 | break; |
375 | case '{': |
376 | goto match_object; |
377 | default: |
378 | p = this->matchNumber(p); |
379 | break; |
380 | } |
381 | |
382 | if (!p) return NullValue(); |
383 | |
384 | // goto match_post_value; |
385 | match_post_value: |
386 | SkASSERT(!this->inTopLevelScope()); |
387 | |
388 | p = skip_ws(p); |
389 | switch (*p) { |
390 | case ',': |
391 | ++p; |
392 | if (this->inObjectScope()) { |
393 | goto match_object_key; |
394 | } else { |
395 | SkASSERT(this->inArrayScope()); |
396 | goto match_value; |
397 | } |
398 | case ']': |
399 | goto pop_array; |
400 | case '}': |
401 | goto pop_object; |
402 | default: |
403 | return this->error(NullValue(), p - 1, "unexpected value-trailing token" ); |
404 | } |
405 | |
406 | // unreachable |
407 | SkASSERT(false); |
408 | |
409 | pop_object: |
410 | SkASSERT(*p == '}'); |
411 | |
412 | if (this->inArrayScope()) { |
413 | return this->error(NullValue(), p, "unexpected object terminator" ); |
414 | } |
415 | |
416 | this->popObjectScope(); |
417 | |
418 | // goto pop_common |
419 | pop_common: |
420 | SkASSERT(is_eoscope(*p)); |
421 | |
422 | if (this->inTopLevelScope()) { |
423 | SkASSERT(fValueStack.size() == 1); |
424 | |
425 | // Success condition: parsed the top level element and reached the stop token. |
426 | return p == p_stop |
427 | ? fValueStack.front() |
428 | : this->error(NullValue(), p + 1, "trailing root garbage" ); |
429 | } |
430 | |
431 | if (p == p_stop) { |
432 | return this->error(NullValue(), p, "unexpected end-of-input" ); |
433 | } |
434 | |
435 | ++p; |
436 | |
437 | goto match_post_value; |
438 | |
439 | match_array: |
440 | SkASSERT(*p == '['); |
441 | p = skip_ws(p + 1); |
442 | |
443 | this->pushArrayScope(); |
444 | |
445 | if (*p != ']') goto match_value; |
446 | |
447 | // goto pop_array; |
448 | pop_array: |
449 | SkASSERT(*p == ']'); |
450 | |
451 | if (this->inObjectScope()) { |
452 | return this->error(NullValue(), p, "unexpected array terminator" ); |
453 | } |
454 | |
455 | this->popArrayScope(); |
456 | |
457 | goto pop_common; |
458 | |
459 | SkASSERT(false); |
460 | return NullValue(); |
461 | } |
462 | |
463 | std::tuple<const char*, const SkString> getError() const { |
464 | return std::make_tuple(fErrorToken, fErrorMessage); |
465 | } |
466 | |
467 | private: |
468 | SkArenaAlloc& fAlloc; |
469 | |
470 | // Pending values stack. |
471 | static constexpr size_t kValueStackReserve = 256; |
472 | std::vector<Value> fValueStack; |
473 | |
474 | // String unescape buffer. |
475 | static constexpr size_t kUnescapeBufferReserve = 512; |
476 | std::vector<char> fUnescapeBuffer; |
477 | |
478 | // Tracks the current object/array scope, as an index into fStack: |
479 | // |
480 | // - for objects: fScopeIndex = (index of first value in scope) |
481 | // - for arrays : fScopeIndex = -(index of first value in scope) |
482 | // |
483 | // fScopeIndex == 0 IFF we are at the top level (no current/active scope). |
484 | intptr_t fScopeIndex = 0; |
485 | |
486 | // Error reporting. |
487 | const char* fErrorToken = nullptr; |
488 | SkString fErrorMessage; |
489 | |
490 | bool inTopLevelScope() const { return fScopeIndex == 0; } |
491 | bool inObjectScope() const { return fScopeIndex > 0; } |
492 | bool inArrayScope() const { return fScopeIndex < 0; } |
493 | |
494 | // Helper for masquerading raw primitive types as Values (bypassing tagging, etc). |
495 | template <typename T> |
496 | class RawValue final : public Value { |
497 | public: |
498 | explicit RawValue(T v) { |
499 | static_assert(sizeof(T) <= sizeof(Value), "" ); |
500 | *this->cast<T>() = v; |
501 | } |
502 | |
503 | T operator *() const { return *this->cast<T>(); } |
504 | }; |
505 | |
506 | template <typename VectorT> |
507 | void popScopeAsVec(size_t scope_start) { |
508 | SkASSERT(scope_start > 0); |
509 | SkASSERT(scope_start <= fValueStack.size()); |
510 | |
511 | using T = typename VectorT::ValueT; |
512 | static_assert( sizeof(T) >= sizeof(Value), "" ); |
513 | static_assert( sizeof(T) % sizeof(Value) == 0, "" ); |
514 | static_assert(alignof(T) == alignof(Value), "" ); |
515 | |
516 | const auto scope_count = fValueStack.size() - scope_start, |
517 | count = scope_count / (sizeof(T) / sizeof(Value)); |
518 | SkASSERT(scope_count % (sizeof(T) / sizeof(Value)) == 0); |
519 | |
520 | const auto* begin = reinterpret_cast<const T*>(fValueStack.data() + scope_start); |
521 | |
522 | // Restore the previous scope index from saved placeholder value, |
523 | // and instantiate as a vector of values in scope. |
524 | auto& placeholder = fValueStack[scope_start - 1]; |
525 | fScopeIndex = *static_cast<RawValue<intptr_t>&>(placeholder); |
526 | placeholder = VectorT(begin, count, fAlloc); |
527 | |
528 | // Drop the (consumed) values in scope. |
529 | fValueStack.resize(scope_start); |
530 | } |
531 | |
532 | void pushObjectScope() { |
533 | // Save a scope index now, and then later we'll overwrite this value as the Object itself. |
534 | fValueStack.push_back(RawValue<intptr_t>(fScopeIndex)); |
535 | |
536 | // New object scope. |
537 | fScopeIndex = SkTo<intptr_t>(fValueStack.size()); |
538 | } |
539 | |
540 | void popObjectScope() { |
541 | SkASSERT(this->inObjectScope()); |
542 | this->popScopeAsVec<ObjectValue>(SkTo<size_t>(fScopeIndex)); |
543 | |
544 | SkDEBUGCODE( |
545 | const auto& obj = fValueStack.back().as<ObjectValue>(); |
546 | SkASSERT(obj.is<ObjectValue>()); |
547 | for (const auto& member : obj) { |
548 | SkASSERT(member.fKey.is<StringValue>()); |
549 | } |
550 | ) |
551 | } |
552 | |
553 | void pushArrayScope() { |
554 | // Save a scope index now, and then later we'll overwrite this value as the Array itself. |
555 | fValueStack.push_back(RawValue<intptr_t>(fScopeIndex)); |
556 | |
557 | // New array scope. |
558 | fScopeIndex = -SkTo<intptr_t>(fValueStack.size()); |
559 | } |
560 | |
561 | void popArrayScope() { |
562 | SkASSERT(this->inArrayScope()); |
563 | this->popScopeAsVec<ArrayValue>(SkTo<size_t>(-fScopeIndex)); |
564 | |
565 | SkDEBUGCODE( |
566 | const auto& arr = fValueStack.back().as<ArrayValue>(); |
567 | SkASSERT(arr.is<ArrayValue>()); |
568 | ) |
569 | } |
570 | |
571 | void pushObjectKey(const char* key, size_t size, const char* eos) { |
572 | SkASSERT(this->inObjectScope()); |
573 | SkASSERT(fValueStack.size() >= SkTo<size_t>(fScopeIndex)); |
574 | SkASSERT(!((fValueStack.size() - SkTo<size_t>(fScopeIndex)) & 1)); |
575 | this->pushString(key, size, eos); |
576 | } |
577 | |
578 | void pushTrue() { |
579 | fValueStack.push_back(BoolValue(true)); |
580 | } |
581 | |
582 | void pushFalse() { |
583 | fValueStack.push_back(BoolValue(false)); |
584 | } |
585 | |
586 | void pushNull() { |
587 | fValueStack.push_back(NullValue()); |
588 | } |
589 | |
590 | void pushString(const char* s, size_t size, const char* eos) { |
591 | fValueStack.push_back(FastString(s, size, eos, fAlloc)); |
592 | } |
593 | |
594 | void pushInt32(int32_t i) { |
595 | fValueStack.push_back(NumberValue(i)); |
596 | } |
597 | |
598 | void pushFloat(float f) { |
599 | fValueStack.push_back(NumberValue(f)); |
600 | } |
601 | |
602 | template <typename T> |
603 | T error(T&& ret_val, const char* p, const char* msg) { |
604 | #if defined(SK_JSON_REPORT_ERRORS) |
605 | fErrorToken = p; |
606 | fErrorMessage.set(msg); |
607 | #endif |
608 | return ret_val; |
609 | } |
610 | |
611 | const char* matchTrue(const char* p) { |
612 | SkASSERT(p[0] == 't'); |
613 | |
614 | if (p[1] == 'r' && p[2] == 'u' && p[3] == 'e') { |
615 | this->pushTrue(); |
616 | return p + 4; |
617 | } |
618 | |
619 | return this->error(nullptr, p, "invalid token" ); |
620 | } |
621 | |
622 | const char* matchFalse(const char* p) { |
623 | SkASSERT(p[0] == 'f'); |
624 | |
625 | if (p[1] == 'a' && p[2] == 'l' && p[3] == 's' && p[4] == 'e') { |
626 | this->pushFalse(); |
627 | return p + 5; |
628 | } |
629 | |
630 | return this->error(nullptr, p, "invalid token" ); |
631 | } |
632 | |
633 | const char* matchNull(const char* p) { |
634 | SkASSERT(p[0] == 'n'); |
635 | |
636 | if (p[1] == 'u' && p[2] == 'l' && p[3] == 'l') { |
637 | this->pushNull(); |
638 | return p + 4; |
639 | } |
640 | |
641 | return this->error(nullptr, p, "invalid token" ); |
642 | } |
643 | |
644 | const std::vector<char>* unescapeString(const char* begin, const char* end) { |
645 | fUnescapeBuffer.clear(); |
646 | |
647 | for (const auto* p = begin; p != end; ++p) { |
648 | if (*p != '\\') { |
649 | fUnescapeBuffer.push_back(*p); |
650 | continue; |
651 | } |
652 | |
653 | if (++p == end) { |
654 | return nullptr; |
655 | } |
656 | |
657 | switch (*p) { |
658 | case '"': fUnescapeBuffer.push_back( '"'); break; |
659 | case '\\': fUnescapeBuffer.push_back('\\'); break; |
660 | case '/': fUnescapeBuffer.push_back( '/'); break; |
661 | case 'b': fUnescapeBuffer.push_back('\b'); break; |
662 | case 'f': fUnescapeBuffer.push_back('\f'); break; |
663 | case 'n': fUnescapeBuffer.push_back('\n'); break; |
664 | case 'r': fUnescapeBuffer.push_back('\r'); break; |
665 | case 't': fUnescapeBuffer.push_back('\t'); break; |
666 | case 'u': { |
667 | if (p + 4 >= end) { |
668 | return nullptr; |
669 | } |
670 | |
671 | uint32_t hexed; |
672 | const char hex_str[] = {p[1], p[2], p[3], p[4], '\0'}; |
673 | const auto* eos = SkParse::FindHex(hex_str, &hexed); |
674 | if (!eos || *eos) { |
675 | return nullptr; |
676 | } |
677 | |
678 | char utf8[SkUTF::kMaxBytesInUTF8Sequence]; |
679 | const auto utf8_len = SkUTF::ToUTF8(SkTo<SkUnichar>(hexed), utf8); |
680 | fUnescapeBuffer.insert(fUnescapeBuffer.end(), utf8, utf8 + utf8_len); |
681 | p += 4; |
682 | } break; |
683 | default: return nullptr; |
684 | } |
685 | } |
686 | |
687 | return &fUnescapeBuffer; |
688 | } |
689 | |
690 | template <typename MatchFunc> |
691 | const char* matchString(const char* p, const char* p_stop, MatchFunc&& func) { |
692 | SkASSERT(*p == '"'); |
693 | const auto* s_begin = p + 1; |
694 | bool requires_unescape = false; |
695 | |
696 | do { |
697 | // Consume string chars. |
698 | // This is the fast path, and hopefully we only hit it once then quick-exit below. |
699 | for (p = p + 1; !is_eostring(*p); ++p); |
700 | |
701 | if (*p == '"') { |
702 | // Valid string found. |
703 | if (!requires_unescape) { |
704 | func(s_begin, p - s_begin, p_stop); |
705 | } else { |
706 | // Slow unescape. We could avoid this extra copy with some effort, |
707 | // but in practice escaped strings should be rare. |
708 | const auto* buf = this->unescapeString(s_begin, p); |
709 | if (!buf) { |
710 | break; |
711 | } |
712 | |
713 | SkASSERT(!buf->empty()); |
714 | func(buf->data(), buf->size(), buf->data() + buf->size() - 1); |
715 | } |
716 | return p + 1; |
717 | } |
718 | |
719 | if (*p == '\\') { |
720 | requires_unescape = true; |
721 | ++p; |
722 | continue; |
723 | } |
724 | |
725 | // End-of-scope chars are special: we use them to tag the end of the input. |
726 | // Thus they cannot be consumed indiscriminately -- we need to check if we hit the |
727 | // end of the input. To that effect, we treat them as string terminators above, |
728 | // then we catch them here. |
729 | if (is_eoscope(*p)) { |
730 | continue; |
731 | } |
732 | |
733 | // Invalid/unexpected char. |
734 | break; |
735 | } while (p != p_stop); |
736 | |
737 | // Premature end-of-input, or illegal string char. |
738 | return this->error(nullptr, s_begin - 1, "invalid string" ); |
739 | } |
740 | |
741 | const char* matchFastFloatDecimalPart(const char* p, int sign, float f, int exp) { |
742 | SkASSERT(exp <= 0); |
743 | |
744 | for (;;) { |
745 | if (!is_digit(*p)) break; |
746 | f = f * 10.f + (*p++ - '0'); --exp; |
747 | if (!is_digit(*p)) break; |
748 | f = f * 10.f + (*p++ - '0'); --exp; |
749 | } |
750 | |
751 | const auto decimal_scale = pow10(exp); |
752 | if (is_numeric(*p) || !decimal_scale) { |
753 | SkASSERT((*p == '.' || *p == 'e' || *p == 'E') || !decimal_scale); |
754 | // Malformed input, or an (unsupported) exponent, or a collapsed decimal factor. |
755 | return nullptr; |
756 | } |
757 | |
758 | this->pushFloat(sign * f * decimal_scale); |
759 | |
760 | return p; |
761 | } |
762 | |
763 | const char* matchFastFloatPart(const char* p, int sign, float f) { |
764 | for (;;) { |
765 | if (!is_digit(*p)) break; |
766 | f = f * 10.f + (*p++ - '0'); |
767 | if (!is_digit(*p)) break; |
768 | f = f * 10.f + (*p++ - '0'); |
769 | } |
770 | |
771 | if (!is_numeric(*p)) { |
772 | // Matched (integral) float. |
773 | this->pushFloat(sign * f); |
774 | return p; |
775 | } |
776 | |
777 | return (*p == '.') ? this->matchFastFloatDecimalPart(p + 1, sign, f, 0) |
778 | : nullptr; |
779 | } |
780 | |
781 | const char* matchFast32OrFloat(const char* p) { |
782 | int sign = 1; |
783 | if (*p == '-') { |
784 | sign = -1; |
785 | ++p; |
786 | } |
787 | |
788 | const auto* digits_start = p; |
789 | |
790 | int32_t n32 = 0; |
791 | |
792 | // This is the largest absolute int32 value we can handle before |
793 | // risking overflow *on the next digit* (214748363). |
794 | static constexpr int32_t kMaxInt32 = (std::numeric_limits<int32_t>::max() - 9) / 10; |
795 | |
796 | if (is_digit(*p)) { |
797 | n32 = (*p++ - '0'); |
798 | for (;;) { |
799 | if (!is_digit(*p) || n32 > kMaxInt32) break; |
800 | n32 = n32 * 10 + (*p++ - '0'); |
801 | } |
802 | } |
803 | |
804 | if (!is_numeric(*p)) { |
805 | // Did we actually match any digits? |
806 | if (p > digits_start) { |
807 | this->pushInt32(sign * n32); |
808 | return p; |
809 | } |
810 | return nullptr; |
811 | } |
812 | |
813 | if (*p == '.') { |
814 | const auto* decimals_start = ++p; |
815 | |
816 | int exp = 0; |
817 | |
818 | for (;;) { |
819 | if (!is_digit(*p) || n32 > kMaxInt32) break; |
820 | n32 = n32 * 10 + (*p++ - '0'); --exp; |
821 | if (!is_digit(*p) || n32 > kMaxInt32) break; |
822 | n32 = n32 * 10 + (*p++ - '0'); --exp; |
823 | } |
824 | |
825 | if (!is_numeric(*p)) { |
826 | // Did we actually match any digits? |
827 | if (p > decimals_start) { |
828 | this->pushFloat(sign * n32 * pow10(exp)); |
829 | return p; |
830 | } |
831 | return nullptr; |
832 | } |
833 | |
834 | if (n32 > kMaxInt32) { |
835 | // we ran out on n32 bits |
836 | return this->matchFastFloatDecimalPart(p, sign, n32, exp); |
837 | } |
838 | } |
839 | |
840 | return this->matchFastFloatPart(p, sign, n32); |
841 | } |
842 | |
843 | const char* matchNumber(const char* p) { |
844 | if (const auto* fast = this->matchFast32OrFloat(p)) return fast; |
845 | |
846 | // slow fallback |
847 | char* matched; |
848 | float f = strtof(p, &matched); |
849 | if (matched > p) { |
850 | this->pushFloat(f); |
851 | return matched; |
852 | } |
853 | return this->error(nullptr, p, "invalid numeric token" ); |
854 | } |
855 | }; |
856 | |
857 | void Write(const Value& v, SkWStream* stream) { |
858 | switch (v.getType()) { |
859 | case Value::Type::kNull: |
860 | stream->writeText("null" ); |
861 | break; |
862 | case Value::Type::kBool: |
863 | stream->writeText(*v.as<BoolValue>() ? "true" : "false" ); |
864 | break; |
865 | case Value::Type::kNumber: |
866 | stream->writeScalarAsText(*v.as<NumberValue>()); |
867 | break; |
868 | case Value::Type::kString: |
869 | stream->writeText("\"" ); |
870 | stream->writeText(v.as<StringValue>().begin()); |
871 | stream->writeText("\"" ); |
872 | break; |
873 | case Value::Type::kArray: { |
874 | const auto& array = v.as<ArrayValue>(); |
875 | stream->writeText("[" ); |
876 | bool first_value = true; |
877 | for (const auto& v : array) { |
878 | if (!first_value) stream->writeText("," ); |
879 | Write(v, stream); |
880 | first_value = false; |
881 | } |
882 | stream->writeText("]" ); |
883 | break; |
884 | } |
885 | case Value::Type::kObject: |
886 | const auto& object = v.as<ObjectValue>(); |
887 | stream->writeText("{" ); |
888 | bool first_member = true; |
889 | for (const auto& member : object) { |
890 | SkASSERT(member.fKey.getType() == Value::Type::kString); |
891 | if (!first_member) stream->writeText("," ); |
892 | Write(member.fKey, stream); |
893 | stream->writeText(":" ); |
894 | Write(member.fValue, stream); |
895 | first_member = false; |
896 | } |
897 | stream->writeText("}" ); |
898 | break; |
899 | } |
900 | } |
901 | |
902 | } // namespace |
903 | |
904 | SkString Value::toString() const { |
905 | SkDynamicMemoryWStream wstream; |
906 | Write(*this, &wstream); |
907 | const auto data = wstream.detachAsData(); |
908 | // TODO: is there a better way to pass data around without copying? |
909 | return SkString(static_cast<const char*>(data->data()), data->size()); |
910 | } |
911 | |
912 | static constexpr size_t kMinChunkSize = 4096; |
913 | |
914 | DOM::DOM(const char* data, size_t size) |
915 | : fAlloc(kMinChunkSize) { |
916 | DOMParser parser(fAlloc); |
917 | |
918 | fRoot = parser.parse(data, size); |
919 | } |
920 | |
921 | void DOM::write(SkWStream* stream) const { |
922 | Write(fRoot, stream); |
923 | } |
924 | |
925 | } // namespace skjson |
926 | |