1 | /* |
2 | * Copyright 2018 Google Inc. |
3 | * |
4 | * Use of this source code is governed by a BSD-style license that can be |
5 | * found in the LICENSE file. |
6 | */ |
7 | |
8 | #include "src/utils/SkJSON.h" |
9 | |
10 | #include "include/core/SkStream.h" |
11 | #include "include/core/SkString.h" |
12 | #include "include/private/SkMalloc.h" |
13 | #include "include/utils/SkParse.h" |
14 | #include "src/utils/SkUTF.h" |
15 | |
16 | #include <cmath> |
17 | #include <tuple> |
18 | #include <vector> |
19 | |
20 | namespace skjson { |
21 | |
22 | // #define SK_JSON_REPORT_ERRORS |
23 | |
// Value records are required to be exactly 8 bytes wide and 8-byte aligned.
static_assert( sizeof(Value) == 8, "" );
static_assert(alignof(Value) == 8, "" );

// Alignment requested for the externally allocated vector slabs (see MakeVector).
static constexpr size_t kRecAlign = alignof(Value);
28 | |
29 | void Value::init_tagged(Tag t) { |
30 | memset(fData8, 0, sizeof(fData8)); |
31 | fData8[Value::kTagOffset] = SkTo<uint8_t>(t); |
32 | SkASSERT(this->getTag() == t); |
33 | } |
34 | |
35 | // Pointer values store a type (in the upper kTagBits bits) and a pointer. |
36 | void Value::init_tagged_pointer(Tag t, void* p) { |
37 | *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p); |
38 | |
39 | if (sizeof(Value) == sizeof(uintptr_t)) { |
40 | // For 64-bit, we rely on the pointer upper bits being unused/zero. |
41 | SkASSERT(!(fData8[kTagOffset] & kTagMask)); |
42 | fData8[kTagOffset] |= SkTo<uint8_t>(t); |
43 | } else { |
44 | // For 32-bit, we need to zero-initialize the upper 32 bits |
45 | SkASSERT(sizeof(Value) == sizeof(uintptr_t) * 2); |
46 | this->cast<uintptr_t>()[kTagOffset >> 2] = 0; |
47 | fData8[kTagOffset] = SkTo<uint8_t>(t); |
48 | } |
49 | |
50 | SkASSERT(this->getTag() == t); |
51 | SkASSERT(this->ptr<void>() == p); |
52 | } |
53 | |
54 | NullValue::NullValue() { |
55 | this->init_tagged(Tag::kNull); |
56 | SkASSERT(this->getTag() == Tag::kNull); |
57 | } |
58 | |
59 | BoolValue::BoolValue(bool b) { |
60 | this->init_tagged(Tag::kBool); |
61 | *this->cast<bool>() = b; |
62 | SkASSERT(this->getTag() == Tag::kBool); |
63 | } |
64 | |
65 | NumberValue::NumberValue(int32_t i) { |
66 | this->init_tagged(Tag::kInt); |
67 | *this->cast<int32_t>() = i; |
68 | SkASSERT(this->getTag() == Tag::kInt); |
69 | } |
70 | |
71 | NumberValue::NumberValue(float f) { |
72 | this->init_tagged(Tag::kFloat); |
73 | *this->cast<float>() = f; |
74 | SkASSERT(this->getTag() == Tag::kFloat); |
75 | } |
76 | |
77 | // Vector recs point to externally allocated slabs with the following layout: |
78 | // |
79 | // [size_t n] [REC_0] ... [REC_n-1] [optional extra trailing storage] |
80 | // |
81 | // Long strings use extra_alloc_size == 1 to store the \0 terminator. |
82 | // |
83 | template <typename T, size_t extra_alloc_size = 0> |
84 | static void* MakeVector(const void* src, size_t size, SkArenaAlloc& alloc) { |
85 | // The Ts are already in memory, so their size should be safe. |
86 | const auto total_size = sizeof(size_t) + size * sizeof(T) + extra_alloc_size; |
87 | auto* size_ptr = reinterpret_cast<size_t*>(alloc.makeBytesAlignedTo(total_size, kRecAlign)); |
88 | |
89 | *size_ptr = size; |
90 | sk_careful_memcpy(size_ptr + 1, src, size * sizeof(T)); |
91 | |
92 | return size_ptr; |
93 | } |
94 | |
95 | ArrayValue::ArrayValue(const Value* src, size_t size, SkArenaAlloc& alloc) { |
96 | this->init_tagged_pointer(Tag::kArray, MakeVector<Value>(src, size, alloc)); |
97 | SkASSERT(this->getTag() == Tag::kArray); |
98 | } |
99 | |
100 | // Strings have two flavors: |
101 | // |
//   -- short strings (len <= 7) -> these are stored inline, in the record
//      (one byte reserved for null terminator/type):
//
//        [str] [\0 padding] [tag]
//
//      Unused payload bytes are zeroed and no length is stored.  The tag byte
//      doubles as the null terminator when size == max_len (this works because
//      kShortString == 0).
//
//   -- long strings (len > 7) -> these are externally allocated vectors
//      (VectorValue<char, Value::Type::kString>).
111 | // |
112 | // The string data plus a null-char terminator are copied over. |
113 | // |
114 | namespace { |
115 | |
116 | // An internal string builder with a fast 8 byte short string load path |
117 | // (for the common case where the string is not at the end of the stream). |
118 | class FastString final : public Value { |
119 | public: |
120 | FastString(const char* src, size_t size, const char* eos, SkArenaAlloc& alloc) { |
121 | SkASSERT(src <= eos); |
122 | |
123 | if (size > kMaxInlineStringSize) { |
124 | this->initLongString(src, size, alloc); |
125 | SkASSERT(this->getTag() == Tag::kString); |
126 | return; |
127 | } |
128 | |
129 | static_assert(static_cast<uint8_t>(Tag::kShortString) == 0, "please don't break this" ); |
130 | static_assert(sizeof(Value) == 8, "" ); |
131 | |
132 | // TODO: LIKELY |
133 | if (src && src + 7 <= eos) { |
134 | this->initFastShortString(src, size); |
135 | } else { |
136 | this->initShortString(src, size); |
137 | } |
138 | |
139 | SkASSERT(this->getTag() == Tag::kShortString); |
140 | } |
141 | |
142 | private: |
143 | static constexpr size_t kMaxInlineStringSize = sizeof(Value) - 1; |
144 | |
145 | void initLongString(const char* src, size_t size, SkArenaAlloc& alloc) { |
146 | SkASSERT(size > kMaxInlineStringSize); |
147 | |
148 | this->init_tagged_pointer(Tag::kString, MakeVector<char, 1>(src, size, alloc)); |
149 | |
150 | auto* data = this->cast<VectorValue<char, Value::Type::kString>>()->begin(); |
151 | const_cast<char*>(data)[size] = '\0'; |
152 | } |
153 | |
154 | void initShortString(const char* src, size_t size) { |
155 | SkASSERT(size <= kMaxInlineStringSize); |
156 | |
157 | this->init_tagged(Tag::kShortString); |
158 | sk_careful_memcpy(this->cast<char>(), src, size); |
159 | // Null terminator provided by init_tagged() above (fData8 is zero-initialized). |
160 | } |
161 | |
162 | void initFastShortString(const char* src, size_t size) { |
163 | SkASSERT(size <= kMaxInlineStringSize); |
164 | |
165 | // Load 8 chars and mask out the tag and \0 terminator. |
166 | uint64_t* s64 = this->cast<uint64_t>(); |
167 | memcpy(s64, src, 8); |
168 | |
169 | #if defined(SK_CPU_LENDIAN) |
170 | *s64 &= 0x00ffffffffffffffULL >> ((kMaxInlineStringSize - size) * 8); |
171 | #else |
172 | static_assert(false, "Big-endian builds are not supported at this time." ); |
173 | #endif |
174 | } |
175 | }; |
176 | |
177 | } // namespace |
178 | |
179 | StringValue::StringValue(const char* src, size_t size, SkArenaAlloc& alloc) { |
180 | new (this) FastString(src, size, src, alloc); |
181 | } |
182 | |
183 | ObjectValue::ObjectValue(const Member* src, size_t size, SkArenaAlloc& alloc) { |
184 | this->init_tagged_pointer(Tag::kObject, MakeVector<Member>(src, size, alloc)); |
185 | SkASSERT(this->getTag() == Tag::kObject); |
186 | } |
187 | |
188 | |
189 | // Boring public Value glue. |
190 | |
// Equality-only string comparison: returns 0 when |a| and |b| hold the same
// null-terminated string and 1 otherwise (no ordering information).
static int inline_strcmp(const char a[], const char b[]) {
    while (*a != 0) {
        if (*a != *b) {
            return 1;
        }
        ++a;
        ++b;
    }

    // |a| is exhausted: the strings match only if |b| ends here too.
    return *b != 0;
}
203 | |
204 | const Value& ObjectValue::operator[](const char* key) const { |
205 | // Reverse search for duplicates resolution (policy: return last). |
206 | const auto* begin = this->begin(); |
207 | const auto* member = this->end(); |
208 | |
209 | while (member > begin) { |
210 | --member; |
211 | if (0 == inline_strcmp(key, member->fKey.as<StringValue>().begin())) { |
212 | return member->fValue; |
213 | } |
214 | } |
215 | |
216 | static const Value g_null = NullValue(); |
217 | return g_null; |
218 | } |
219 | |
220 | namespace { |
221 | |
222 | // Lexer/parser inspired by rapidjson [1], sajson [2] and pjson [3]. |
223 | // |
224 | // [1] https://github.com/Tencent/rapidjson/ |
225 | // [2] https://github.com/chadaustin/sajson |
226 | // [3] https://pastebin.com/hnhSTL3h |
227 | |
228 | |
// Per-character classification flags, indexed by the (unsigned) character value:
//
//   bit 0 (0x01) - plain ASCII string character
//   bit 1 (0x02) - whitespace
//   bit 2 (0x04) - string terminator (" \\ \0 [control chars] **AND } ]** <- see matchString notes)
//   bit 3 (0x08) - 0-9
//   bit 4 (0x10) - 0-9 e E .
//   bit 5 (0x20) - scope terminator (} ])
static constexpr uint8_t g_token_flags[256] = {
    // 0x00-0x0F: control chars (\t, \n and \r are also whitespace)
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x06, 0x06, 0x04, 0x04, 0x06, 0x04, 0x04,
    // 0x10-0x1F: control chars
    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
    // 0x20-0x2F: ' ' is whitespace, '"' terminates strings, '.' is numeric
    0x03, 0x01, 0x04, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x11, 0x01,
    // 0x30-0x3F: '0'-'9' are digits
    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    // 0x40-0x4F: 'E' is numeric
    0x01, 0x01, 0x01, 0x01, 0x01, 0x11, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    // 0x50-0x5F: '\\' terminates strings, ']' terminates scopes
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x04, 0x25, 0x01, 0x01,
    // 0x60-0x6F: 'e' is numeric
    0x01, 0x01, 0x01, 0x01, 0x01, 0x11, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
    // 0x70-0x7F: '}' terminates scopes
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x25, 0x01, 0x01,

    // 0x80-0xFF: no flags set
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
252 | |
253 | static inline bool is_ws(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x02; } |
254 | static inline bool is_eostring(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x04; } |
255 | static inline bool is_digit(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x08; } |
256 | static inline bool is_numeric(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x10; } |
257 | static inline bool is_eoscope(char c) { return g_token_flags[static_cast<uint8_t>(c)] & 0x20; } |
258 | |
259 | static inline const char* skip_ws(const char* p) { |
260 | while (is_ws(*p)) ++p; |
261 | return p; |
262 | } |
263 | |
264 | static inline float pow10(int32_t exp) { |
265 | static constexpr float g_pow10_table[63] = |
266 | { |
267 | 1.e-031f, 1.e-030f, 1.e-029f, 1.e-028f, 1.e-027f, 1.e-026f, 1.e-025f, 1.e-024f, |
268 | 1.e-023f, 1.e-022f, 1.e-021f, 1.e-020f, 1.e-019f, 1.e-018f, 1.e-017f, 1.e-016f, |
269 | 1.e-015f, 1.e-014f, 1.e-013f, 1.e-012f, 1.e-011f, 1.e-010f, 1.e-009f, 1.e-008f, |
270 | 1.e-007f, 1.e-006f, 1.e-005f, 1.e-004f, 1.e-003f, 1.e-002f, 1.e-001f, 1.e+000f, |
271 | 1.e+001f, 1.e+002f, 1.e+003f, 1.e+004f, 1.e+005f, 1.e+006f, 1.e+007f, 1.e+008f, |
272 | 1.e+009f, 1.e+010f, 1.e+011f, 1.e+012f, 1.e+013f, 1.e+014f, 1.e+015f, 1.e+016f, |
273 | 1.e+017f, 1.e+018f, 1.e+019f, 1.e+020f, 1.e+021f, 1.e+022f, 1.e+023f, 1.e+024f, |
274 | 1.e+025f, 1.e+026f, 1.e+027f, 1.e+028f, 1.e+029f, 1.e+030f, 1.e+031f |
275 | }; |
276 | |
277 | static constexpr int32_t k_exp_offset = SK_ARRAY_COUNT(g_pow10_table) / 2; |
278 | |
279 | // We only support negative exponents for now. |
280 | SkASSERT(exp <= 0); |
281 | |
282 | return (exp >= -k_exp_offset) ? g_pow10_table[exp + k_exp_offset] |
283 | : std::pow(10.0f, static_cast<float>(exp)); |
284 | } |
285 | |
286 | class DOMParser { |
287 | public: |
288 | explicit DOMParser(SkArenaAlloc& alloc) |
289 | : fAlloc(alloc) { |
290 | fValueStack.reserve(kValueStackReserve); |
291 | fUnescapeBuffer.reserve(kUnescapeBufferReserve); |
292 | } |
293 | |
294 | const Value parse(const char* p, size_t size) { |
295 | if (!size) { |
296 | return this->error(NullValue(), p, "invalid empty input" ); |
297 | } |
298 | |
299 | const char* p_stop = p + size - 1; |
300 | |
301 | // We're only checking for end-of-stream on object/array close('}',']'), |
302 | // so we must trim any whitespace from the buffer tail. |
303 | while (p_stop > p && is_ws(*p_stop)) --p_stop; |
304 | |
305 | SkASSERT(p_stop >= p && p_stop < p + size); |
306 | if (!is_eoscope(*p_stop)) { |
307 | return this->error(NullValue(), p_stop, "invalid top-level value" ); |
308 | } |
309 | |
310 | p = skip_ws(p); |
311 | |
312 | switch (*p) { |
313 | case '{': |
314 | goto match_object; |
315 | case '[': |
316 | goto match_array; |
317 | default: |
318 | return this->error(NullValue(), p, "invalid top-level value" ); |
319 | } |
320 | |
321 | match_object: |
322 | SkASSERT(*p == '{'); |
323 | p = skip_ws(p + 1); |
324 | |
325 | this->pushObjectScope(); |
326 | |
327 | if (*p == '}') goto pop_object; |
328 | |
329 | // goto match_object_key; |
330 | match_object_key: |
331 | p = skip_ws(p); |
332 | if (*p != '"') return this->error(NullValue(), p, "expected object key" ); |
333 | |
334 | p = this->matchString(p, p_stop, [this](const char* key, size_t size, const char* eos) { |
335 | this->pushObjectKey(key, size, eos); |
336 | }); |
337 | if (!p) return NullValue(); |
338 | |
339 | p = skip_ws(p); |
340 | if (*p != ':') return this->error(NullValue(), p, "expected ':' separator" ); |
341 | |
342 | ++p; |
343 | |
344 | // goto match_value; |
345 | match_value: |
346 | p = skip_ws(p); |
347 | |
348 | switch (*p) { |
349 | case '\0': |
350 | return this->error(NullValue(), p, "unexpected input end" ); |
351 | case '"': |
352 | p = this->matchString(p, p_stop, [this](const char* str, size_t size, const char* eos) { |
353 | this->pushString(str, size, eos); |
354 | }); |
355 | break; |
356 | case '[': |
357 | goto match_array; |
358 | case 'f': |
359 | p = this->matchFalse(p); |
360 | break; |
361 | case 'n': |
362 | p = this->matchNull(p); |
363 | break; |
364 | case 't': |
365 | p = this->matchTrue(p); |
366 | break; |
367 | case '{': |
368 | goto match_object; |
369 | default: |
370 | p = this->matchNumber(p); |
371 | break; |
372 | } |
373 | |
374 | if (!p) return NullValue(); |
375 | |
376 | // goto match_post_value; |
377 | match_post_value: |
378 | SkASSERT(!this->inTopLevelScope()); |
379 | |
380 | p = skip_ws(p); |
381 | switch (*p) { |
382 | case ',': |
383 | ++p; |
384 | if (this->inObjectScope()) { |
385 | goto match_object_key; |
386 | } else { |
387 | SkASSERT(this->inArrayScope()); |
388 | goto match_value; |
389 | } |
390 | case ']': |
391 | goto pop_array; |
392 | case '}': |
393 | goto pop_object; |
394 | default: |
395 | return this->error(NullValue(), p - 1, "unexpected value-trailing token" ); |
396 | } |
397 | |
398 | // unreachable |
399 | SkASSERT(false); |
400 | |
401 | pop_object: |
402 | SkASSERT(*p == '}'); |
403 | |
404 | if (this->inArrayScope()) { |
405 | return this->error(NullValue(), p, "unexpected object terminator" ); |
406 | } |
407 | |
408 | this->popObjectScope(); |
409 | |
410 | // goto pop_common |
411 | pop_common: |
412 | SkASSERT(is_eoscope(*p)); |
413 | |
414 | if (this->inTopLevelScope()) { |
415 | SkASSERT(fValueStack.size() == 1); |
416 | |
417 | // Success condition: parsed the top level element and reached the stop token. |
418 | return p == p_stop |
419 | ? fValueStack.front() |
420 | : this->error(NullValue(), p + 1, "trailing root garbage" ); |
421 | } |
422 | |
423 | if (p == p_stop) { |
424 | return this->error(NullValue(), p, "unexpected end-of-input" ); |
425 | } |
426 | |
427 | ++p; |
428 | |
429 | goto match_post_value; |
430 | |
431 | match_array: |
432 | SkASSERT(*p == '['); |
433 | p = skip_ws(p + 1); |
434 | |
435 | this->pushArrayScope(); |
436 | |
437 | if (*p != ']') goto match_value; |
438 | |
439 | // goto pop_array; |
440 | pop_array: |
441 | SkASSERT(*p == ']'); |
442 | |
443 | if (this->inObjectScope()) { |
444 | return this->error(NullValue(), p, "unexpected array terminator" ); |
445 | } |
446 | |
447 | this->popArrayScope(); |
448 | |
449 | goto pop_common; |
450 | |
451 | SkASSERT(false); |
452 | return NullValue(); |
453 | } |
454 | |
455 | std::tuple<const char*, const SkString> getError() const { |
456 | return std::make_tuple(fErrorToken, fErrorMessage); |
457 | } |
458 | |
459 | private: |
460 | SkArenaAlloc& fAlloc; |
461 | |
462 | // Pending values stack. |
463 | static constexpr size_t kValueStackReserve = 256; |
464 | std::vector<Value> fValueStack; |
465 | |
466 | // String unescape buffer. |
467 | static constexpr size_t kUnescapeBufferReserve = 512; |
468 | std::vector<char> fUnescapeBuffer; |
469 | |
470 | // Tracks the current object/array scope, as an index into fStack: |
471 | // |
472 | // - for objects: fScopeIndex = (index of first value in scope) |
473 | // - for arrays : fScopeIndex = -(index of first value in scope) |
474 | // |
475 | // fScopeIndex == 0 IFF we are at the top level (no current/active scope). |
476 | intptr_t fScopeIndex = 0; |
477 | |
478 | // Error reporting. |
479 | const char* fErrorToken = nullptr; |
480 | SkString fErrorMessage; |
481 | |
482 | bool inTopLevelScope() const { return fScopeIndex == 0; } |
483 | bool inObjectScope() const { return fScopeIndex > 0; } |
484 | bool inArrayScope() const { return fScopeIndex < 0; } |
485 | |
486 | // Helper for masquerading raw primitive types as Values (bypassing tagging, etc). |
487 | template <typename T> |
488 | class RawValue final : public Value { |
489 | public: |
490 | explicit RawValue(T v) { |
491 | static_assert(sizeof(T) <= sizeof(Value), "" ); |
492 | *this->cast<T>() = v; |
493 | } |
494 | |
495 | T operator *() const { return *this->cast<T>(); } |
496 | }; |
497 | |
498 | template <typename VectorT> |
499 | void popScopeAsVec(size_t scope_start) { |
500 | SkASSERT(scope_start > 0); |
501 | SkASSERT(scope_start <= fValueStack.size()); |
502 | |
503 | using T = typename VectorT::ValueT; |
504 | static_assert( sizeof(T) >= sizeof(Value), "" ); |
505 | static_assert( sizeof(T) % sizeof(Value) == 0, "" ); |
506 | static_assert(alignof(T) == alignof(Value), "" ); |
507 | |
508 | const auto scope_count = fValueStack.size() - scope_start, |
509 | count = scope_count / (sizeof(T) / sizeof(Value)); |
510 | SkASSERT(scope_count % (sizeof(T) / sizeof(Value)) == 0); |
511 | |
512 | const auto* begin = reinterpret_cast<const T*>(fValueStack.data() + scope_start); |
513 | |
514 | // Restore the previous scope index from saved placeholder value, |
515 | // and instantiate as a vector of values in scope. |
516 | auto& placeholder = fValueStack[scope_start - 1]; |
517 | fScopeIndex = *static_cast<RawValue<intptr_t>&>(placeholder); |
518 | placeholder = VectorT(begin, count, fAlloc); |
519 | |
520 | // Drop the (consumed) values in scope. |
521 | fValueStack.resize(scope_start); |
522 | } |
523 | |
524 | void pushObjectScope() { |
525 | // Save a scope index now, and then later we'll overwrite this value as the Object itself. |
526 | fValueStack.push_back(RawValue<intptr_t>(fScopeIndex)); |
527 | |
528 | // New object scope. |
529 | fScopeIndex = SkTo<intptr_t>(fValueStack.size()); |
530 | } |
531 | |
532 | void popObjectScope() { |
533 | SkASSERT(this->inObjectScope()); |
534 | this->popScopeAsVec<ObjectValue>(SkTo<size_t>(fScopeIndex)); |
535 | |
536 | SkDEBUGCODE( |
537 | const auto& obj = fValueStack.back().as<ObjectValue>(); |
538 | SkASSERT(obj.is<ObjectValue>()); |
539 | for (const auto& member : obj) { |
540 | SkASSERT(member.fKey.is<StringValue>()); |
541 | } |
542 | ) |
543 | } |
544 | |
545 | void pushArrayScope() { |
546 | // Save a scope index now, and then later we'll overwrite this value as the Array itself. |
547 | fValueStack.push_back(RawValue<intptr_t>(fScopeIndex)); |
548 | |
549 | // New array scope. |
550 | fScopeIndex = -SkTo<intptr_t>(fValueStack.size()); |
551 | } |
552 | |
553 | void popArrayScope() { |
554 | SkASSERT(this->inArrayScope()); |
555 | this->popScopeAsVec<ArrayValue>(SkTo<size_t>(-fScopeIndex)); |
556 | |
557 | SkDEBUGCODE( |
558 | const auto& arr = fValueStack.back().as<ArrayValue>(); |
559 | SkASSERT(arr.is<ArrayValue>()); |
560 | ) |
561 | } |
562 | |
563 | void pushObjectKey(const char* key, size_t size, const char* eos) { |
564 | SkASSERT(this->inObjectScope()); |
565 | SkASSERT(fValueStack.size() >= SkTo<size_t>(fScopeIndex)); |
566 | SkASSERT(!((fValueStack.size() - SkTo<size_t>(fScopeIndex)) & 1)); |
567 | this->pushString(key, size, eos); |
568 | } |
569 | |
570 | void pushTrue() { |
571 | fValueStack.push_back(BoolValue(true)); |
572 | } |
573 | |
574 | void pushFalse() { |
575 | fValueStack.push_back(BoolValue(false)); |
576 | } |
577 | |
578 | void pushNull() { |
579 | fValueStack.push_back(NullValue()); |
580 | } |
581 | |
582 | void pushString(const char* s, size_t size, const char* eos) { |
583 | fValueStack.push_back(FastString(s, size, eos, fAlloc)); |
584 | } |
585 | |
586 | void pushInt32(int32_t i) { |
587 | fValueStack.push_back(NumberValue(i)); |
588 | } |
589 | |
590 | void pushFloat(float f) { |
591 | fValueStack.push_back(NumberValue(f)); |
592 | } |
593 | |
594 | template <typename T> |
595 | T error(T&& ret_val, const char* p, const char* msg) { |
596 | #if defined(SK_JSON_REPORT_ERRORS) |
597 | fErrorToken = p; |
598 | fErrorMessage.set(msg); |
599 | #endif |
600 | return ret_val; |
601 | } |
602 | |
603 | const char* matchTrue(const char* p) { |
604 | SkASSERT(p[0] == 't'); |
605 | |
606 | if (p[1] == 'r' && p[2] == 'u' && p[3] == 'e') { |
607 | this->pushTrue(); |
608 | return p + 4; |
609 | } |
610 | |
611 | return this->error(nullptr, p, "invalid token" ); |
612 | } |
613 | |
614 | const char* matchFalse(const char* p) { |
615 | SkASSERT(p[0] == 'f'); |
616 | |
617 | if (p[1] == 'a' && p[2] == 'l' && p[3] == 's' && p[4] == 'e') { |
618 | this->pushFalse(); |
619 | return p + 5; |
620 | } |
621 | |
622 | return this->error(nullptr, p, "invalid token" ); |
623 | } |
624 | |
625 | const char* matchNull(const char* p) { |
626 | SkASSERT(p[0] == 'n'); |
627 | |
628 | if (p[1] == 'u' && p[2] == 'l' && p[3] == 'l') { |
629 | this->pushNull(); |
630 | return p + 4; |
631 | } |
632 | |
633 | return this->error(nullptr, p, "invalid token" ); |
634 | } |
635 | |
636 | const std::vector<char>* unescapeString(const char* begin, const char* end) { |
637 | fUnescapeBuffer.clear(); |
638 | |
639 | for (const auto* p = begin; p != end; ++p) { |
640 | if (*p != '\\') { |
641 | fUnescapeBuffer.push_back(*p); |
642 | continue; |
643 | } |
644 | |
645 | if (++p == end) { |
646 | return nullptr; |
647 | } |
648 | |
649 | switch (*p) { |
650 | case '"': fUnescapeBuffer.push_back( '"'); break; |
651 | case '\\': fUnescapeBuffer.push_back('\\'); break; |
652 | case '/': fUnescapeBuffer.push_back( '/'); break; |
653 | case 'b': fUnescapeBuffer.push_back('\b'); break; |
654 | case 'f': fUnescapeBuffer.push_back('\f'); break; |
655 | case 'n': fUnescapeBuffer.push_back('\n'); break; |
656 | case 'r': fUnescapeBuffer.push_back('\r'); break; |
657 | case 't': fUnescapeBuffer.push_back('\t'); break; |
658 | case 'u': { |
659 | if (p + 4 >= end) { |
660 | return nullptr; |
661 | } |
662 | |
663 | uint32_t hexed; |
664 | const char hex_str[] = {p[1], p[2], p[3], p[4], '\0'}; |
665 | const auto* eos = SkParse::FindHex(hex_str, &hexed); |
666 | if (!eos || *eos) { |
667 | return nullptr; |
668 | } |
669 | |
670 | char utf8[SkUTF::kMaxBytesInUTF8Sequence]; |
671 | const auto utf8_len = SkUTF::ToUTF8(SkTo<SkUnichar>(hexed), utf8); |
672 | fUnescapeBuffer.insert(fUnescapeBuffer.end(), utf8, utf8 + utf8_len); |
673 | p += 4; |
674 | } break; |
675 | default: return nullptr; |
676 | } |
677 | } |
678 | |
679 | return &fUnescapeBuffer; |
680 | } |
681 | |
682 | template <typename MatchFunc> |
683 | const char* matchString(const char* p, const char* p_stop, MatchFunc&& func) { |
684 | SkASSERT(*p == '"'); |
685 | const auto* s_begin = p + 1; |
686 | bool requires_unescape = false; |
687 | |
688 | do { |
689 | // Consume string chars. |
690 | // This is the fast path, and hopefully we only hit it once then quick-exit below. |
691 | for (p = p + 1; !is_eostring(*p); ++p); |
692 | |
693 | if (*p == '"') { |
694 | // Valid string found. |
695 | if (!requires_unescape) { |
696 | func(s_begin, p - s_begin, p_stop); |
697 | } else { |
698 | // Slow unescape. We could avoid this extra copy with some effort, |
699 | // but in practice escaped strings should be rare. |
700 | const auto* buf = this->unescapeString(s_begin, p); |
701 | if (!buf) { |
702 | break; |
703 | } |
704 | |
705 | SkASSERT(!buf->empty()); |
706 | func(buf->data(), buf->size(), buf->data() + buf->size() - 1); |
707 | } |
708 | return p + 1; |
709 | } |
710 | |
711 | if (*p == '\\') { |
712 | requires_unescape = true; |
713 | ++p; |
714 | continue; |
715 | } |
716 | |
717 | // End-of-scope chars are special: we use them to tag the end of the input. |
718 | // Thus they cannot be consumed indiscriminately -- we need to check if we hit the |
719 | // end of the input. To that effect, we treat them as string terminators above, |
720 | // then we catch them here. |
721 | if (is_eoscope(*p)) { |
722 | continue; |
723 | } |
724 | |
725 | // Invalid/unexpected char. |
726 | break; |
727 | } while (p != p_stop); |
728 | |
729 | // Premature end-of-input, or illegal string char. |
730 | return this->error(nullptr, s_begin - 1, "invalid string" ); |
731 | } |
732 | |
733 | const char* matchFastFloatDecimalPart(const char* p, int sign, float f, int exp) { |
734 | SkASSERT(exp <= 0); |
735 | |
736 | for (;;) { |
737 | if (!is_digit(*p)) break; |
738 | f = f * 10.f + (*p++ - '0'); --exp; |
739 | if (!is_digit(*p)) break; |
740 | f = f * 10.f + (*p++ - '0'); --exp; |
741 | } |
742 | |
743 | const auto decimal_scale = pow10(exp); |
744 | if (is_numeric(*p) || !decimal_scale) { |
745 | SkASSERT((*p == '.' || *p == 'e' || *p == 'E') || !decimal_scale); |
746 | // Malformed input, or an (unsupported) exponent, or a collapsed decimal factor. |
747 | return nullptr; |
748 | } |
749 | |
750 | this->pushFloat(sign * f * decimal_scale); |
751 | |
752 | return p; |
753 | } |
754 | |
755 | const char* matchFastFloatPart(const char* p, int sign, float f) { |
756 | for (;;) { |
757 | if (!is_digit(*p)) break; |
758 | f = f * 10.f + (*p++ - '0'); |
759 | if (!is_digit(*p)) break; |
760 | f = f * 10.f + (*p++ - '0'); |
761 | } |
762 | |
763 | if (!is_numeric(*p)) { |
764 | // Matched (integral) float. |
765 | this->pushFloat(sign * f); |
766 | return p; |
767 | } |
768 | |
769 | return (*p == '.') ? this->matchFastFloatDecimalPart(p + 1, sign, f, 0) |
770 | : nullptr; |
771 | } |
772 | |
773 | const char* matchFast32OrFloat(const char* p) { |
774 | int sign = 1; |
775 | if (*p == '-') { |
776 | sign = -1; |
777 | ++p; |
778 | } |
779 | |
780 | const auto* digits_start = p; |
781 | |
782 | int32_t n32 = 0; |
783 | |
784 | // This is the largest absolute int32 value we can handle before |
785 | // risking overflow *on the next digit* (214748363). |
786 | static constexpr int32_t kMaxInt32 = (std::numeric_limits<int32_t>::max() - 9) / 10; |
787 | |
788 | if (is_digit(*p)) { |
789 | n32 = (*p++ - '0'); |
790 | for (;;) { |
791 | if (!is_digit(*p) || n32 > kMaxInt32) break; |
792 | n32 = n32 * 10 + (*p++ - '0'); |
793 | } |
794 | } |
795 | |
796 | if (!is_numeric(*p)) { |
797 | // Did we actually match any digits? |
798 | if (p > digits_start) { |
799 | this->pushInt32(sign * n32); |
800 | return p; |
801 | } |
802 | return nullptr; |
803 | } |
804 | |
805 | if (*p == '.') { |
806 | const auto* decimals_start = ++p; |
807 | |
808 | int exp = 0; |
809 | |
810 | for (;;) { |
811 | if (!is_digit(*p) || n32 > kMaxInt32) break; |
812 | n32 = n32 * 10 + (*p++ - '0'); --exp; |
813 | if (!is_digit(*p) || n32 > kMaxInt32) break; |
814 | n32 = n32 * 10 + (*p++ - '0'); --exp; |
815 | } |
816 | |
817 | if (!is_numeric(*p)) { |
818 | // Did we actually match any digits? |
819 | if (p > decimals_start) { |
820 | this->pushFloat(sign * n32 * pow10(exp)); |
821 | return p; |
822 | } |
823 | return nullptr; |
824 | } |
825 | |
826 | if (n32 > kMaxInt32) { |
827 | // we ran out on n32 bits |
828 | return this->matchFastFloatDecimalPart(p, sign, n32, exp); |
829 | } |
830 | } |
831 | |
832 | return this->matchFastFloatPart(p, sign, n32); |
833 | } |
834 | |
835 | const char* matchNumber(const char* p) { |
836 | if (const auto* fast = this->matchFast32OrFloat(p)) return fast; |
837 | |
838 | // slow fallback |
839 | char* matched; |
840 | float f = strtof(p, &matched); |
841 | if (matched > p) { |
842 | this->pushFloat(f); |
843 | return matched; |
844 | } |
845 | return this->error(nullptr, p, "invalid numeric token" ); |
846 | } |
847 | }; |
848 | |
849 | void Write(const Value& v, SkWStream* stream) { |
850 | switch (v.getType()) { |
851 | case Value::Type::kNull: |
852 | stream->writeText("null" ); |
853 | break; |
854 | case Value::Type::kBool: |
855 | stream->writeText(*v.as<BoolValue>() ? "true" : "false" ); |
856 | break; |
857 | case Value::Type::kNumber: |
858 | stream->writeScalarAsText(*v.as<NumberValue>()); |
859 | break; |
860 | case Value::Type::kString: |
861 | stream->writeText("\"" ); |
862 | stream->writeText(v.as<StringValue>().begin()); |
863 | stream->writeText("\"" ); |
864 | break; |
865 | case Value::Type::kArray: { |
866 | const auto& array = v.as<ArrayValue>(); |
867 | stream->writeText("[" ); |
868 | bool first_value = true; |
869 | for (const auto& v : array) { |
870 | if (!first_value) stream->writeText("," ); |
871 | Write(v, stream); |
872 | first_value = false; |
873 | } |
874 | stream->writeText("]" ); |
875 | break; |
876 | } |
877 | case Value::Type::kObject: |
878 | const auto& object = v.as<ObjectValue>(); |
879 | stream->writeText("{" ); |
880 | bool first_member = true; |
881 | for (const auto& member : object) { |
882 | SkASSERT(member.fKey.getType() == Value::Type::kString); |
883 | if (!first_member) stream->writeText("," ); |
884 | Write(member.fKey, stream); |
885 | stream->writeText(":" ); |
886 | Write(member.fValue, stream); |
887 | first_member = false; |
888 | } |
889 | stream->writeText("}" ); |
890 | break; |
891 | } |
892 | } |
893 | |
894 | } // namespace |
895 | |
896 | SkString Value::toString() const { |
897 | SkDynamicMemoryWStream wstream; |
898 | Write(*this, &wstream); |
899 | const auto data = wstream.detachAsData(); |
900 | // TODO: is there a better way to pass data around without copying? |
901 | return SkString(static_cast<const char*>(data->data()), data->size()); |
902 | } |
903 | |
904 | static constexpr size_t kMinChunkSize = 4096; |
905 | |
906 | DOM::DOM(const char* data, size_t size) |
907 | : fAlloc(kMinChunkSize) { |
908 | DOMParser parser(fAlloc); |
909 | |
910 | fRoot = parser.parse(data, size); |
911 | } |
912 | |
913 | void DOM::write(SkWStream* stream) const { |
914 | Write(fRoot, stream); |
915 | } |
916 | |
917 | } // namespace skjson |
918 | |