| 1 | /* |
| 2 | * Copyright 2018 Google Inc. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license that can be |
| 5 | * found in the LICENSE file. |
| 6 | */ |
| 7 | |
| 8 | #include "src/utils/SkJSON.h" |
| 9 | |
| 10 | #include "include/core/SkStream.h" |
| 11 | #include "include/core/SkString.h" |
| 12 | #include "include/private/SkMalloc.h" |
| 13 | #include "include/utils/SkParse.h" |
| 14 | #include "src/utils/SkUTF.h" |
| 15 | |
| 16 | #include <cmath> |
| 17 | #include <tuple> |
| 18 | #include <vector> |
| 19 | |
| 20 | namespace skjson { |
| 21 | |
| 22 | // #define SK_JSON_REPORT_ERRORS |
| 23 | |
| 24 | static_assert( sizeof(Value) == 8, "" ); |
| 25 | static_assert(alignof(Value) == 8, "" ); |
| 26 | |
| 27 | static constexpr size_t kRecAlign = alignof(Value); |
| 28 | |
| 29 | void Value::init_tagged(Tag t) { |
| 30 | memset(fData8, 0, sizeof(fData8)); |
| 31 | fData8[0] = SkTo<uint8_t>(t); |
| 32 | SkASSERT(this->getTag() == t); |
| 33 | } |
| 34 | |
| 35 | // Pointer values store a type (in the lower kTagBits bits) and a pointer. |
| 36 | void Value::init_tagged_pointer(Tag t, void* p) { |
| 37 | if (sizeof(Value) == sizeof(uintptr_t)) { |
| 38 | *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p); |
| 39 | // For 64-bit, we rely on the pointer lower bits being zero. |
| 40 | SkASSERT(!(fData8[0] & kTagMask)); |
| 41 | fData8[0] |= SkTo<uint8_t>(t); |
| 42 | } else { |
| 43 | // For 32-bit, we store the pointer in the upper word |
| 44 | SkASSERT(sizeof(Value) == sizeof(uintptr_t) * 2); |
| 45 | this->init_tagged(t); |
| 46 | *this->cast<uintptr_t>() = reinterpret_cast<uintptr_t>(p); |
| 47 | } |
| 48 | |
| 49 | SkASSERT(this->getTag() == t); |
| 50 | SkASSERT(this->ptr<void>() == p); |
| 51 | } |
| 52 | |
| 53 | NullValue::NullValue() { |
| 54 | this->init_tagged(Tag::kNull); |
| 55 | SkASSERT(this->getTag() == Tag::kNull); |
| 56 | } |
| 57 | |
| 58 | BoolValue::BoolValue(bool b) { |
| 59 | this->init_tagged(Tag::kBool); |
| 60 | *this->cast<bool>() = b; |
| 61 | SkASSERT(this->getTag() == Tag::kBool); |
| 62 | } |
| 63 | |
| 64 | NumberValue::NumberValue(int32_t i) { |
| 65 | this->init_tagged(Tag::kInt); |
| 66 | *this->cast<int32_t>() = i; |
| 67 | SkASSERT(this->getTag() == Tag::kInt); |
| 68 | } |
| 69 | |
| 70 | NumberValue::NumberValue(float f) { |
| 71 | this->init_tagged(Tag::kFloat); |
| 72 | *this->cast<float>() = f; |
| 73 | SkASSERT(this->getTag() == Tag::kFloat); |
| 74 | } |
| 75 | |
| 76 | // Vector recs point to externally allocated slabs with the following layout: |
| 77 | // |
| 78 | // [size_t n] [REC_0] ... [REC_n-1] [optional extra trailing storage] |
| 79 | // |
| 80 | // Long strings use extra_alloc_size == 1 to store the \0 terminator. |
| 81 | // |
| 82 | template <typename T, size_t extra_alloc_size = 0> |
| 83 | static void* MakeVector(const void* src, size_t size, SkArenaAlloc& alloc) { |
| 84 | // The Ts are already in memory, so their size should be safe. |
| 85 | const auto total_size = sizeof(size_t) + size * sizeof(T) + extra_alloc_size; |
| 86 | auto* size_ptr = reinterpret_cast<size_t*>(alloc.makeBytesAlignedTo(total_size, kRecAlign)); |
| 87 | |
| 88 | *size_ptr = size; |
| 89 | sk_careful_memcpy(size_ptr + 1, src, size * sizeof(T)); |
| 90 | |
| 91 | return size_ptr; |
| 92 | } |
| 93 | |
| 94 | ArrayValue::ArrayValue(const Value* src, size_t size, SkArenaAlloc& alloc) { |
| 95 | this->init_tagged_pointer(Tag::kArray, MakeVector<Value>(src, size, alloc)); |
| 96 | SkASSERT(this->getTag() == Tag::kArray); |
| 97 | } |
| 98 | |
// Strings have two flavors:
//
// -- short strings (len <= 6) -> these are stored inline, in the record
//    (first byte reserved for the tag, one byte reserved for the null terminator):
//
//        [tag] [str] [\0 ...]
//
//    The record is zero-filled around the string chars, so the unused tail
//    doubles as the null terminator.
//
// -- long strings (len > 6) -> these are externally allocated vectors (VectorRec<char>).
//
//    The string data plus a null-char terminator are copied over.
//
| 113 | namespace { |
| 114 | |
| 115 | // An internal string builder with a fast 8 byte short string load path |
| 116 | // (for the common case where the string is not at the end of the stream). |
| 117 | class FastString final : public Value { |
| 118 | public: |
| 119 | FastString(const char* src, size_t size, const char* eos, SkArenaAlloc& alloc) { |
| 120 | SkASSERT(src <= eos); |
| 121 | |
| 122 | if (size > kMaxInlineStringSize) { |
| 123 | this->initLongString(src, size, alloc); |
| 124 | SkASSERT(this->getTag() == Tag::kString); |
| 125 | return; |
| 126 | } |
| 127 | |
| 128 | // initFastShortString is faster (doh), but requires access to 6 chars past src. |
| 129 | if (src && src + 6 <= eos) { |
| 130 | this->initFastShortString(src, size); |
| 131 | } else { |
| 132 | this->initShortString(src, size); |
| 133 | } |
| 134 | |
| 135 | SkASSERT(this->getTag() == Tag::kShortString); |
| 136 | } |
| 137 | |
| 138 | private: |
| 139 | // first byte reserved for tagging, \0 terminator => 6 usable chars |
| 140 | static constexpr size_t kMaxInlineStringSize = sizeof(Value) - 2; |
| 141 | |
| 142 | void initLongString(const char* src, size_t size, SkArenaAlloc& alloc) { |
| 143 | SkASSERT(size > kMaxInlineStringSize); |
| 144 | |
| 145 | this->init_tagged_pointer(Tag::kString, MakeVector<char, 1>(src, size, alloc)); |
| 146 | |
| 147 | auto* data = this->cast<VectorValue<char, Value::Type::kString>>()->begin(); |
| 148 | const_cast<char*>(data)[size] = '\0'; |
| 149 | } |
| 150 | |
| 151 | void initShortString(const char* src, size_t size) { |
| 152 | SkASSERT(size <= kMaxInlineStringSize); |
| 153 | |
| 154 | this->init_tagged(Tag::kShortString); |
| 155 | sk_careful_memcpy(this->cast<char>(), src, size); |
| 156 | // Null terminator provided by init_tagged() above (fData8 is zero-initialized). |
| 157 | } |
| 158 | |
| 159 | void initFastShortString(const char* src, size_t size) { |
| 160 | SkASSERT(size <= kMaxInlineStringSize); |
| 161 | |
| 162 | uint64_t* s64 = this->cast<uint64_t>(); |
| 163 | |
| 164 | // Load 8 chars and mask out the tag and \0 terminator. |
| 165 | // Note: we picked kShortString == 0 to avoid setting explicitly below. |
| 166 | static_assert(SkToU8(Tag::kShortString) == 0, "please don't break this" ); |
| 167 | |
| 168 | // Since the first byte is occupied by the tag, we want the string chars [0..5] to land |
| 169 | // on bytes [1..6] => the fastest way is to read8 @(src - 1) (always safe, because the |
| 170 | // string requires a " prefix at the very least). |
| 171 | memcpy(s64, src - 1, 8); |
| 172 | |
| 173 | #if defined(SK_CPU_LENDIAN) |
| 174 | // The mask for a max-length string (6), with a leading tag and trailing \0 is |
| 175 | // 0x00ffffffffffff00. Accounting for the final left-shift, this becomes |
| 176 | // 0x0000ffffffffffff. |
| 177 | *s64 &= (0x0000ffffffffffffULL >> ((kMaxInlineStringSize - size) * 8)) // trailing \0s |
| 178 | << 8; // tag byte |
| 179 | #else |
| 180 | static_assert(false, "Big-endian builds are not supported at this time." ); |
| 181 | #endif |
| 182 | } |
| 183 | }; |
| 184 | |
| 185 | } // namespace |
| 186 | |
| 187 | StringValue::StringValue(const char* src, size_t size, SkArenaAlloc& alloc) { |
| 188 | new (this) FastString(src, size, src, alloc); |
| 189 | } |
| 190 | |
| 191 | ObjectValue::ObjectValue(const Member* src, size_t size, SkArenaAlloc& alloc) { |
| 192 | this->init_tagged_pointer(Tag::kObject, MakeVector<Member>(src, size, alloc)); |
| 193 | SkASSERT(this->getTag() == Tag::kObject); |
| 194 | } |
| 195 | |
| 196 | |
| 197 | // Boring public Value glue. |
| 198 | |
// Equality-only string compare: returns 0 when the two null-terminated strings are
// identical, and 1 otherwise (no ordering information).
static int inline_strcmp(const char a[], const char b[]) {
    for (; *a != 0; ++a, ++b) {
        if (*a != *b) {
            return 1;
        }
    }

    // |a| is exhausted: the strings match only if |b| is exhausted too.
    return (*b == 0) ? 0 : 1;
}
| 211 | |
| 212 | const Value& ObjectValue::operator[](const char* key) const { |
| 213 | // Reverse search for duplicates resolution (policy: return last). |
| 214 | const auto* begin = this->begin(); |
| 215 | const auto* member = this->end(); |
| 216 | |
| 217 | while (member > begin) { |
| 218 | --member; |
| 219 | if (0 == inline_strcmp(key, member->fKey.as<StringValue>().begin())) { |
| 220 | return member->fValue; |
| 221 | } |
| 222 | } |
| 223 | |
| 224 | static const Value g_null = NullValue(); |
| 225 | return g_null; |
| 226 | } |
| 227 | |
| 228 | namespace { |
| 229 | |
| 230 | // Lexer/parser inspired by rapidjson [1], sajson [2] and pjson [3]. |
| 231 | // |
| 232 | // [1] https://github.com/Tencent/rapidjson/ |
| 233 | // [2] https://github.com/chadaustin/sajson |
| 234 | // [3] https://pastebin.com/hnhSTL3h |
| 235 | |
| 236 | |
| 237 | // bit 0 (0x01) - plain ASCII string character |
| 238 | // bit 1 (0x02) - whitespace |
| 239 | // bit 2 (0x04) - string terminator (" \\ \0 [control chars] **AND } ]** <- see matchString notes) |
| 240 | // bit 3 (0x08) - 0-9 |
| 241 | // bit 4 (0x10) - 0-9 e E . |
| 242 | // bit 5 (0x20) - scope terminator (} ]) |
// Per-character classification table (see the bit legend above).
// Entries 128-255 carry no flags and are left to zero-initialization.
static constexpr uint8_t g_token_flags[256] = {
 // 0    1    2    3    4    5    6    7      8    9    A    B    C    D    E    F
    4,   4,   4,   4,   4,   4,   4,   4,     4,   6,   6,   4,   4,   6,   4,   4, // 0
    4,   4,   4,   4,   4,   4,   4,   4,     4,   4,   4,   4,   4,   4,   4,   4, // 1
    3,   1,   4,   1,   1,   1,   1,   1,     1,   1,   1,   1,   1,   1,   0x11,1, // 2
 0x19,0x19,0x19,0x19,0x19,0x19,0x19,0x19,  0x19,0x19,   1,   1,   1,   1,   1,   1, // 3
    1,   1,   1,   1,   1,   0x11,1,   1,     1,   1,   1,   1,   1,   1,   1,   1, // 4
    1,   1,   1,   1,   1,   1,   1,   1,     1,   1,   1,   1,   4,0x25,   1,   1, // 5
    1,   1,   1,   1,   1,   0x11,1,   1,     1,   1,   1,   1,   1,   1,   1,   1, // 6
    1,   1,   1,   1,   1,   1,   1,   1,     1,   1,   1,   1,   1,0x25,   1,   1, // 7
};

// Tests whether the table entry for |c| has any of the |mask| bits set.
static inline bool has_token_flag(char c, uint8_t mask) {
    return (g_token_flags[static_cast<uint8_t>(c)] & mask) != 0;
}

// Whitespace.
static inline bool is_ws(char c) {
    return has_token_flag(c, 0x02);
}

// String terminator (also covers scope terminators, see the legend).
static inline bool is_eostring(char c) {
    return has_token_flag(c, 0x04);
}

// 0-9
static inline bool is_digit(char c) {
    return has_token_flag(c, 0x08);
}

// 0-9 e E .
static inline bool is_numeric(char c) {
    return has_token_flag(c, 0x10);
}

// Scope terminator (} ]).
static inline bool is_eoscope(char c) {
    return has_token_flag(c, 0x20);
}

// Returns the first non-whitespace position at or after |p|.
static inline const char* skip_ws(const char* p) {
    for (; is_ws(*p); ++p) {}
    return p;
}
| 271 | |
| 272 | static inline float pow10(int32_t exp) { |
| 273 | static constexpr float g_pow10_table[63] = |
| 274 | { |
| 275 | 1.e-031f, 1.e-030f, 1.e-029f, 1.e-028f, 1.e-027f, 1.e-026f, 1.e-025f, 1.e-024f, |
| 276 | 1.e-023f, 1.e-022f, 1.e-021f, 1.e-020f, 1.e-019f, 1.e-018f, 1.e-017f, 1.e-016f, |
| 277 | 1.e-015f, 1.e-014f, 1.e-013f, 1.e-012f, 1.e-011f, 1.e-010f, 1.e-009f, 1.e-008f, |
| 278 | 1.e-007f, 1.e-006f, 1.e-005f, 1.e-004f, 1.e-003f, 1.e-002f, 1.e-001f, 1.e+000f, |
| 279 | 1.e+001f, 1.e+002f, 1.e+003f, 1.e+004f, 1.e+005f, 1.e+006f, 1.e+007f, 1.e+008f, |
| 280 | 1.e+009f, 1.e+010f, 1.e+011f, 1.e+012f, 1.e+013f, 1.e+014f, 1.e+015f, 1.e+016f, |
| 281 | 1.e+017f, 1.e+018f, 1.e+019f, 1.e+020f, 1.e+021f, 1.e+022f, 1.e+023f, 1.e+024f, |
| 282 | 1.e+025f, 1.e+026f, 1.e+027f, 1.e+028f, 1.e+029f, 1.e+030f, 1.e+031f |
| 283 | }; |
| 284 | |
| 285 | static constexpr int32_t k_exp_offset = SK_ARRAY_COUNT(g_pow10_table) / 2; |
| 286 | |
| 287 | // We only support negative exponents for now. |
| 288 | SkASSERT(exp <= 0); |
| 289 | |
| 290 | return (exp >= -k_exp_offset) ? g_pow10_table[exp + k_exp_offset] |
| 291 | : std::pow(10.0f, static_cast<float>(exp)); |
| 292 | } |
| 293 | |
| 294 | class DOMParser { |
| 295 | public: |
| 296 | explicit DOMParser(SkArenaAlloc& alloc) |
| 297 | : fAlloc(alloc) { |
| 298 | fValueStack.reserve(kValueStackReserve); |
| 299 | fUnescapeBuffer.reserve(kUnescapeBufferReserve); |
| 300 | } |
| 301 | |
| 302 | Value parse(const char* p, size_t size) { |
| 303 | if (!size) { |
| 304 | return this->error(NullValue(), p, "invalid empty input" ); |
| 305 | } |
| 306 | |
| 307 | const char* p_stop = p + size - 1; |
| 308 | |
| 309 | // We're only checking for end-of-stream on object/array close('}',']'), |
| 310 | // so we must trim any whitespace from the buffer tail. |
| 311 | while (p_stop > p && is_ws(*p_stop)) --p_stop; |
| 312 | |
| 313 | SkASSERT(p_stop >= p && p_stop < p + size); |
| 314 | if (!is_eoscope(*p_stop)) { |
| 315 | return this->error(NullValue(), p_stop, "invalid top-level value" ); |
| 316 | } |
| 317 | |
| 318 | p = skip_ws(p); |
| 319 | |
| 320 | switch (*p) { |
| 321 | case '{': |
| 322 | goto match_object; |
| 323 | case '[': |
| 324 | goto match_array; |
| 325 | default: |
| 326 | return this->error(NullValue(), p, "invalid top-level value" ); |
| 327 | } |
| 328 | |
| 329 | match_object: |
| 330 | SkASSERT(*p == '{'); |
| 331 | p = skip_ws(p + 1); |
| 332 | |
| 333 | this->pushObjectScope(); |
| 334 | |
| 335 | if (*p == '}') goto pop_object; |
| 336 | |
| 337 | // goto match_object_key; |
| 338 | match_object_key: |
| 339 | p = skip_ws(p); |
| 340 | if (*p != '"') return this->error(NullValue(), p, "expected object key" ); |
| 341 | |
| 342 | p = this->matchString(p, p_stop, [this](const char* key, size_t size, const char* eos) { |
| 343 | this->pushObjectKey(key, size, eos); |
| 344 | }); |
| 345 | if (!p) return NullValue(); |
| 346 | |
| 347 | p = skip_ws(p); |
| 348 | if (*p != ':') return this->error(NullValue(), p, "expected ':' separator" ); |
| 349 | |
| 350 | ++p; |
| 351 | |
| 352 | // goto match_value; |
| 353 | match_value: |
| 354 | p = skip_ws(p); |
| 355 | |
| 356 | switch (*p) { |
| 357 | case '\0': |
| 358 | return this->error(NullValue(), p, "unexpected input end" ); |
| 359 | case '"': |
| 360 | p = this->matchString(p, p_stop, [this](const char* str, size_t size, const char* eos) { |
| 361 | this->pushString(str, size, eos); |
| 362 | }); |
| 363 | break; |
| 364 | case '[': |
| 365 | goto match_array; |
| 366 | case 'f': |
| 367 | p = this->matchFalse(p); |
| 368 | break; |
| 369 | case 'n': |
| 370 | p = this->matchNull(p); |
| 371 | break; |
| 372 | case 't': |
| 373 | p = this->matchTrue(p); |
| 374 | break; |
| 375 | case '{': |
| 376 | goto match_object; |
| 377 | default: |
| 378 | p = this->matchNumber(p); |
| 379 | break; |
| 380 | } |
| 381 | |
| 382 | if (!p) return NullValue(); |
| 383 | |
| 384 | // goto match_post_value; |
| 385 | match_post_value: |
| 386 | SkASSERT(!this->inTopLevelScope()); |
| 387 | |
| 388 | p = skip_ws(p); |
| 389 | switch (*p) { |
| 390 | case ',': |
| 391 | ++p; |
| 392 | if (this->inObjectScope()) { |
| 393 | goto match_object_key; |
| 394 | } else { |
| 395 | SkASSERT(this->inArrayScope()); |
| 396 | goto match_value; |
| 397 | } |
| 398 | case ']': |
| 399 | goto pop_array; |
| 400 | case '}': |
| 401 | goto pop_object; |
| 402 | default: |
| 403 | return this->error(NullValue(), p - 1, "unexpected value-trailing token" ); |
| 404 | } |
| 405 | |
| 406 | // unreachable |
| 407 | SkASSERT(false); |
| 408 | |
| 409 | pop_object: |
| 410 | SkASSERT(*p == '}'); |
| 411 | |
| 412 | if (this->inArrayScope()) { |
| 413 | return this->error(NullValue(), p, "unexpected object terminator" ); |
| 414 | } |
| 415 | |
| 416 | this->popObjectScope(); |
| 417 | |
| 418 | // goto pop_common |
| 419 | pop_common: |
| 420 | SkASSERT(is_eoscope(*p)); |
| 421 | |
| 422 | if (this->inTopLevelScope()) { |
| 423 | SkASSERT(fValueStack.size() == 1); |
| 424 | |
| 425 | // Success condition: parsed the top level element and reached the stop token. |
| 426 | return p == p_stop |
| 427 | ? fValueStack.front() |
| 428 | : this->error(NullValue(), p + 1, "trailing root garbage" ); |
| 429 | } |
| 430 | |
| 431 | if (p == p_stop) { |
| 432 | return this->error(NullValue(), p, "unexpected end-of-input" ); |
| 433 | } |
| 434 | |
| 435 | ++p; |
| 436 | |
| 437 | goto match_post_value; |
| 438 | |
| 439 | match_array: |
| 440 | SkASSERT(*p == '['); |
| 441 | p = skip_ws(p + 1); |
| 442 | |
| 443 | this->pushArrayScope(); |
| 444 | |
| 445 | if (*p != ']') goto match_value; |
| 446 | |
| 447 | // goto pop_array; |
| 448 | pop_array: |
| 449 | SkASSERT(*p == ']'); |
| 450 | |
| 451 | if (this->inObjectScope()) { |
| 452 | return this->error(NullValue(), p, "unexpected array terminator" ); |
| 453 | } |
| 454 | |
| 455 | this->popArrayScope(); |
| 456 | |
| 457 | goto pop_common; |
| 458 | |
| 459 | SkASSERT(false); |
| 460 | return NullValue(); |
| 461 | } |
| 462 | |
| 463 | std::tuple<const char*, const SkString> getError() const { |
| 464 | return std::make_tuple(fErrorToken, fErrorMessage); |
| 465 | } |
| 466 | |
| 467 | private: |
| 468 | SkArenaAlloc& fAlloc; |
| 469 | |
| 470 | // Pending values stack. |
| 471 | static constexpr size_t kValueStackReserve = 256; |
| 472 | std::vector<Value> fValueStack; |
| 473 | |
| 474 | // String unescape buffer. |
| 475 | static constexpr size_t kUnescapeBufferReserve = 512; |
| 476 | std::vector<char> fUnescapeBuffer; |
| 477 | |
| 478 | // Tracks the current object/array scope, as an index into fStack: |
| 479 | // |
| 480 | // - for objects: fScopeIndex = (index of first value in scope) |
| 481 | // - for arrays : fScopeIndex = -(index of first value in scope) |
| 482 | // |
| 483 | // fScopeIndex == 0 IFF we are at the top level (no current/active scope). |
| 484 | intptr_t fScopeIndex = 0; |
| 485 | |
| 486 | // Error reporting. |
| 487 | const char* fErrorToken = nullptr; |
| 488 | SkString fErrorMessage; |
| 489 | |
| 490 | bool inTopLevelScope() const { return fScopeIndex == 0; } |
| 491 | bool inObjectScope() const { return fScopeIndex > 0; } |
| 492 | bool inArrayScope() const { return fScopeIndex < 0; } |
| 493 | |
| 494 | // Helper for masquerading raw primitive types as Values (bypassing tagging, etc). |
| 495 | template <typename T> |
| 496 | class RawValue final : public Value { |
| 497 | public: |
| 498 | explicit RawValue(T v) { |
| 499 | static_assert(sizeof(T) <= sizeof(Value), "" ); |
| 500 | *this->cast<T>() = v; |
| 501 | } |
| 502 | |
| 503 | T operator *() const { return *this->cast<T>(); } |
| 504 | }; |
| 505 | |
| 506 | template <typename VectorT> |
| 507 | void popScopeAsVec(size_t scope_start) { |
| 508 | SkASSERT(scope_start > 0); |
| 509 | SkASSERT(scope_start <= fValueStack.size()); |
| 510 | |
| 511 | using T = typename VectorT::ValueT; |
| 512 | static_assert( sizeof(T) >= sizeof(Value), "" ); |
| 513 | static_assert( sizeof(T) % sizeof(Value) == 0, "" ); |
| 514 | static_assert(alignof(T) == alignof(Value), "" ); |
| 515 | |
| 516 | const auto scope_count = fValueStack.size() - scope_start, |
| 517 | count = scope_count / (sizeof(T) / sizeof(Value)); |
| 518 | SkASSERT(scope_count % (sizeof(T) / sizeof(Value)) == 0); |
| 519 | |
| 520 | const auto* begin = reinterpret_cast<const T*>(fValueStack.data() + scope_start); |
| 521 | |
| 522 | // Restore the previous scope index from saved placeholder value, |
| 523 | // and instantiate as a vector of values in scope. |
| 524 | auto& placeholder = fValueStack[scope_start - 1]; |
| 525 | fScopeIndex = *static_cast<RawValue<intptr_t>&>(placeholder); |
| 526 | placeholder = VectorT(begin, count, fAlloc); |
| 527 | |
| 528 | // Drop the (consumed) values in scope. |
| 529 | fValueStack.resize(scope_start); |
| 530 | } |
| 531 | |
| 532 | void pushObjectScope() { |
| 533 | // Save a scope index now, and then later we'll overwrite this value as the Object itself. |
| 534 | fValueStack.push_back(RawValue<intptr_t>(fScopeIndex)); |
| 535 | |
| 536 | // New object scope. |
| 537 | fScopeIndex = SkTo<intptr_t>(fValueStack.size()); |
| 538 | } |
| 539 | |
| 540 | void popObjectScope() { |
| 541 | SkASSERT(this->inObjectScope()); |
| 542 | this->popScopeAsVec<ObjectValue>(SkTo<size_t>(fScopeIndex)); |
| 543 | |
| 544 | SkDEBUGCODE( |
| 545 | const auto& obj = fValueStack.back().as<ObjectValue>(); |
| 546 | SkASSERT(obj.is<ObjectValue>()); |
| 547 | for (const auto& member : obj) { |
| 548 | SkASSERT(member.fKey.is<StringValue>()); |
| 549 | } |
| 550 | ) |
| 551 | } |
| 552 | |
| 553 | void pushArrayScope() { |
| 554 | // Save a scope index now, and then later we'll overwrite this value as the Array itself. |
| 555 | fValueStack.push_back(RawValue<intptr_t>(fScopeIndex)); |
| 556 | |
| 557 | // New array scope. |
| 558 | fScopeIndex = -SkTo<intptr_t>(fValueStack.size()); |
| 559 | } |
| 560 | |
| 561 | void popArrayScope() { |
| 562 | SkASSERT(this->inArrayScope()); |
| 563 | this->popScopeAsVec<ArrayValue>(SkTo<size_t>(-fScopeIndex)); |
| 564 | |
| 565 | SkDEBUGCODE( |
| 566 | const auto& arr = fValueStack.back().as<ArrayValue>(); |
| 567 | SkASSERT(arr.is<ArrayValue>()); |
| 568 | ) |
| 569 | } |
| 570 | |
| 571 | void pushObjectKey(const char* key, size_t size, const char* eos) { |
| 572 | SkASSERT(this->inObjectScope()); |
| 573 | SkASSERT(fValueStack.size() >= SkTo<size_t>(fScopeIndex)); |
| 574 | SkASSERT(!((fValueStack.size() - SkTo<size_t>(fScopeIndex)) & 1)); |
| 575 | this->pushString(key, size, eos); |
| 576 | } |
| 577 | |
| 578 | void pushTrue() { |
| 579 | fValueStack.push_back(BoolValue(true)); |
| 580 | } |
| 581 | |
| 582 | void pushFalse() { |
| 583 | fValueStack.push_back(BoolValue(false)); |
| 584 | } |
| 585 | |
| 586 | void pushNull() { |
| 587 | fValueStack.push_back(NullValue()); |
| 588 | } |
| 589 | |
| 590 | void pushString(const char* s, size_t size, const char* eos) { |
| 591 | fValueStack.push_back(FastString(s, size, eos, fAlloc)); |
| 592 | } |
| 593 | |
| 594 | void pushInt32(int32_t i) { |
| 595 | fValueStack.push_back(NumberValue(i)); |
| 596 | } |
| 597 | |
| 598 | void pushFloat(float f) { |
| 599 | fValueStack.push_back(NumberValue(f)); |
| 600 | } |
| 601 | |
// Error funnel: hands |ret_val| back to the caller. When SK_JSON_REPORT_ERRORS is defined,
// it also records the offending position |p| and the message |msg|.
template <typename T>
T error(T&& ret_val, const char* p, const char* msg) {
#if defined(SK_JSON_REPORT_ERRORS)
    fErrorMessage.set(msg);
    fErrorToken = p;
#endif
    return ret_val;
}
| 610 | |
| 611 | const char* matchTrue(const char* p) { |
| 612 | SkASSERT(p[0] == 't'); |
| 613 | |
| 614 | if (p[1] == 'r' && p[2] == 'u' && p[3] == 'e') { |
| 615 | this->pushTrue(); |
| 616 | return p + 4; |
| 617 | } |
| 618 | |
| 619 | return this->error(nullptr, p, "invalid token" ); |
| 620 | } |
| 621 | |
| 622 | const char* matchFalse(const char* p) { |
| 623 | SkASSERT(p[0] == 'f'); |
| 624 | |
| 625 | if (p[1] == 'a' && p[2] == 'l' && p[3] == 's' && p[4] == 'e') { |
| 626 | this->pushFalse(); |
| 627 | return p + 5; |
| 628 | } |
| 629 | |
| 630 | return this->error(nullptr, p, "invalid token" ); |
| 631 | } |
| 632 | |
| 633 | const char* matchNull(const char* p) { |
| 634 | SkASSERT(p[0] == 'n'); |
| 635 | |
| 636 | if (p[1] == 'u' && p[2] == 'l' && p[3] == 'l') { |
| 637 | this->pushNull(); |
| 638 | return p + 4; |
| 639 | } |
| 640 | |
| 641 | return this->error(nullptr, p, "invalid token" ); |
| 642 | } |
| 643 | |
| 644 | const std::vector<char>* unescapeString(const char* begin, const char* end) { |
| 645 | fUnescapeBuffer.clear(); |
| 646 | |
| 647 | for (const auto* p = begin; p != end; ++p) { |
| 648 | if (*p != '\\') { |
| 649 | fUnescapeBuffer.push_back(*p); |
| 650 | continue; |
| 651 | } |
| 652 | |
| 653 | if (++p == end) { |
| 654 | return nullptr; |
| 655 | } |
| 656 | |
| 657 | switch (*p) { |
| 658 | case '"': fUnescapeBuffer.push_back( '"'); break; |
| 659 | case '\\': fUnescapeBuffer.push_back('\\'); break; |
| 660 | case '/': fUnescapeBuffer.push_back( '/'); break; |
| 661 | case 'b': fUnescapeBuffer.push_back('\b'); break; |
| 662 | case 'f': fUnescapeBuffer.push_back('\f'); break; |
| 663 | case 'n': fUnescapeBuffer.push_back('\n'); break; |
| 664 | case 'r': fUnescapeBuffer.push_back('\r'); break; |
| 665 | case 't': fUnescapeBuffer.push_back('\t'); break; |
| 666 | case 'u': { |
| 667 | if (p + 4 >= end) { |
| 668 | return nullptr; |
| 669 | } |
| 670 | |
| 671 | uint32_t hexed; |
| 672 | const char hex_str[] = {p[1], p[2], p[3], p[4], '\0'}; |
| 673 | const auto* eos = SkParse::FindHex(hex_str, &hexed); |
| 674 | if (!eos || *eos) { |
| 675 | return nullptr; |
| 676 | } |
| 677 | |
| 678 | char utf8[SkUTF::kMaxBytesInUTF8Sequence]; |
| 679 | const auto utf8_len = SkUTF::ToUTF8(SkTo<SkUnichar>(hexed), utf8); |
| 680 | fUnescapeBuffer.insert(fUnescapeBuffer.end(), utf8, utf8 + utf8_len); |
| 681 | p += 4; |
| 682 | } break; |
| 683 | default: return nullptr; |
| 684 | } |
| 685 | } |
| 686 | |
| 687 | return &fUnescapeBuffer; |
| 688 | } |
| 689 | |
| 690 | template <typename MatchFunc> |
| 691 | const char* matchString(const char* p, const char* p_stop, MatchFunc&& func) { |
| 692 | SkASSERT(*p == '"'); |
| 693 | const auto* s_begin = p + 1; |
| 694 | bool requires_unescape = false; |
| 695 | |
| 696 | do { |
| 697 | // Consume string chars. |
| 698 | // This is the fast path, and hopefully we only hit it once then quick-exit below. |
| 699 | for (p = p + 1; !is_eostring(*p); ++p); |
| 700 | |
| 701 | if (*p == '"') { |
| 702 | // Valid string found. |
| 703 | if (!requires_unescape) { |
| 704 | func(s_begin, p - s_begin, p_stop); |
| 705 | } else { |
| 706 | // Slow unescape. We could avoid this extra copy with some effort, |
| 707 | // but in practice escaped strings should be rare. |
| 708 | const auto* buf = this->unescapeString(s_begin, p); |
| 709 | if (!buf) { |
| 710 | break; |
| 711 | } |
| 712 | |
| 713 | SkASSERT(!buf->empty()); |
| 714 | func(buf->data(), buf->size(), buf->data() + buf->size() - 1); |
| 715 | } |
| 716 | return p + 1; |
| 717 | } |
| 718 | |
| 719 | if (*p == '\\') { |
| 720 | requires_unescape = true; |
| 721 | ++p; |
| 722 | continue; |
| 723 | } |
| 724 | |
| 725 | // End-of-scope chars are special: we use them to tag the end of the input. |
| 726 | // Thus they cannot be consumed indiscriminately -- we need to check if we hit the |
| 727 | // end of the input. To that effect, we treat them as string terminators above, |
| 728 | // then we catch them here. |
| 729 | if (is_eoscope(*p)) { |
| 730 | continue; |
| 731 | } |
| 732 | |
| 733 | // Invalid/unexpected char. |
| 734 | break; |
| 735 | } while (p != p_stop); |
| 736 | |
| 737 | // Premature end-of-input, or illegal string char. |
| 738 | return this->error(nullptr, s_begin - 1, "invalid string" ); |
| 739 | } |
| 740 | |
| 741 | const char* matchFastFloatDecimalPart(const char* p, int sign, float f, int exp) { |
| 742 | SkASSERT(exp <= 0); |
| 743 | |
| 744 | for (;;) { |
| 745 | if (!is_digit(*p)) break; |
| 746 | f = f * 10.f + (*p++ - '0'); --exp; |
| 747 | if (!is_digit(*p)) break; |
| 748 | f = f * 10.f + (*p++ - '0'); --exp; |
| 749 | } |
| 750 | |
| 751 | const auto decimal_scale = pow10(exp); |
| 752 | if (is_numeric(*p) || !decimal_scale) { |
| 753 | SkASSERT((*p == '.' || *p == 'e' || *p == 'E') || !decimal_scale); |
| 754 | // Malformed input, or an (unsupported) exponent, or a collapsed decimal factor. |
| 755 | return nullptr; |
| 756 | } |
| 757 | |
| 758 | this->pushFloat(sign * f * decimal_scale); |
| 759 | |
| 760 | return p; |
| 761 | } |
| 762 | |
| 763 | const char* matchFastFloatPart(const char* p, int sign, float f) { |
| 764 | for (;;) { |
| 765 | if (!is_digit(*p)) break; |
| 766 | f = f * 10.f + (*p++ - '0'); |
| 767 | if (!is_digit(*p)) break; |
| 768 | f = f * 10.f + (*p++ - '0'); |
| 769 | } |
| 770 | |
| 771 | if (!is_numeric(*p)) { |
| 772 | // Matched (integral) float. |
| 773 | this->pushFloat(sign * f); |
| 774 | return p; |
| 775 | } |
| 776 | |
| 777 | return (*p == '.') ? this->matchFastFloatDecimalPart(p + 1, sign, f, 0) |
| 778 | : nullptr; |
| 779 | } |
| 780 | |
| 781 | const char* matchFast32OrFloat(const char* p) { |
| 782 | int sign = 1; |
| 783 | if (*p == '-') { |
| 784 | sign = -1; |
| 785 | ++p; |
| 786 | } |
| 787 | |
| 788 | const auto* digits_start = p; |
| 789 | |
| 790 | int32_t n32 = 0; |
| 791 | |
| 792 | // This is the largest absolute int32 value we can handle before |
| 793 | // risking overflow *on the next digit* (214748363). |
| 794 | static constexpr int32_t kMaxInt32 = (std::numeric_limits<int32_t>::max() - 9) / 10; |
| 795 | |
| 796 | if (is_digit(*p)) { |
| 797 | n32 = (*p++ - '0'); |
| 798 | for (;;) { |
| 799 | if (!is_digit(*p) || n32 > kMaxInt32) break; |
| 800 | n32 = n32 * 10 + (*p++ - '0'); |
| 801 | } |
| 802 | } |
| 803 | |
| 804 | if (!is_numeric(*p)) { |
| 805 | // Did we actually match any digits? |
| 806 | if (p > digits_start) { |
| 807 | this->pushInt32(sign * n32); |
| 808 | return p; |
| 809 | } |
| 810 | return nullptr; |
| 811 | } |
| 812 | |
| 813 | if (*p == '.') { |
| 814 | const auto* decimals_start = ++p; |
| 815 | |
| 816 | int exp = 0; |
| 817 | |
| 818 | for (;;) { |
| 819 | if (!is_digit(*p) || n32 > kMaxInt32) break; |
| 820 | n32 = n32 * 10 + (*p++ - '0'); --exp; |
| 821 | if (!is_digit(*p) || n32 > kMaxInt32) break; |
| 822 | n32 = n32 * 10 + (*p++ - '0'); --exp; |
| 823 | } |
| 824 | |
| 825 | if (!is_numeric(*p)) { |
| 826 | // Did we actually match any digits? |
| 827 | if (p > decimals_start) { |
| 828 | this->pushFloat(sign * n32 * pow10(exp)); |
| 829 | return p; |
| 830 | } |
| 831 | return nullptr; |
| 832 | } |
| 833 | |
| 834 | if (n32 > kMaxInt32) { |
| 835 | // we ran out on n32 bits |
| 836 | return this->matchFastFloatDecimalPart(p, sign, n32, exp); |
| 837 | } |
| 838 | } |
| 839 | |
| 840 | return this->matchFastFloatPart(p, sign, n32); |
| 841 | } |
| 842 | |
| 843 | const char* matchNumber(const char* p) { |
| 844 | if (const auto* fast = this->matchFast32OrFloat(p)) return fast; |
| 845 | |
| 846 | // slow fallback |
| 847 | char* matched; |
| 848 | float f = strtof(p, &matched); |
| 849 | if (matched > p) { |
| 850 | this->pushFloat(f); |
| 851 | return matched; |
| 852 | } |
| 853 | return this->error(nullptr, p, "invalid numeric token" ); |
| 854 | } |
| 855 | }; |
| 856 | |
| 857 | void Write(const Value& v, SkWStream* stream) { |
| 858 | switch (v.getType()) { |
| 859 | case Value::Type::kNull: |
| 860 | stream->writeText("null" ); |
| 861 | break; |
| 862 | case Value::Type::kBool: |
| 863 | stream->writeText(*v.as<BoolValue>() ? "true" : "false" ); |
| 864 | break; |
| 865 | case Value::Type::kNumber: |
| 866 | stream->writeScalarAsText(*v.as<NumberValue>()); |
| 867 | break; |
| 868 | case Value::Type::kString: |
| 869 | stream->writeText("\"" ); |
| 870 | stream->writeText(v.as<StringValue>().begin()); |
| 871 | stream->writeText("\"" ); |
| 872 | break; |
| 873 | case Value::Type::kArray: { |
| 874 | const auto& array = v.as<ArrayValue>(); |
| 875 | stream->writeText("[" ); |
| 876 | bool first_value = true; |
| 877 | for (const auto& v : array) { |
| 878 | if (!first_value) stream->writeText("," ); |
| 879 | Write(v, stream); |
| 880 | first_value = false; |
| 881 | } |
| 882 | stream->writeText("]" ); |
| 883 | break; |
| 884 | } |
| 885 | case Value::Type::kObject: |
| 886 | const auto& object = v.as<ObjectValue>(); |
| 887 | stream->writeText("{" ); |
| 888 | bool first_member = true; |
| 889 | for (const auto& member : object) { |
| 890 | SkASSERT(member.fKey.getType() == Value::Type::kString); |
| 891 | if (!first_member) stream->writeText("," ); |
| 892 | Write(member.fKey, stream); |
| 893 | stream->writeText(":" ); |
| 894 | Write(member.fValue, stream); |
| 895 | first_member = false; |
| 896 | } |
| 897 | stream->writeText("}" ); |
| 898 | break; |
| 899 | } |
| 900 | } |
| 901 | |
| 902 | } // namespace |
| 903 | |
| 904 | SkString Value::toString() const { |
| 905 | SkDynamicMemoryWStream wstream; |
| 906 | Write(*this, &wstream); |
| 907 | const auto data = wstream.detachAsData(); |
| 908 | // TODO: is there a better way to pass data around without copying? |
| 909 | return SkString(static_cast<const char*>(data->data()), data->size()); |
| 910 | } |
| 911 | |
| 912 | static constexpr size_t kMinChunkSize = 4096; |
| 913 | |
| 914 | DOM::DOM(const char* data, size_t size) |
| 915 | : fAlloc(kMinChunkSize) { |
| 916 | DOMParser parser(fAlloc); |
| 917 | |
| 918 | fRoot = parser.parse(data, size); |
| 919 | } |
| 920 | |
| 921 | void DOM::write(SkWStream* stream) const { |
| 922 | Write(fRoot, stream); |
| 923 | } |
| 924 | |
| 925 | } // namespace skjson |
| 926 | |