1#include "simdjson/parsedjson.h"
2#include "simdjson/jsonformatutils.h"
3
4namespace simdjson {
5ParsedJson::ParsedJson()
6 : structural_indexes(nullptr), tape(nullptr),
7 containing_scope_offset(nullptr), ret_address(nullptr),
8 string_buf(nullptr), current_string_buf_loc(nullptr) {}
9
10ParsedJson::~ParsedJson() { deallocate(); }
11
12ParsedJson::ParsedJson(ParsedJson &&p)
13 : byte_capacity(p.byte_capacity), depth_capacity(p.depth_capacity),
14 tape_capacity(p.tape_capacity), string_capacity(p.string_capacity),
15 current_loc(p.current_loc), n_structural_indexes(p.n_structural_indexes),
16 structural_indexes(p.structural_indexes), tape(p.tape),
17 containing_scope_offset(p.containing_scope_offset),
18 ret_address(p.ret_address), string_buf(p.string_buf),
19 current_string_buf_loc(p.current_string_buf_loc), valid(p.valid) {
20 p.structural_indexes = nullptr;
21 p.tape = nullptr;
22 p.containing_scope_offset = nullptr;
23 p.ret_address = nullptr;
24 p.string_buf = nullptr;
25 p.current_string_buf_loc = nullptr;
26}
27
28ParsedJson &ParsedJson::operator=(ParsedJson &&p) {
29 byte_capacity = p.byte_capacity;
30 p.byte_capacity = 0;
31 depth_capacity = p.depth_capacity;
32 p.depth_capacity = 0;
33 tape_capacity = p.tape_capacity;
34 p.tape_capacity = 0;
35 string_capacity = p.string_capacity;
36 p.string_capacity = 0;
37 current_loc = p.current_loc;
38 p.current_loc = 0;
39 n_structural_indexes = p.n_structural_indexes;
40 p.n_structural_indexes = 0;
41 structural_indexes = p.structural_indexes;
42 p.structural_indexes = nullptr;
43 tape = p.tape;
44 p.tape = nullptr;
45 containing_scope_offset = p.containing_scope_offset;
46 p.containing_scope_offset = nullptr;
47 ret_address = p.ret_address;
48 p.ret_address = nullptr;
49 string_buf = p.string_buf;
50 p.string_buf = nullptr;
51 current_string_buf_loc = p.current_string_buf_loc;
52 p.current_string_buf_loc = nullptr;
53 valid = p.valid;
54 p.valid = false;
55 return *this;
56}
57
58WARN_UNUSED
59bool ParsedJson::allocate_capacity(size_t len, size_t max_depth) {
60 if (max_depth <= 0) {
61 max_depth = 1; // don't let the user allocate nothing
62 }
63 if (len <= 0) {
64 len = 64; // allocating 0 bytes is wasteful.
65 }
66 if (len > SIMDJSON_MAXSIZE_BYTES) {
67 return false;
68 }
69 if ((len <= byte_capacity) && (max_depth <= depth_capacity)) {
70 return true;
71 }
72 deallocate();
73 valid = false;
74 byte_capacity = 0; // will only set it to len after allocations are a success
75 n_structural_indexes = 0;
76 uint32_t max_structures = ROUNDUP_N(len, 64) + 2 + 7;
77 structural_indexes = new (std::nothrow) uint32_t[max_structures];
78 // a pathological input like "[[[[..." would generate len tape elements, so
79 // need a capacity of at least len + 1, but it is also possible to do
80 // worse with "[7,7,7,7,6,7,7,7,6,7,7,6,[7,7,7,7,6,7,7,7,6,7,7,6,7,7,7,7,7,7,6"
81 //where len + 1 tape elements are
82 // generated, see issue https://github.com/lemire/simdjson/issues/345
83 size_t local_tape_capacity = ROUNDUP_N(len + 2, 64);
84 // a document with only zero-length strings... could have len/3 string
85 // and we would need len/3 * 5 bytes on the string buffer
86 size_t local_string_capacity = ROUNDUP_N(5 * len / 3 + 32, 64);
87 string_buf = new (std::nothrow) uint8_t[local_string_capacity];
88 tape = new (std::nothrow) uint64_t[local_tape_capacity];
89 containing_scope_offset = new (std::nothrow) uint32_t[max_depth];
90#ifdef SIMDJSON_USE_COMPUTED_GOTO
91 ret_address = new (std::nothrow) void *[max_depth];
92#else
93 ret_address = new (std::nothrow) char[max_depth];
94#endif
95 if ((string_buf == nullptr) || (tape == nullptr) ||
96 (containing_scope_offset == nullptr) || (ret_address == nullptr) ||
97 (structural_indexes == nullptr)) {
98 std::cerr << "Could not allocate memory" << std::endl;
99 delete[] ret_address;
100 delete[] containing_scope_offset;
101 delete[] tape;
102 delete[] string_buf;
103 delete[] structural_indexes;
104
105 return false;
106 }
107 /*
108 // We do not need to initialize this content for parsing, though we could
109 // need to initialize it for safety.
110 memset(string_buf, 0 , local_string_capacity);
111 memset(structural_indexes, 0, max_structures * sizeof(uint32_t));
112 memset(tape, 0, local_tape_capacity * sizeof(uint64_t));
113 */
114 byte_capacity = len;
115 depth_capacity = max_depth;
116 tape_capacity = local_tape_capacity;
117 string_capacity = local_string_capacity;
118 return true;
119}
120
121bool ParsedJson::is_valid() const { return valid; }
122
123int ParsedJson::get_error_code() const { return error_code; }
124
125std::string ParsedJson::get_error_message() const {
126 return error_message(error_code);
127}
128
129void ParsedJson::deallocate() {
130 byte_capacity = 0;
131 depth_capacity = 0;
132 tape_capacity = 0;
133 string_capacity = 0;
134 delete[] ret_address;
135 delete[] containing_scope_offset;
136 delete[] tape;
137 delete[] string_buf;
138 delete[] structural_indexes;
139 valid = false;
140}
141
142void ParsedJson::init() {
143 current_string_buf_loc = string_buf;
144 current_loc = 0;
145 valid = false;
146}
147
148WARN_UNUSED
149bool ParsedJson::print_json(std::ostream &os) const {
150 if (!valid) {
151 return false;
152 }
153 uint32_t string_length;
154 size_t tape_idx = 0;
155 uint64_t tape_val = tape[tape_idx];
156 uint8_t type = (tape_val >> 56);
157 size_t how_many = 0;
158 if (type == 'r') {
159 how_many = tape_val & JSON_VALUE_MASK;
160 } else {
161 fprintf(stderr, "Error: no starting root node?");
162 return false;
163 }
164 if (how_many > tape_capacity) {
165 fprintf(
166 stderr,
167 "We may be exceeding the tape capacity. Is this a valid document?\n");
168 return false;
169 }
170 tape_idx++;
171 bool *in_object = new bool[depth_capacity];
172 auto *in_object_idx = new size_t[depth_capacity];
173 int depth = 1; // only root at level 0
174 in_object_idx[depth] = 0;
175 in_object[depth] = false;
176 for (; tape_idx < how_many; tape_idx++) {
177 tape_val = tape[tape_idx];
178 uint64_t payload = tape_val & JSON_VALUE_MASK;
179 type = (tape_val >> 56);
180 if (!in_object[depth]) {
181 if ((in_object_idx[depth] > 0) && (type != ']')) {
182 os << ",";
183 }
184 in_object_idx[depth]++;
185 } else { // if (in_object) {
186 if ((in_object_idx[depth] > 0) && ((in_object_idx[depth] & 1) == 0) &&
187 (type != '}')) {
188 os << ",";
189 }
190 if (((in_object_idx[depth] & 1) == 1)) {
191 os << ":";
192 }
193 in_object_idx[depth]++;
194 }
195 switch (type) {
196 case '"': // we have a string
197 os << '"';
198 memcpy(&string_length, string_buf + payload, sizeof(uint32_t));
199 print_with_escapes(
200 (const unsigned char *)(string_buf + payload + sizeof(uint32_t)),
201 os, string_length);
202 os << '"';
203 break;
204 case 'l': // we have a long int
205 if (tape_idx + 1 >= how_many) {
206 delete[] in_object;
207 delete[] in_object_idx;
208 return false;
209 }
210 os << static_cast<int64_t>(tape[++tape_idx]);
211 break;
212 case 'u':
213 if (tape_idx + 1 >= how_many) {
214 delete[] in_object;
215 delete[] in_object_idx;
216 return false;
217 }
218 os << tape[++tape_idx];
219 break;
220 case 'd': // we have a double
221 if (tape_idx + 1 >= how_many) {
222 delete[] in_object;
223 delete[] in_object_idx;
224 return false;
225 }
226 double answer;
227 memcpy(&answer, &tape[++tape_idx], sizeof(answer));
228 os << answer;
229 break;
230 case 'n': // we have a null
231 os << "null";
232 break;
233 case 't': // we have a true
234 os << "true";
235 break;
236 case 'f': // we have a false
237 os << "false";
238 break;
239 case '{': // we have an object
240 os << '{';
241 depth++;
242 in_object[depth] = true;
243 in_object_idx[depth] = 0;
244 break;
245 case '}': // we end an object
246 depth--;
247 os << '}';
248 break;
249 case '[': // we start an array
250 os << '[';
251 depth++;
252 in_object[depth] = false;
253 in_object_idx[depth] = 0;
254 break;
255 case ']': // we end an array
256 depth--;
257 os << ']';
258 break;
259 case 'r': // we start and end with the root node
260 fprintf(stderr, "should we be hitting the root node?\n");
261 delete[] in_object;
262 delete[] in_object_idx;
263 return false;
264 default:
265 fprintf(stderr, "bug %c\n", type);
266 delete[] in_object;
267 delete[] in_object_idx;
268 return false;
269 }
270 }
271 delete[] in_object;
272 delete[] in_object_idx;
273 return true;
274}
275
276WARN_UNUSED
277bool ParsedJson::dump_raw_tape(std::ostream &os) const {
278 if (!valid) {
279 return false;
280 }
281 uint32_t string_length;
282 size_t tape_idx = 0;
283 uint64_t tape_val = tape[tape_idx];
284 uint8_t type = (tape_val >> 56);
285 os << tape_idx << " : " << type;
286 tape_idx++;
287 size_t how_many = 0;
288 if (type == 'r') {
289 how_many = tape_val & JSON_VALUE_MASK;
290 } else {
291 fprintf(stderr, "Error: no starting root node?");
292 return false;
293 }
294 os << "\t// pointing to " << how_many << " (right after last node)\n";
295 uint64_t payload;
296 for (; tape_idx < how_many; tape_idx++) {
297 os << tape_idx << " : ";
298 tape_val = tape[tape_idx];
299 payload = tape_val & JSON_VALUE_MASK;
300 type = (tape_val >> 56);
301 switch (type) {
302 case '"': // we have a string
303 os << "string \"";
304 memcpy(&string_length, string_buf + payload, sizeof(uint32_t));
305 print_with_escapes(
306 (const unsigned char *)(string_buf + payload + sizeof(uint32_t)),
307 string_length);
308 os << '"';
309 os << '\n';
310 break;
311 case 'l': // we have a long int
312 if (tape_idx + 1 >= how_many) {
313 return false;
314 }
315 os << "integer " << static_cast<int64_t>(tape[++tape_idx]) << "\n";
316 break;
317 case 'u': // we have a long uint
318 if (tape_idx + 1 >= how_many) {
319 return false;
320 }
321 os << "unsigned integer " << tape[++tape_idx] << "\n";
322 break;
323 case 'd': // we have a double
324 os << "float ";
325 if (tape_idx + 1 >= how_many) {
326 return false;
327 }
328 double answer;
329 memcpy(&answer, &tape[++tape_idx], sizeof(answer));
330 os << answer << '\n';
331 break;
332 case 'n': // we have a null
333 os << "null\n";
334 break;
335 case 't': // we have a true
336 os << "true\n";
337 break;
338 case 'f': // we have a false
339 os << "false\n";
340 break;
341 case '{': // we have an object
342 os << "{\t// pointing to next tape location " << payload
343 << " (first node after the scope) \n";
344 break;
345 case '}': // we end an object
346 os << "}\t// pointing to previous tape location " << payload
347 << " (start of the scope) \n";
348 break;
349 case '[': // we start an array
350 os << "[\t// pointing to next tape location " << payload
351 << " (first node after the scope) \n";
352 break;
353 case ']': // we end an array
354 os << "]\t// pointing to previous tape location " << payload
355 << " (start of the scope) \n";
356 break;
357 case 'r': // we start and end with the root node
358 printf("end of root\n");
359 return false;
360 default:
361 return false;
362 }
363 }
364 tape_val = tape[tape_idx];
365 payload = tape_val & JSON_VALUE_MASK;
366 type = (tape_val >> 56);
367 os << tape_idx << " : " << type << "\t// pointing to " << payload
368 << " (start root)\n";
369 return true;
370}
371} // namespace simdjson
372