1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * jsonb_util.c |
4 | * converting between Jsonb and JsonbValues, and iterating. |
5 | * |
6 | * Copyright (c) 2014-2019, PostgreSQL Global Development Group |
7 | * |
8 | * |
9 | * IDENTIFICATION |
10 | * src/backend/utils/adt/jsonb_util.c |
11 | * |
12 | *------------------------------------------------------------------------- |
13 | */ |
14 | #include "postgres.h" |
15 | |
16 | #include "catalog/pg_collation.h" |
17 | #include "miscadmin.h" |
18 | #include "utils/builtins.h" |
19 | #include "utils/hashutils.h" |
20 | #include "utils/jsonb.h" |
21 | #include "utils/memutils.h" |
22 | #include "utils/varlena.h" |
23 | |
24 | /* |
25 | * Maximum number of elements in an array (or key/value pairs in an object). |
26 | * This is limited by two things: the size of the JEntry array must fit |
27 | * in MaxAllocSize, and the number of elements (or pairs) must fit in the bits |
28 | * reserved for that in the JsonbContainer.header field. |
29 | * |
30 | * (The total size of an array's or object's elements is also limited by |
31 | * JENTRY_OFFLENMASK, but we're not concerned about that here.) |
32 | */ |
33 | #define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JB_CMASK)) |
34 | #define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), JB_CMASK)) |
35 | |
36 | static void fillJsonbValue(JsonbContainer *container, int index, |
37 | char *base_addr, uint32 offset, |
38 | JsonbValue *result); |
39 | static bool equalsJsonbScalarValue(JsonbValue *a, JsonbValue *b); |
40 | static int compareJsonbScalarValue(JsonbValue *a, JsonbValue *b); |
41 | static Jsonb *convertToJsonb(JsonbValue *val); |
42 | static void convertJsonbValue(StringInfo buffer, JEntry *, JsonbValue *val, int level); |
43 | static void convertJsonbArray(StringInfo buffer, JEntry *, JsonbValue *val, int level); |
44 | static void convertJsonbObject(StringInfo buffer, JEntry *, JsonbValue *val, int level); |
45 | static void convertJsonbScalar(StringInfo buffer, JEntry *, JsonbValue *scalarVal); |
46 | |
47 | static int reserveFromBuffer(StringInfo buffer, int len); |
48 | static void appendToBuffer(StringInfo buffer, const char *data, int len); |
49 | static void copyToBuffer(StringInfo buffer, int offset, const char *data, int len); |
50 | static short padBufferToInt(StringInfo buffer); |
51 | |
52 | static JsonbIterator *iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent); |
53 | static JsonbIterator *freeAndGetParent(JsonbIterator *it); |
54 | static JsonbParseState *pushState(JsonbParseState **pstate); |
55 | static void appendKey(JsonbParseState *pstate, JsonbValue *scalarVal); |
56 | static void appendValue(JsonbParseState *pstate, JsonbValue *scalarVal); |
57 | static void appendElement(JsonbParseState *pstate, JsonbValue *scalarVal); |
58 | static int lengthCompareJsonbStringValue(const void *a, const void *b); |
59 | static int lengthCompareJsonbPair(const void *a, const void *b, void *arg); |
60 | static void uniqueifyJsonbObject(JsonbValue *object); |
61 | static JsonbValue *pushJsonbValueScalar(JsonbParseState **pstate, |
62 | JsonbIteratorToken seq, |
63 | JsonbValue *scalarVal); |
64 | |
65 | /* |
66 | * Turn an in-memory JsonbValue into a Jsonb for on-disk storage. |
67 | * |
68 | * There isn't a JsonbToJsonbValue(), because generally we find it more |
69 | * convenient to directly iterate through the Jsonb representation and only |
70 | * really convert nested scalar values. JsonbIteratorNext() does this, so that |
71 | * clients of the iteration code don't have to directly deal with the binary |
72 | * representation (JsonbDeepContains() is a notable exception, although all |
73 | * exceptions are internal to this module). In general, functions that accept |
74 | * a JsonbValue argument are concerned with the manipulation of scalar values, |
75 | * or simple containers of scalar values, where it would be inconvenient to |
76 | * deal with a great amount of other state. |
77 | */ |
78 | Jsonb * |
79 | JsonbValueToJsonb(JsonbValue *val) |
80 | { |
81 | Jsonb *out; |
82 | |
83 | if (IsAJsonbScalar(val)) |
84 | { |
85 | /* Scalar value */ |
86 | JsonbParseState *pstate = NULL; |
87 | JsonbValue *res; |
88 | JsonbValue scalarArray; |
89 | |
90 | scalarArray.type = jbvArray; |
91 | scalarArray.val.array.rawScalar = true; |
92 | scalarArray.val.array.nElems = 1; |
93 | |
94 | pushJsonbValue(&pstate, WJB_BEGIN_ARRAY, &scalarArray); |
95 | pushJsonbValue(&pstate, WJB_ELEM, val); |
96 | res = pushJsonbValue(&pstate, WJB_END_ARRAY, NULL); |
97 | |
98 | out = convertToJsonb(res); |
99 | } |
100 | else if (val->type == jbvObject || val->type == jbvArray) |
101 | { |
102 | out = convertToJsonb(val); |
103 | } |
104 | else |
105 | { |
106 | Assert(val->type == jbvBinary); |
107 | out = palloc(VARHDRSZ + val->val.binary.len); |
108 | SET_VARSIZE(out, VARHDRSZ + val->val.binary.len); |
109 | memcpy(VARDATA(out), val->val.binary.data, val->val.binary.len); |
110 | } |
111 | |
112 | return out; |
113 | } |
114 | |
115 | /* |
116 | * Get the offset of the variable-length portion of a Jsonb node within |
117 | * the variable-length-data part of its container. The node is identified |
118 | * by index within the container's JEntry array. |
119 | */ |
120 | uint32 |
121 | getJsonbOffset(const JsonbContainer *jc, int index) |
122 | { |
123 | uint32 offset = 0; |
124 | int i; |
125 | |
126 | /* |
127 | * Start offset of this entry is equal to the end offset of the previous |
128 | * entry. Walk backwards to the most recent entry stored as an end |
129 | * offset, returning that offset plus any lengths in between. |
130 | */ |
131 | for (i = index - 1; i >= 0; i--) |
132 | { |
133 | offset += JBE_OFFLENFLD(jc->children[i]); |
134 | if (JBE_HAS_OFF(jc->children[i])) |
135 | break; |
136 | } |
137 | |
138 | return offset; |
139 | } |
140 | |
141 | /* |
142 | * Get the length of the variable-length portion of a Jsonb node. |
143 | * The node is identified by index within the container's JEntry array. |
144 | */ |
145 | uint32 |
146 | getJsonbLength(const JsonbContainer *jc, int index) |
147 | { |
148 | uint32 off; |
149 | uint32 len; |
150 | |
151 | /* |
152 | * If the length is stored directly in the JEntry, just return it. |
153 | * Otherwise, get the begin offset of the entry, and subtract that from |
154 | * the stored end+1 offset. |
155 | */ |
156 | if (JBE_HAS_OFF(jc->children[index])) |
157 | { |
158 | off = getJsonbOffset(jc, index); |
159 | len = JBE_OFFLENFLD(jc->children[index]) - off; |
160 | } |
161 | else |
162 | len = JBE_OFFLENFLD(jc->children[index]); |
163 | |
164 | return len; |
165 | } |
166 | |
167 | /* |
168 | * BT comparator worker function. Returns an integer less than, equal to, or |
169 | * greater than zero, indicating whether a is less than, equal to, or greater |
170 | * than b. Consistent with the requirements for a B-Tree operator class |
171 | * |
172 | * Strings are compared lexically, in contrast with other places where we use a |
173 | * much simpler comparator logic for searching through Strings. Since this is |
174 | * called from B-Tree support function 1, we're careful about not leaking |
175 | * memory here. |
176 | */ |
177 | int |
178 | compareJsonbContainers(JsonbContainer *a, JsonbContainer *b) |
179 | { |
180 | JsonbIterator *ita, |
181 | *itb; |
182 | int res = 0; |
183 | |
184 | ita = JsonbIteratorInit(a); |
185 | itb = JsonbIteratorInit(b); |
186 | |
187 | do |
188 | { |
189 | JsonbValue va, |
190 | vb; |
191 | JsonbIteratorToken ra, |
192 | rb; |
193 | |
194 | ra = JsonbIteratorNext(&ita, &va, false); |
195 | rb = JsonbIteratorNext(&itb, &vb, false); |
196 | |
197 | if (ra == rb) |
198 | { |
199 | if (ra == WJB_DONE) |
200 | { |
201 | /* Decisively equal */ |
202 | break; |
203 | } |
204 | |
205 | if (ra == WJB_END_ARRAY || ra == WJB_END_OBJECT) |
206 | { |
207 | /* |
208 | * There is no array or object to compare at this stage of |
209 | * processing. jbvArray/jbvObject values are compared |
210 | * initially, at the WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT |
211 | * tokens. |
212 | */ |
213 | continue; |
214 | } |
215 | |
216 | if (va.type == vb.type) |
217 | { |
218 | switch (va.type) |
219 | { |
220 | case jbvString: |
221 | case jbvNull: |
222 | case jbvNumeric: |
223 | case jbvBool: |
224 | res = compareJsonbScalarValue(&va, &vb); |
225 | break; |
226 | case jbvArray: |
227 | |
228 | /* |
229 | * This could be a "raw scalar" pseudo array. That's |
230 | * a special case here though, since we still want the |
231 | * general type-based comparisons to apply, and as far |
232 | * as we're concerned a pseudo array is just a scalar. |
233 | */ |
234 | if (va.val.array.rawScalar != vb.val.array.rawScalar) |
235 | res = (va.val.array.rawScalar) ? -1 : 1; |
236 | if (va.val.array.nElems != vb.val.array.nElems) |
237 | res = (va.val.array.nElems > vb.val.array.nElems) ? 1 : -1; |
238 | break; |
239 | case jbvObject: |
240 | if (va.val.object.nPairs != vb.val.object.nPairs) |
241 | res = (va.val.object.nPairs > vb.val.object.nPairs) ? 1 : -1; |
242 | break; |
243 | case jbvBinary: |
244 | elog(ERROR, "unexpected jbvBinary value" ); |
245 | } |
246 | } |
247 | else |
248 | { |
249 | /* Type-defined order */ |
250 | res = (va.type > vb.type) ? 1 : -1; |
251 | } |
252 | } |
253 | else |
254 | { |
255 | /* |
256 | * It's safe to assume that the types differed, and that the va |
257 | * and vb values passed were set. |
258 | * |
259 | * If the two values were of the same container type, then there'd |
260 | * have been a chance to observe the variation in the number of |
261 | * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're |
262 | * either two heterogeneously-typed containers, or a container and |
263 | * some scalar type. |
264 | * |
265 | * We don't have to consider the WJB_END_ARRAY and WJB_END_OBJECT |
266 | * cases here, because we would have seen the corresponding |
267 | * WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT tokens first, and |
268 | * concluded that they don't match. |
269 | */ |
270 | Assert(ra != WJB_END_ARRAY && ra != WJB_END_OBJECT); |
271 | Assert(rb != WJB_END_ARRAY && rb != WJB_END_OBJECT); |
272 | |
273 | Assert(va.type != vb.type); |
274 | Assert(va.type != jbvBinary); |
275 | Assert(vb.type != jbvBinary); |
276 | /* Type-defined order */ |
277 | res = (va.type > vb.type) ? 1 : -1; |
278 | } |
279 | } |
280 | while (res == 0); |
281 | |
282 | while (ita != NULL) |
283 | { |
284 | JsonbIterator *i = ita->parent; |
285 | |
286 | pfree(ita); |
287 | ita = i; |
288 | } |
289 | while (itb != NULL) |
290 | { |
291 | JsonbIterator *i = itb->parent; |
292 | |
293 | pfree(itb); |
294 | itb = i; |
295 | } |
296 | |
297 | return res; |
298 | } |
299 | |
300 | /* |
301 | * Find value in object (i.e. the "value" part of some key/value pair in an |
302 | * object), or find a matching element if we're looking through an array. Do |
303 | * so on the basis of equality of the object keys only, or alternatively |
304 | * element values only, with a caller-supplied value "key". The "flags" |
305 | * argument allows the caller to specify which container types are of interest. |
306 | * |
307 | * This exported utility function exists to facilitate various cases concerned |
308 | * with "containment". If asked to look through an object, the caller had |
309 | * better pass a Jsonb String, because their keys can only be strings. |
310 | * Otherwise, for an array, any type of JsonbValue will do. |
311 | * |
312 | * In order to proceed with the search, it is necessary for callers to have |
313 | * both specified an interest in exactly one particular container type with an |
314 | * appropriate flag, as well as having the pointed-to Jsonb container be of |
315 | * one of those same container types at the top level. (Actually, we just do |
316 | * whichever makes sense to save callers the trouble of figuring it out - at |
317 | * most one can make sense, because the container either points to an array |
318 | * (possibly a "raw scalar" pseudo array) or an object.) |
319 | * |
320 | * Note that we can return a jbvBinary JsonbValue if this is called on an |
321 | * object, but we never do so on an array. If the caller asks to look through |
322 | * a container type that is not of the type pointed to by the container, |
323 | * immediately fall through and return NULL. If we cannot find the value, |
324 | * return NULL. Otherwise, return palloc()'d copy of value. |
325 | */ |
326 | JsonbValue * |
327 | findJsonbValueFromContainer(JsonbContainer *container, uint32 flags, |
328 | JsonbValue *key) |
329 | { |
330 | JEntry *children = container->children; |
331 | int count = JsonContainerSize(container); |
332 | JsonbValue *result; |
333 | |
334 | Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0); |
335 | |
336 | /* Quick out without a palloc cycle if object/array is empty */ |
337 | if (count <= 0) |
338 | return NULL; |
339 | |
340 | result = palloc(sizeof(JsonbValue)); |
341 | |
342 | if ((flags & JB_FARRAY) && JsonContainerIsArray(container)) |
343 | { |
344 | char *base_addr = (char *) (children + count); |
345 | uint32 offset = 0; |
346 | int i; |
347 | |
348 | for (i = 0; i < count; i++) |
349 | { |
350 | fillJsonbValue(container, i, base_addr, offset, result); |
351 | |
352 | if (key->type == result->type) |
353 | { |
354 | if (equalsJsonbScalarValue(key, result)) |
355 | return result; |
356 | } |
357 | |
358 | JBE_ADVANCE_OFFSET(offset, children[i]); |
359 | } |
360 | } |
361 | else if ((flags & JB_FOBJECT) && JsonContainerIsObject(container)) |
362 | { |
363 | /* Since this is an object, account for *Pairs* of Jentrys */ |
364 | char *base_addr = (char *) (children + count * 2); |
365 | uint32 stopLow = 0, |
366 | stopHigh = count; |
367 | |
368 | /* Object key passed by caller must be a string */ |
369 | Assert(key->type == jbvString); |
370 | |
371 | /* Binary search on object/pair keys *only* */ |
372 | while (stopLow < stopHigh) |
373 | { |
374 | uint32 stopMiddle; |
375 | int difference; |
376 | JsonbValue candidate; |
377 | |
378 | stopMiddle = stopLow + (stopHigh - stopLow) / 2; |
379 | |
380 | candidate.type = jbvString; |
381 | candidate.val.string.val = |
382 | base_addr + getJsonbOffset(container, stopMiddle); |
383 | candidate.val.string.len = getJsonbLength(container, stopMiddle); |
384 | |
385 | difference = lengthCompareJsonbStringValue(&candidate, key); |
386 | |
387 | if (difference == 0) |
388 | { |
389 | /* Found our key, return corresponding value */ |
390 | int index = stopMiddle + count; |
391 | |
392 | fillJsonbValue(container, index, base_addr, |
393 | getJsonbOffset(container, index), |
394 | result); |
395 | |
396 | return result; |
397 | } |
398 | else |
399 | { |
400 | if (difference < 0) |
401 | stopLow = stopMiddle + 1; |
402 | else |
403 | stopHigh = stopMiddle; |
404 | } |
405 | } |
406 | } |
407 | |
408 | /* Not found */ |
409 | pfree(result); |
410 | return NULL; |
411 | } |
412 | |
413 | /* |
414 | * Get i-th value of a Jsonb array. |
415 | * |
416 | * Returns palloc()'d copy of the value, or NULL if it does not exist. |
417 | */ |
418 | JsonbValue * |
419 | getIthJsonbValueFromContainer(JsonbContainer *container, uint32 i) |
420 | { |
421 | JsonbValue *result; |
422 | char *base_addr; |
423 | uint32 nelements; |
424 | |
425 | if (!JsonContainerIsArray(container)) |
426 | elog(ERROR, "not a jsonb array" ); |
427 | |
428 | nelements = JsonContainerSize(container); |
429 | base_addr = (char *) &container->children[nelements]; |
430 | |
431 | if (i >= nelements) |
432 | return NULL; |
433 | |
434 | result = palloc(sizeof(JsonbValue)); |
435 | |
436 | fillJsonbValue(container, i, base_addr, |
437 | getJsonbOffset(container, i), |
438 | result); |
439 | |
440 | return result; |
441 | } |
442 | |
443 | /* |
444 | * A helper function to fill in a JsonbValue to represent an element of an |
445 | * array, or a key or value of an object. |
446 | * |
447 | * The node's JEntry is at container->children[index], and its variable-length |
448 | * data is at base_addr + offset. We make the caller determine the offset |
449 | * since in many cases the caller can amortize that work across multiple |
450 | * children. When it can't, it can just call getJsonbOffset(). |
451 | * |
452 | * A nested array or object will be returned as jbvBinary, ie. it won't be |
453 | * expanded. |
454 | */ |
455 | static void |
456 | fillJsonbValue(JsonbContainer *container, int index, |
457 | char *base_addr, uint32 offset, |
458 | JsonbValue *result) |
459 | { |
460 | JEntry entry = container->children[index]; |
461 | |
462 | if (JBE_ISNULL(entry)) |
463 | { |
464 | result->type = jbvNull; |
465 | } |
466 | else if (JBE_ISSTRING(entry)) |
467 | { |
468 | result->type = jbvString; |
469 | result->val.string.val = base_addr + offset; |
470 | result->val.string.len = getJsonbLength(container, index); |
471 | Assert(result->val.string.len >= 0); |
472 | } |
473 | else if (JBE_ISNUMERIC(entry)) |
474 | { |
475 | result->type = jbvNumeric; |
476 | result->val.numeric = (Numeric) (base_addr + INTALIGN(offset)); |
477 | } |
478 | else if (JBE_ISBOOL_TRUE(entry)) |
479 | { |
480 | result->type = jbvBool; |
481 | result->val.boolean = true; |
482 | } |
483 | else if (JBE_ISBOOL_FALSE(entry)) |
484 | { |
485 | result->type = jbvBool; |
486 | result->val.boolean = false; |
487 | } |
488 | else |
489 | { |
490 | Assert(JBE_ISCONTAINER(entry)); |
491 | result->type = jbvBinary; |
492 | /* Remove alignment padding from data pointer and length */ |
493 | result->val.binary.data = (JsonbContainer *) (base_addr + INTALIGN(offset)); |
494 | result->val.binary.len = getJsonbLength(container, index) - |
495 | (INTALIGN(offset) - offset); |
496 | } |
497 | } |
498 | |
499 | /* |
500 | * Push JsonbValue into JsonbParseState. |
501 | * |
502 | * Used when parsing JSON tokens to form Jsonb, or when converting an in-memory |
503 | * JsonbValue to a Jsonb. |
504 | * |
505 | * Initial state of *JsonbParseState is NULL, since it'll be allocated here |
506 | * originally (caller will get JsonbParseState back by reference). |
507 | * |
508 | * Only sequential tokens pertaining to non-container types should pass a |
509 | * JsonbValue. There is one exception -- WJB_BEGIN_ARRAY callers may pass a |
510 | * "raw scalar" pseudo array to append it - the actual scalar should be passed |
511 | * next and it will be added as the only member of the array. |
512 | * |
513 | * Values of type jvbBinary, which are rolled up arrays and objects, |
514 | * are unpacked before being added to the result. |
515 | */ |
516 | JsonbValue * |
517 | pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq, |
518 | JsonbValue *jbval) |
519 | { |
520 | JsonbIterator *it; |
521 | JsonbValue *res = NULL; |
522 | JsonbValue v; |
523 | JsonbIteratorToken tok; |
524 | |
525 | if (!jbval || (seq != WJB_ELEM && seq != WJB_VALUE) || |
526 | jbval->type != jbvBinary) |
527 | { |
528 | /* drop through */ |
529 | return pushJsonbValueScalar(pstate, seq, jbval); |
530 | } |
531 | |
532 | /* unpack the binary and add each piece to the pstate */ |
533 | it = JsonbIteratorInit(jbval->val.binary.data); |
534 | while ((tok = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) |
535 | res = pushJsonbValueScalar(pstate, tok, |
536 | tok < WJB_BEGIN_ARRAY ? &v : NULL); |
537 | |
538 | return res; |
539 | } |
540 | |
541 | /* |
542 | * Do the actual pushing, with only scalar or pseudo-scalar-array values |
543 | * accepted. |
544 | */ |
545 | static JsonbValue * |
546 | pushJsonbValueScalar(JsonbParseState **pstate, JsonbIteratorToken seq, |
547 | JsonbValue *scalarVal) |
548 | { |
549 | JsonbValue *result = NULL; |
550 | |
551 | switch (seq) |
552 | { |
553 | case WJB_BEGIN_ARRAY: |
554 | Assert(!scalarVal || scalarVal->val.array.rawScalar); |
555 | *pstate = pushState(pstate); |
556 | result = &(*pstate)->contVal; |
557 | (*pstate)->contVal.type = jbvArray; |
558 | (*pstate)->contVal.val.array.nElems = 0; |
559 | (*pstate)->contVal.val.array.rawScalar = (scalarVal && |
560 | scalarVal->val.array.rawScalar); |
561 | if (scalarVal && scalarVal->val.array.nElems > 0) |
562 | { |
563 | /* Assume that this array is still really a scalar */ |
564 | Assert(scalarVal->type == jbvArray); |
565 | (*pstate)->size = scalarVal->val.array.nElems; |
566 | } |
567 | else |
568 | { |
569 | (*pstate)->size = 4; |
570 | } |
571 | (*pstate)->contVal.val.array.elems = palloc(sizeof(JsonbValue) * |
572 | (*pstate)->size); |
573 | break; |
574 | case WJB_BEGIN_OBJECT: |
575 | Assert(!scalarVal); |
576 | *pstate = pushState(pstate); |
577 | result = &(*pstate)->contVal; |
578 | (*pstate)->contVal.type = jbvObject; |
579 | (*pstate)->contVal.val.object.nPairs = 0; |
580 | (*pstate)->size = 4; |
581 | (*pstate)->contVal.val.object.pairs = palloc(sizeof(JsonbPair) * |
582 | (*pstate)->size); |
583 | break; |
584 | case WJB_KEY: |
585 | Assert(scalarVal->type == jbvString); |
586 | appendKey(*pstate, scalarVal); |
587 | break; |
588 | case WJB_VALUE: |
589 | Assert(IsAJsonbScalar(scalarVal)); |
590 | appendValue(*pstate, scalarVal); |
591 | break; |
592 | case WJB_ELEM: |
593 | Assert(IsAJsonbScalar(scalarVal)); |
594 | appendElement(*pstate, scalarVal); |
595 | break; |
596 | case WJB_END_OBJECT: |
597 | uniqueifyJsonbObject(&(*pstate)->contVal); |
598 | /* fall through! */ |
599 | case WJB_END_ARRAY: |
600 | /* Steps here common to WJB_END_OBJECT case */ |
601 | Assert(!scalarVal); |
602 | result = &(*pstate)->contVal; |
603 | |
604 | /* |
605 | * Pop stack and push current array/object as value in parent |
606 | * array/object |
607 | */ |
608 | *pstate = (*pstate)->next; |
609 | if (*pstate) |
610 | { |
611 | switch ((*pstate)->contVal.type) |
612 | { |
613 | case jbvArray: |
614 | appendElement(*pstate, result); |
615 | break; |
616 | case jbvObject: |
617 | appendValue(*pstate, result); |
618 | break; |
619 | default: |
620 | elog(ERROR, "invalid jsonb container type" ); |
621 | } |
622 | } |
623 | break; |
624 | default: |
625 | elog(ERROR, "unrecognized jsonb sequential processing token" ); |
626 | } |
627 | |
628 | return result; |
629 | } |
630 | |
631 | /* |
632 | * pushJsonbValue() worker: Iteration-like forming of Jsonb |
633 | */ |
634 | static JsonbParseState * |
635 | pushState(JsonbParseState **pstate) |
636 | { |
637 | JsonbParseState *ns = palloc(sizeof(JsonbParseState)); |
638 | |
639 | ns->next = *pstate; |
640 | return ns; |
641 | } |
642 | |
643 | /* |
644 | * pushJsonbValue() worker: Append a pair key to state when generating a Jsonb |
645 | */ |
646 | static void |
647 | appendKey(JsonbParseState *pstate, JsonbValue *string) |
648 | { |
649 | JsonbValue *object = &pstate->contVal; |
650 | |
651 | Assert(object->type == jbvObject); |
652 | Assert(string->type == jbvString); |
653 | |
654 | if (object->val.object.nPairs >= JSONB_MAX_PAIRS) |
655 | ereport(ERROR, |
656 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
657 | errmsg("number of jsonb object pairs exceeds the maximum allowed (%zu)" , |
658 | JSONB_MAX_PAIRS))); |
659 | |
660 | if (object->val.object.nPairs >= pstate->size) |
661 | { |
662 | pstate->size *= 2; |
663 | object->val.object.pairs = repalloc(object->val.object.pairs, |
664 | sizeof(JsonbPair) * pstate->size); |
665 | } |
666 | |
667 | object->val.object.pairs[object->val.object.nPairs].key = *string; |
668 | object->val.object.pairs[object->val.object.nPairs].order = object->val.object.nPairs; |
669 | } |
670 | |
671 | /* |
672 | * pushJsonbValue() worker: Append a pair value to state when generating a |
673 | * Jsonb |
674 | */ |
675 | static void |
676 | appendValue(JsonbParseState *pstate, JsonbValue *scalarVal) |
677 | { |
678 | JsonbValue *object = &pstate->contVal; |
679 | |
680 | Assert(object->type == jbvObject); |
681 | |
682 | object->val.object.pairs[object->val.object.nPairs++].value = *scalarVal; |
683 | } |
684 | |
685 | /* |
686 | * pushJsonbValue() worker: Append an element to state when generating a Jsonb |
687 | */ |
688 | static void |
689 | appendElement(JsonbParseState *pstate, JsonbValue *scalarVal) |
690 | { |
691 | JsonbValue *array = &pstate->contVal; |
692 | |
693 | Assert(array->type == jbvArray); |
694 | |
695 | if (array->val.array.nElems >= JSONB_MAX_ELEMS) |
696 | ereport(ERROR, |
697 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
698 | errmsg("number of jsonb array elements exceeds the maximum allowed (%zu)" , |
699 | JSONB_MAX_ELEMS))); |
700 | |
701 | if (array->val.array.nElems >= pstate->size) |
702 | { |
703 | pstate->size *= 2; |
704 | array->val.array.elems = repalloc(array->val.array.elems, |
705 | sizeof(JsonbValue) * pstate->size); |
706 | } |
707 | |
708 | array->val.array.elems[array->val.array.nElems++] = *scalarVal; |
709 | } |
710 | |
711 | /* |
712 | * Given a JsonbContainer, expand to JsonbIterator to iterate over items |
713 | * fully expanded to in-memory representation for manipulation. |
714 | * |
715 | * See JsonbIteratorNext() for notes on memory management. |
716 | */ |
717 | JsonbIterator * |
718 | JsonbIteratorInit(JsonbContainer *container) |
719 | { |
720 | return iteratorFromContainer(container, NULL); |
721 | } |
722 | |
723 | /* |
724 | * Get next JsonbValue while iterating |
725 | * |
726 | * Caller should initially pass their own, original iterator. They may get |
727 | * back a child iterator palloc()'d here instead. The function can be relied |
728 | * on to free those child iterators, lest the memory allocated for highly |
729 | * nested objects become unreasonable, but only if callers don't end iteration |
730 | * early (by breaking upon having found something in a search, for example). |
731 | * |
732 | * Callers in such a scenario, that are particularly sensitive to leaking |
733 | * memory in a long-lived context may walk the ancestral tree from the final |
734 | * iterator we left them with to its oldest ancestor, pfree()ing as they go. |
735 | * They do not have to free any other memory previously allocated for iterators |
736 | * but not accessible as direct ancestors of the iterator they're last passed |
737 | * back. |
738 | * |
739 | * Returns "Jsonb sequential processing" token value. Iterator "state" |
740 | * reflects the current stage of the process in a less granular fashion, and is |
741 | * mostly used here to track things internally with respect to particular |
742 | * iterators. |
743 | * |
744 | * Clients of this function should not have to handle any jbvBinary values |
745 | * (since recursive calls will deal with this), provided skipNested is false. |
746 | * It is our job to expand the jbvBinary representation without bothering them |
747 | * with it. However, clients should not take it upon themselves to touch array |
748 | * or Object element/pair buffers, since their element/pair pointers are |
749 | * garbage. Also, *val will not be set when returning WJB_END_ARRAY or |
750 | * WJB_END_OBJECT, on the assumption that it's only useful to access values |
751 | * when recursing in. |
752 | */ |
753 | JsonbIteratorToken |
754 | JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested) |
755 | { |
756 | if (*it == NULL) |
757 | return WJB_DONE; |
758 | |
759 | /* |
760 | * When stepping into a nested container, we jump back here to start |
761 | * processing the child. We will not recurse further in one call, because |
762 | * processing the child will always begin in JBI_ARRAY_START or |
763 | * JBI_OBJECT_START state. |
764 | */ |
765 | recurse: |
766 | switch ((*it)->state) |
767 | { |
768 | case JBI_ARRAY_START: |
769 | /* Set v to array on first array call */ |
770 | val->type = jbvArray; |
771 | val->val.array.nElems = (*it)->nElems; |
772 | |
773 | /* |
774 | * v->val.array.elems is not actually set, because we aren't doing |
775 | * a full conversion |
776 | */ |
777 | val->val.array.rawScalar = (*it)->isScalar; |
778 | (*it)->curIndex = 0; |
779 | (*it)->curDataOffset = 0; |
780 | (*it)->curValueOffset = 0; /* not actually used */ |
781 | /* Set state for next call */ |
782 | (*it)->state = JBI_ARRAY_ELEM; |
783 | return WJB_BEGIN_ARRAY; |
784 | |
785 | case JBI_ARRAY_ELEM: |
786 | if ((*it)->curIndex >= (*it)->nElems) |
787 | { |
788 | /* |
789 | * All elements within array already processed. Report this |
790 | * to caller, and give it back original parent iterator (which |
791 | * independently tracks iteration progress at its level of |
792 | * nesting). |
793 | */ |
794 | *it = freeAndGetParent(*it); |
795 | return WJB_END_ARRAY; |
796 | } |
797 | |
798 | fillJsonbValue((*it)->container, (*it)->curIndex, |
799 | (*it)->dataProper, (*it)->curDataOffset, |
800 | val); |
801 | |
802 | JBE_ADVANCE_OFFSET((*it)->curDataOffset, |
803 | (*it)->children[(*it)->curIndex]); |
804 | (*it)->curIndex++; |
805 | |
806 | if (!IsAJsonbScalar(val) && !skipNested) |
807 | { |
808 | /* Recurse into container. */ |
809 | *it = iteratorFromContainer(val->val.binary.data, *it); |
810 | goto recurse; |
811 | } |
812 | else |
813 | { |
814 | /* |
815 | * Scalar item in array, or a container and caller didn't want |
816 | * us to recurse into it. |
817 | */ |
818 | return WJB_ELEM; |
819 | } |
820 | |
821 | case JBI_OBJECT_START: |
822 | /* Set v to object on first object call */ |
823 | val->type = jbvObject; |
824 | val->val.object.nPairs = (*it)->nElems; |
825 | |
826 | /* |
827 | * v->val.object.pairs is not actually set, because we aren't |
828 | * doing a full conversion |
829 | */ |
830 | (*it)->curIndex = 0; |
831 | (*it)->curDataOffset = 0; |
832 | (*it)->curValueOffset = getJsonbOffset((*it)->container, |
833 | (*it)->nElems); |
834 | /* Set state for next call */ |
835 | (*it)->state = JBI_OBJECT_KEY; |
836 | return WJB_BEGIN_OBJECT; |
837 | |
838 | case JBI_OBJECT_KEY: |
839 | if ((*it)->curIndex >= (*it)->nElems) |
840 | { |
841 | /* |
842 | * All pairs within object already processed. Report this to |
843 | * caller, and give it back original containing iterator |
844 | * (which independently tracks iteration progress at its level |
845 | * of nesting). |
846 | */ |
847 | *it = freeAndGetParent(*it); |
848 | return WJB_END_OBJECT; |
849 | } |
850 | else |
851 | { |
852 | /* Return key of a key/value pair. */ |
853 | fillJsonbValue((*it)->container, (*it)->curIndex, |
854 | (*it)->dataProper, (*it)->curDataOffset, |
855 | val); |
856 | if (val->type != jbvString) |
857 | elog(ERROR, "unexpected jsonb type as object key" ); |
858 | |
859 | /* Set state for next call */ |
860 | (*it)->state = JBI_OBJECT_VALUE; |
861 | return WJB_KEY; |
862 | } |
863 | |
864 | case JBI_OBJECT_VALUE: |
865 | /* Set state for next call */ |
866 | (*it)->state = JBI_OBJECT_KEY; |
867 | |
868 | fillJsonbValue((*it)->container, (*it)->curIndex + (*it)->nElems, |
869 | (*it)->dataProper, (*it)->curValueOffset, |
870 | val); |
871 | |
872 | JBE_ADVANCE_OFFSET((*it)->curDataOffset, |
873 | (*it)->children[(*it)->curIndex]); |
874 | JBE_ADVANCE_OFFSET((*it)->curValueOffset, |
875 | (*it)->children[(*it)->curIndex + (*it)->nElems]); |
876 | (*it)->curIndex++; |
877 | |
878 | /* |
879 | * Value may be a container, in which case we recurse with new, |
880 | * child iterator (unless the caller asked not to, by passing |
881 | * skipNested). |
882 | */ |
883 | if (!IsAJsonbScalar(val) && !skipNested) |
884 | { |
885 | *it = iteratorFromContainer(val->val.binary.data, *it); |
886 | goto recurse; |
887 | } |
888 | else |
889 | return WJB_VALUE; |
890 | } |
891 | |
892 | elog(ERROR, "invalid iterator state" ); |
893 | return -1; |
894 | } |
895 | |
896 | /* |
897 | * Initialize an iterator for iterating all elements in a container. |
898 | */ |
899 | static JsonbIterator * |
900 | iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent) |
901 | { |
902 | JsonbIterator *it; |
903 | |
904 | it = palloc0(sizeof(JsonbIterator)); |
905 | it->container = container; |
906 | it->parent = parent; |
907 | it->nElems = JsonContainerSize(container); |
908 | |
909 | /* Array starts just after header */ |
910 | it->children = container->children; |
911 | |
912 | switch (container->header & (JB_FARRAY | JB_FOBJECT)) |
913 | { |
914 | case JB_FARRAY: |
915 | it->dataProper = |
916 | (char *) it->children + it->nElems * sizeof(JEntry); |
917 | it->isScalar = JsonContainerIsScalar(container); |
918 | /* This is either a "raw scalar", or an array */ |
919 | Assert(!it->isScalar || it->nElems == 1); |
920 | |
921 | it->state = JBI_ARRAY_START; |
922 | break; |
923 | |
924 | case JB_FOBJECT: |
925 | it->dataProper = |
926 | (char *) it->children + it->nElems * sizeof(JEntry) * 2; |
927 | it->state = JBI_OBJECT_START; |
928 | break; |
929 | |
930 | default: |
931 | elog(ERROR, "unknown type of jsonb container" ); |
932 | } |
933 | |
934 | return it; |
935 | } |
936 | |
937 | /* |
938 | * JsonbIteratorNext() worker: Return parent, while freeing memory for current |
939 | * iterator |
940 | */ |
941 | static JsonbIterator * |
942 | freeAndGetParent(JsonbIterator *it) |
943 | { |
944 | JsonbIterator *v = it->parent; |
945 | |
946 | pfree(it); |
947 | return v; |
948 | } |
949 | |
950 | /* |
951 | * Worker for "contains" operator's function |
952 | * |
953 | * Formally speaking, containment is top-down, unordered subtree isomorphism. |
954 | * |
955 | * Takes iterators that belong to some container type. These iterators |
956 | * "belong" to those values in the sense that they've just been initialized in |
957 | * respect of them by the caller (perhaps in a nested fashion). |
958 | * |
959 | * "val" is lhs Jsonb, and mContained is rhs Jsonb when called from top level. |
960 | * We determine if mContained is contained within val. |
961 | */ |
962 | bool |
963 | JsonbDeepContains(JsonbIterator **val, JsonbIterator **mContained) |
964 | { |
965 | JsonbValue vval, |
966 | vcontained; |
967 | JsonbIteratorToken rval, |
968 | rcont; |
969 | |
970 | /* |
971 | * Guard against stack overflow due to overly complex Jsonb. |
972 | * |
973 | * Functions called here independently take this precaution, but that |
974 | * might not be sufficient since this is also a recursive function. |
975 | */ |
976 | check_stack_depth(); |
977 | |
978 | rval = JsonbIteratorNext(val, &vval, false); |
979 | rcont = JsonbIteratorNext(mContained, &vcontained, false); |
980 | |
981 | if (rval != rcont) |
982 | { |
983 | /* |
984 | * The differing return values can immediately be taken as indicating |
985 | * two differing container types at this nesting level, which is |
986 | * sufficient reason to give up entirely (but it should be the case |
987 | * that they're both some container type). |
988 | */ |
989 | Assert(rval == WJB_BEGIN_OBJECT || rval == WJB_BEGIN_ARRAY); |
990 | Assert(rcont == WJB_BEGIN_OBJECT || rcont == WJB_BEGIN_ARRAY); |
991 | return false; |
992 | } |
993 | else if (rcont == WJB_BEGIN_OBJECT) |
994 | { |
995 | Assert(vval.type == jbvObject); |
996 | Assert(vcontained.type == jbvObject); |
997 | |
998 | /* |
999 | * If the lhs has fewer pairs than the rhs, it can't possibly contain |
1000 | * the rhs. (This conclusion is safe only because we de-duplicate |
1001 | * keys in all Jsonb objects; thus there can be no corresponding |
1002 | * optimization in the array case.) The case probably won't arise |
1003 | * often, but since it's such a cheap check we may as well make it. |
1004 | */ |
1005 | if (vval.val.object.nPairs < vcontained.val.object.nPairs) |
1006 | return false; |
1007 | |
1008 | /* Work through rhs "is it contained within?" object */ |
1009 | for (;;) |
1010 | { |
1011 | JsonbValue *lhsVal; /* lhsVal is from pair in lhs object */ |
1012 | |
1013 | rcont = JsonbIteratorNext(mContained, &vcontained, false); |
1014 | |
1015 | /* |
1016 | * When we get through caller's rhs "is it contained within?" |
1017 | * object without failing to find one of its values, it's |
1018 | * contained. |
1019 | */ |
1020 | if (rcont == WJB_END_OBJECT) |
1021 | return true; |
1022 | |
1023 | Assert(rcont == WJB_KEY); |
1024 | |
1025 | /* First, find value by key... */ |
1026 | lhsVal = findJsonbValueFromContainer((*val)->container, |
1027 | JB_FOBJECT, |
1028 | &vcontained); |
1029 | |
1030 | if (!lhsVal) |
1031 | return false; |
1032 | |
1033 | /* |
1034 | * ...at this stage it is apparent that there is at least a key |
1035 | * match for this rhs pair. |
1036 | */ |
1037 | rcont = JsonbIteratorNext(mContained, &vcontained, true); |
1038 | |
1039 | Assert(rcont == WJB_VALUE); |
1040 | |
1041 | /* |
1042 | * Compare rhs pair's value with lhs pair's value just found using |
1043 | * key |
1044 | */ |
1045 | if (lhsVal->type != vcontained.type) |
1046 | { |
1047 | return false; |
1048 | } |
1049 | else if (IsAJsonbScalar(lhsVal)) |
1050 | { |
1051 | if (!equalsJsonbScalarValue(lhsVal, &vcontained)) |
1052 | return false; |
1053 | } |
1054 | else |
1055 | { |
1056 | /* Nested container value (object or array) */ |
1057 | JsonbIterator *nestval, |
1058 | *nestContained; |
1059 | |
1060 | Assert(lhsVal->type == jbvBinary); |
1061 | Assert(vcontained.type == jbvBinary); |
1062 | |
1063 | nestval = JsonbIteratorInit(lhsVal->val.binary.data); |
1064 | nestContained = JsonbIteratorInit(vcontained.val.binary.data); |
1065 | |
1066 | /* |
1067 | * Match "value" side of rhs datum object's pair recursively. |
1068 | * It's a nested structure. |
1069 | * |
1070 | * Note that nesting still has to "match up" at the right |
1071 | * nesting sub-levels. However, there need only be zero or |
1072 | * more matching pairs (or elements) at each nesting level |
1073 | * (provided the *rhs* pairs/elements *all* match on each |
1074 | * level), which enables searching nested structures for a |
1075 | * single String or other primitive type sub-datum quite |
1076 | * effectively (provided the user constructed the rhs nested |
1077 | * structure such that we "know where to look"). |
1078 | * |
1079 | * In other words, the mapping of container nodes in the rhs |
1080 | * "vcontained" Jsonb to internal nodes on the lhs is |
1081 | * injective, and parent-child edges on the rhs must be mapped |
1082 | * to parent-child edges on the lhs to satisfy the condition |
1083 | * of containment (plus of course the mapped nodes must be |
1084 | * equal). |
1085 | */ |
1086 | if (!JsonbDeepContains(&nestval, &nestContained)) |
1087 | return false; |
1088 | } |
1089 | } |
1090 | } |
1091 | else if (rcont == WJB_BEGIN_ARRAY) |
1092 | { |
1093 | JsonbValue *lhsConts = NULL; |
1094 | uint32 nLhsElems = vval.val.array.nElems; |
1095 | |
1096 | Assert(vval.type == jbvArray); |
1097 | Assert(vcontained.type == jbvArray); |
1098 | |
1099 | /* |
1100 | * Handle distinction between "raw scalar" pseudo arrays, and real |
1101 | * arrays. |
1102 | * |
1103 | * A raw scalar may contain another raw scalar, and an array may |
1104 | * contain a raw scalar, but a raw scalar may not contain an array. We |
1105 | * don't do something like this for the object case, since objects can |
1106 | * only contain pairs, never raw scalars (a pair is represented by an |
1107 | * rhs object argument with a single contained pair). |
1108 | */ |
1109 | if (vval.val.array.rawScalar && !vcontained.val.array.rawScalar) |
1110 | return false; |
1111 | |
1112 | /* Work through rhs "is it contained within?" array */ |
1113 | for (;;) |
1114 | { |
1115 | rcont = JsonbIteratorNext(mContained, &vcontained, true); |
1116 | |
1117 | /* |
1118 | * When we get through caller's rhs "is it contained within?" |
1119 | * array without failing to find one of its values, it's |
1120 | * contained. |
1121 | */ |
1122 | if (rcont == WJB_END_ARRAY) |
1123 | return true; |
1124 | |
1125 | Assert(rcont == WJB_ELEM); |
1126 | |
1127 | if (IsAJsonbScalar(&vcontained)) |
1128 | { |
1129 | if (!findJsonbValueFromContainer((*val)->container, |
1130 | JB_FARRAY, |
1131 | &vcontained)) |
1132 | return false; |
1133 | } |
1134 | else |
1135 | { |
1136 | uint32 i; |
1137 | |
1138 | /* |
1139 | * If this is first container found in rhs array (at this |
1140 | * depth), initialize temp lhs array of containers |
1141 | */ |
1142 | if (lhsConts == NULL) |
1143 | { |
1144 | uint32 j = 0; |
1145 | |
1146 | /* Make room for all possible values */ |
1147 | lhsConts = palloc(sizeof(JsonbValue) * nLhsElems); |
1148 | |
1149 | for (i = 0; i < nLhsElems; i++) |
1150 | { |
1151 | /* Store all lhs elements in temp array */ |
1152 | rcont = JsonbIteratorNext(val, &vval, true); |
1153 | Assert(rcont == WJB_ELEM); |
1154 | |
1155 | if (vval.type == jbvBinary) |
1156 | lhsConts[j++] = vval; |
1157 | } |
1158 | |
1159 | /* No container elements in temp array, so give up now */ |
1160 | if (j == 0) |
1161 | return false; |
1162 | |
1163 | /* We may have only partially filled array */ |
1164 | nLhsElems = j; |
1165 | } |
1166 | |
1167 | /* XXX: Nested array containment is O(N^2) */ |
1168 | for (i = 0; i < nLhsElems; i++) |
1169 | { |
1170 | /* Nested container value (object or array) */ |
1171 | JsonbIterator *nestval, |
1172 | *nestContained; |
1173 | bool contains; |
1174 | |
1175 | nestval = JsonbIteratorInit(lhsConts[i].val.binary.data); |
1176 | nestContained = JsonbIteratorInit(vcontained.val.binary.data); |
1177 | |
1178 | contains = JsonbDeepContains(&nestval, &nestContained); |
1179 | |
1180 | if (nestval) |
1181 | pfree(nestval); |
1182 | if (nestContained) |
1183 | pfree(nestContained); |
1184 | if (contains) |
1185 | break; |
1186 | } |
1187 | |
1188 | /* |
1189 | * Report rhs container value is not contained if couldn't |
1190 | * match rhs container to *some* lhs cont |
1191 | */ |
1192 | if (i == nLhsElems) |
1193 | return false; |
1194 | } |
1195 | } |
1196 | } |
1197 | else |
1198 | { |
1199 | elog(ERROR, "invalid jsonb container type" ); |
1200 | } |
1201 | |
1202 | elog(ERROR, "unexpectedly fell off end of jsonb container" ); |
1203 | return false; |
1204 | } |
1205 | |
1206 | /* |
1207 | * Hash a JsonbValue scalar value, mixing the hash value into an existing |
1208 | * hash provided by the caller. |
1209 | * |
1210 | * Some callers may wish to independently XOR in JB_FOBJECT and JB_FARRAY |
1211 | * flags. |
1212 | */ |
1213 | void |
1214 | JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash) |
1215 | { |
1216 | uint32 tmp; |
1217 | |
1218 | /* Compute hash value for scalarVal */ |
1219 | switch (scalarVal->type) |
1220 | { |
1221 | case jbvNull: |
1222 | tmp = 0x01; |
1223 | break; |
1224 | case jbvString: |
1225 | tmp = DatumGetUInt32(hash_any((const unsigned char *) scalarVal->val.string.val, |
1226 | scalarVal->val.string.len)); |
1227 | break; |
1228 | case jbvNumeric: |
1229 | /* Must hash equal numerics to equal hash codes */ |
1230 | tmp = DatumGetUInt32(DirectFunctionCall1(hash_numeric, |
1231 | NumericGetDatum(scalarVal->val.numeric))); |
1232 | break; |
1233 | case jbvBool: |
1234 | tmp = scalarVal->val.boolean ? 0x02 : 0x04; |
1235 | |
1236 | break; |
1237 | default: |
1238 | elog(ERROR, "invalid jsonb scalar type" ); |
1239 | tmp = 0; /* keep compiler quiet */ |
1240 | break; |
1241 | } |
1242 | |
1243 | /* |
1244 | * Combine hash values of successive keys, values and elements by rotating |
1245 | * the previous value left 1 bit, then XOR'ing in the new |
1246 | * key/value/element's hash value. |
1247 | */ |
1248 | *hash = (*hash << 1) | (*hash >> 31); |
1249 | *hash ^= tmp; |
1250 | } |
1251 | |
1252 | /* |
1253 | * Hash a value to a 64-bit value, with a seed. Otherwise, similar to |
1254 | * JsonbHashScalarValue. |
1255 | */ |
1256 | void |
1257 | JsonbHashScalarValueExtended(const JsonbValue *scalarVal, uint64 *hash, |
1258 | uint64 seed) |
1259 | { |
1260 | uint64 tmp; |
1261 | |
1262 | switch (scalarVal->type) |
1263 | { |
1264 | case jbvNull: |
1265 | tmp = seed + 0x01; |
1266 | break; |
1267 | case jbvString: |
1268 | tmp = DatumGetUInt64(hash_any_extended((const unsigned char *) scalarVal->val.string.val, |
1269 | scalarVal->val.string.len, |
1270 | seed)); |
1271 | break; |
1272 | case jbvNumeric: |
1273 | tmp = DatumGetUInt64(DirectFunctionCall2(hash_numeric_extended, |
1274 | NumericGetDatum(scalarVal->val.numeric), |
1275 | UInt64GetDatum(seed))); |
1276 | break; |
1277 | case jbvBool: |
1278 | if (seed) |
1279 | tmp = DatumGetUInt64(DirectFunctionCall2(hashcharextended, |
1280 | BoolGetDatum(scalarVal->val.boolean), |
1281 | UInt64GetDatum(seed))); |
1282 | else |
1283 | tmp = scalarVal->val.boolean ? 0x02 : 0x04; |
1284 | |
1285 | break; |
1286 | default: |
1287 | elog(ERROR, "invalid jsonb scalar type" ); |
1288 | break; |
1289 | } |
1290 | |
1291 | *hash = ROTATE_HIGH_AND_LOW_32BITS(*hash); |
1292 | *hash ^= tmp; |
1293 | } |
1294 | |
1295 | /* |
1296 | * Are two scalar JsonbValues of the same type a and b equal? |
1297 | */ |
1298 | static bool |
1299 | equalsJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar) |
1300 | { |
1301 | if (aScalar->type == bScalar->type) |
1302 | { |
1303 | switch (aScalar->type) |
1304 | { |
1305 | case jbvNull: |
1306 | return true; |
1307 | case jbvString: |
1308 | return lengthCompareJsonbStringValue(aScalar, bScalar) == 0; |
1309 | case jbvNumeric: |
1310 | return DatumGetBool(DirectFunctionCall2(numeric_eq, |
1311 | PointerGetDatum(aScalar->val.numeric), |
1312 | PointerGetDatum(bScalar->val.numeric))); |
1313 | case jbvBool: |
1314 | return aScalar->val.boolean == bScalar->val.boolean; |
1315 | |
1316 | default: |
1317 | elog(ERROR, "invalid jsonb scalar type" ); |
1318 | } |
1319 | } |
1320 | elog(ERROR, "jsonb scalar type mismatch" ); |
1321 | return false; |
1322 | } |
1323 | |
1324 | /* |
1325 | * Compare two scalar JsonbValues, returning -1, 0, or 1. |
1326 | * |
1327 | * Strings are compared using the default collation. Used by B-tree |
1328 | * operators, where a lexical sort order is generally expected. |
1329 | */ |
1330 | static int |
1331 | compareJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar) |
1332 | { |
1333 | if (aScalar->type == bScalar->type) |
1334 | { |
1335 | switch (aScalar->type) |
1336 | { |
1337 | case jbvNull: |
1338 | return 0; |
1339 | case jbvString: |
1340 | return varstr_cmp(aScalar->val.string.val, |
1341 | aScalar->val.string.len, |
1342 | bScalar->val.string.val, |
1343 | bScalar->val.string.len, |
1344 | DEFAULT_COLLATION_OID); |
1345 | case jbvNumeric: |
1346 | return DatumGetInt32(DirectFunctionCall2(numeric_cmp, |
1347 | PointerGetDatum(aScalar->val.numeric), |
1348 | PointerGetDatum(bScalar->val.numeric))); |
1349 | case jbvBool: |
1350 | if (aScalar->val.boolean == bScalar->val.boolean) |
1351 | return 0; |
1352 | else if (aScalar->val.boolean > bScalar->val.boolean) |
1353 | return 1; |
1354 | else |
1355 | return -1; |
1356 | default: |
1357 | elog(ERROR, "invalid jsonb scalar type" ); |
1358 | } |
1359 | } |
1360 | elog(ERROR, "jsonb scalar type mismatch" ); |
1361 | return -1; |
1362 | } |
1363 | |
1364 | |
1365 | /* |
1366 | * Functions for manipulating the resizable buffer used by convertJsonb and |
1367 | * its subroutines. |
1368 | */ |
1369 | |
1370 | /* |
1371 | * Reserve 'len' bytes, at the end of the buffer, enlarging it if necessary. |
1372 | * Returns the offset to the reserved area. The caller is expected to fill |
1373 | * the reserved area later with copyToBuffer(). |
1374 | */ |
1375 | static int |
1376 | reserveFromBuffer(StringInfo buffer, int len) |
1377 | { |
1378 | int offset; |
1379 | |
1380 | /* Make more room if needed */ |
1381 | enlargeStringInfo(buffer, len); |
1382 | |
1383 | /* remember current offset */ |
1384 | offset = buffer->len; |
1385 | |
1386 | /* reserve the space */ |
1387 | buffer->len += len; |
1388 | |
1389 | /* |
1390 | * Keep a trailing null in place, even though it's not useful for us; it |
1391 | * seems best to preserve the invariants of StringInfos. |
1392 | */ |
1393 | buffer->data[buffer->len] = '\0'; |
1394 | |
1395 | return offset; |
1396 | } |
1397 | |
1398 | /* |
1399 | * Copy 'len' bytes to a previously reserved area in buffer. |
1400 | */ |
1401 | static void |
1402 | copyToBuffer(StringInfo buffer, int offset, const char *data, int len) |
1403 | { |
1404 | memcpy(buffer->data + offset, data, len); |
1405 | } |
1406 | |
1407 | /* |
1408 | * A shorthand for reserveFromBuffer + copyToBuffer. |
1409 | */ |
1410 | static void |
1411 | appendToBuffer(StringInfo buffer, const char *data, int len) |
1412 | { |
1413 | int offset; |
1414 | |
1415 | offset = reserveFromBuffer(buffer, len); |
1416 | copyToBuffer(buffer, offset, data, len); |
1417 | } |
1418 | |
1419 | |
1420 | /* |
1421 | * Append padding, so that the length of the StringInfo is int-aligned. |
1422 | * Returns the number of padding bytes appended. |
1423 | */ |
1424 | static short |
1425 | padBufferToInt(StringInfo buffer) |
1426 | { |
1427 | int padlen, |
1428 | p, |
1429 | offset; |
1430 | |
1431 | padlen = INTALIGN(buffer->len) - buffer->len; |
1432 | |
1433 | offset = reserveFromBuffer(buffer, padlen); |
1434 | |
1435 | /* padlen must be small, so this is probably faster than a memset */ |
1436 | for (p = 0; p < padlen; p++) |
1437 | buffer->data[offset + p] = '\0'; |
1438 | |
1439 | return padlen; |
1440 | } |
1441 | |
1442 | /* |
1443 | * Given a JsonbValue, convert to Jsonb. The result is palloc'd. |
1444 | */ |
1445 | static Jsonb * |
1446 | convertToJsonb(JsonbValue *val) |
1447 | { |
1448 | StringInfoData buffer; |
1449 | JEntry jentry; |
1450 | Jsonb *res; |
1451 | |
1452 | /* Should not already have binary representation */ |
1453 | Assert(val->type != jbvBinary); |
1454 | |
1455 | /* Allocate an output buffer. It will be enlarged as needed */ |
1456 | initStringInfo(&buffer); |
1457 | |
1458 | /* Make room for the varlena header */ |
1459 | reserveFromBuffer(&buffer, VARHDRSZ); |
1460 | |
1461 | convertJsonbValue(&buffer, &jentry, val, 0); |
1462 | |
1463 | /* |
1464 | * Note: the JEntry of the root is discarded. Therefore the root |
1465 | * JsonbContainer struct must contain enough information to tell what kind |
1466 | * of value it is. |
1467 | */ |
1468 | |
1469 | res = (Jsonb *) buffer.data; |
1470 | |
1471 | SET_VARSIZE(res, buffer.len); |
1472 | |
1473 | return res; |
1474 | } |
1475 | |
1476 | /* |
1477 | * Subroutine of convertJsonb: serialize a single JsonbValue into buffer. |
1478 | * |
1479 | * The JEntry header for this node is returned in *header. It is filled in |
1480 | * with the length of this value and appropriate type bits. If we wish to |
1481 | * store an end offset rather than a length, it is the caller's responsibility |
1482 | * to adjust for that. |
1483 | * |
1484 | * If the value is an array or an object, this recurses. 'level' is only used |
1485 | * for debugging purposes. |
1486 | */ |
1487 | static void |
1488 | convertJsonbValue(StringInfo buffer, JEntry *, JsonbValue *val, int level) |
1489 | { |
1490 | check_stack_depth(); |
1491 | |
1492 | if (!val) |
1493 | return; |
1494 | |
1495 | /* |
1496 | * A JsonbValue passed as val should never have a type of jbvBinary, and |
1497 | * neither should any of its sub-components. Those values will be produced |
1498 | * by convertJsonbArray and convertJsonbObject, the results of which will |
1499 | * not be passed back to this function as an argument. |
1500 | */ |
1501 | |
1502 | if (IsAJsonbScalar(val)) |
1503 | convertJsonbScalar(buffer, header, val); |
1504 | else if (val->type == jbvArray) |
1505 | convertJsonbArray(buffer, header, val, level); |
1506 | else if (val->type == jbvObject) |
1507 | convertJsonbObject(buffer, header, val, level); |
1508 | else |
1509 | elog(ERROR, "unknown type of jsonb container to convert" ); |
1510 | } |
1511 | |
1512 | static void |
1513 | convertJsonbArray(StringInfo buffer, JEntry *, JsonbValue *val, int level) |
1514 | { |
1515 | int base_offset; |
1516 | int jentry_offset; |
1517 | int i; |
1518 | int totallen; |
1519 | uint32 ; |
1520 | int nElems = val->val.array.nElems; |
1521 | |
1522 | /* Remember where in the buffer this array starts. */ |
1523 | base_offset = buffer->len; |
1524 | |
1525 | /* Align to 4-byte boundary (any padding counts as part of my data) */ |
1526 | padBufferToInt(buffer); |
1527 | |
1528 | /* |
1529 | * Construct the header Jentry and store it in the beginning of the |
1530 | * variable-length payload. |
1531 | */ |
1532 | header = nElems | JB_FARRAY; |
1533 | if (val->val.array.rawScalar) |
1534 | { |
1535 | Assert(nElems == 1); |
1536 | Assert(level == 0); |
1537 | header |= JB_FSCALAR; |
1538 | } |
1539 | |
1540 | appendToBuffer(buffer, (char *) &header, sizeof(uint32)); |
1541 | |
1542 | /* Reserve space for the JEntries of the elements. */ |
1543 | jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nElems); |
1544 | |
1545 | totallen = 0; |
1546 | for (i = 0; i < nElems; i++) |
1547 | { |
1548 | JsonbValue *elem = &val->val.array.elems[i]; |
1549 | int len; |
1550 | JEntry meta; |
1551 | |
1552 | /* |
1553 | * Convert element, producing a JEntry and appending its |
1554 | * variable-length data to buffer |
1555 | */ |
1556 | convertJsonbValue(buffer, &meta, elem, level + 1); |
1557 | |
1558 | len = JBE_OFFLENFLD(meta); |
1559 | totallen += len; |
1560 | |
1561 | /* |
1562 | * Bail out if total variable-length data exceeds what will fit in a |
1563 | * JEntry length field. We check this in each iteration, not just |
1564 | * once at the end, to forestall possible integer overflow. |
1565 | */ |
1566 | if (totallen > JENTRY_OFFLENMASK) |
1567 | ereport(ERROR, |
1568 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
1569 | errmsg("total size of jsonb array elements exceeds the maximum of %u bytes" , |
1570 | JENTRY_OFFLENMASK))); |
1571 | |
1572 | /* |
1573 | * Convert each JB_OFFSET_STRIDE'th length to an offset. |
1574 | */ |
1575 | if ((i % JB_OFFSET_STRIDE) == 0) |
1576 | meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF; |
1577 | |
1578 | copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry)); |
1579 | jentry_offset += sizeof(JEntry); |
1580 | } |
1581 | |
1582 | /* Total data size is everything we've appended to buffer */ |
1583 | totallen = buffer->len - base_offset; |
1584 | |
1585 | /* Check length again, since we didn't include the metadata above */ |
1586 | if (totallen > JENTRY_OFFLENMASK) |
1587 | ereport(ERROR, |
1588 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
1589 | errmsg("total size of jsonb array elements exceeds the maximum of %u bytes" , |
1590 | JENTRY_OFFLENMASK))); |
1591 | |
1592 | /* Initialize the header of this node in the container's JEntry array */ |
1593 | *pheader = JENTRY_ISCONTAINER | totallen; |
1594 | } |
1595 | |
1596 | static void |
1597 | convertJsonbObject(StringInfo buffer, JEntry *, JsonbValue *val, int level) |
1598 | { |
1599 | int base_offset; |
1600 | int jentry_offset; |
1601 | int i; |
1602 | int totallen; |
1603 | uint32 ; |
1604 | int nPairs = val->val.object.nPairs; |
1605 | |
1606 | /* Remember where in the buffer this object starts. */ |
1607 | base_offset = buffer->len; |
1608 | |
1609 | /* Align to 4-byte boundary (any padding counts as part of my data) */ |
1610 | padBufferToInt(buffer); |
1611 | |
1612 | /* |
1613 | * Construct the header Jentry and store it in the beginning of the |
1614 | * variable-length payload. |
1615 | */ |
1616 | header = nPairs | JB_FOBJECT; |
1617 | appendToBuffer(buffer, (char *) &header, sizeof(uint32)); |
1618 | |
1619 | /* Reserve space for the JEntries of the keys and values. */ |
1620 | jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2); |
1621 | |
1622 | /* |
1623 | * Iterate over the keys, then over the values, since that is the ordering |
1624 | * we want in the on-disk representation. |
1625 | */ |
1626 | totallen = 0; |
1627 | for (i = 0; i < nPairs; i++) |
1628 | { |
1629 | JsonbPair *pair = &val->val.object.pairs[i]; |
1630 | int len; |
1631 | JEntry meta; |
1632 | |
1633 | /* |
1634 | * Convert key, producing a JEntry and appending its variable-length |
1635 | * data to buffer |
1636 | */ |
1637 | convertJsonbScalar(buffer, &meta, &pair->key); |
1638 | |
1639 | len = JBE_OFFLENFLD(meta); |
1640 | totallen += len; |
1641 | |
1642 | /* |
1643 | * Bail out if total variable-length data exceeds what will fit in a |
1644 | * JEntry length field. We check this in each iteration, not just |
1645 | * once at the end, to forestall possible integer overflow. |
1646 | */ |
1647 | if (totallen > JENTRY_OFFLENMASK) |
1648 | ereport(ERROR, |
1649 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
1650 | errmsg("total size of jsonb object elements exceeds the maximum of %u bytes" , |
1651 | JENTRY_OFFLENMASK))); |
1652 | |
1653 | /* |
1654 | * Convert each JB_OFFSET_STRIDE'th length to an offset. |
1655 | */ |
1656 | if ((i % JB_OFFSET_STRIDE) == 0) |
1657 | meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF; |
1658 | |
1659 | copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry)); |
1660 | jentry_offset += sizeof(JEntry); |
1661 | } |
1662 | for (i = 0; i < nPairs; i++) |
1663 | { |
1664 | JsonbPair *pair = &val->val.object.pairs[i]; |
1665 | int len; |
1666 | JEntry meta; |
1667 | |
1668 | /* |
1669 | * Convert value, producing a JEntry and appending its variable-length |
1670 | * data to buffer |
1671 | */ |
1672 | convertJsonbValue(buffer, &meta, &pair->value, level + 1); |
1673 | |
1674 | len = JBE_OFFLENFLD(meta); |
1675 | totallen += len; |
1676 | |
1677 | /* |
1678 | * Bail out if total variable-length data exceeds what will fit in a |
1679 | * JEntry length field. We check this in each iteration, not just |
1680 | * once at the end, to forestall possible integer overflow. |
1681 | */ |
1682 | if (totallen > JENTRY_OFFLENMASK) |
1683 | ereport(ERROR, |
1684 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
1685 | errmsg("total size of jsonb object elements exceeds the maximum of %u bytes" , |
1686 | JENTRY_OFFLENMASK))); |
1687 | |
1688 | /* |
1689 | * Convert each JB_OFFSET_STRIDE'th length to an offset. |
1690 | */ |
1691 | if (((i + nPairs) % JB_OFFSET_STRIDE) == 0) |
1692 | meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF; |
1693 | |
1694 | copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry)); |
1695 | jentry_offset += sizeof(JEntry); |
1696 | } |
1697 | |
1698 | /* Total data size is everything we've appended to buffer */ |
1699 | totallen = buffer->len - base_offset; |
1700 | |
1701 | /* Check length again, since we didn't include the metadata above */ |
1702 | if (totallen > JENTRY_OFFLENMASK) |
1703 | ereport(ERROR, |
1704 | (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
1705 | errmsg("total size of jsonb object elements exceeds the maximum of %u bytes" , |
1706 | JENTRY_OFFLENMASK))); |
1707 | |
1708 | /* Initialize the header of this node in the container's JEntry array */ |
1709 | *pheader = JENTRY_ISCONTAINER | totallen; |
1710 | } |
1711 | |
1712 | static void |
1713 | convertJsonbScalar(StringInfo buffer, JEntry *jentry, JsonbValue *scalarVal) |
1714 | { |
1715 | int numlen; |
1716 | short padlen; |
1717 | |
1718 | switch (scalarVal->type) |
1719 | { |
1720 | case jbvNull: |
1721 | *jentry = JENTRY_ISNULL; |
1722 | break; |
1723 | |
1724 | case jbvString: |
1725 | appendToBuffer(buffer, scalarVal->val.string.val, scalarVal->val.string.len); |
1726 | |
1727 | *jentry = scalarVal->val.string.len; |
1728 | break; |
1729 | |
1730 | case jbvNumeric: |
1731 | /* replace numeric NaN with string "NaN" */ |
1732 | if (numeric_is_nan(scalarVal->val.numeric)) |
1733 | { |
1734 | appendToBuffer(buffer, "NaN" , 3); |
1735 | *jentry = 3; |
1736 | break; |
1737 | } |
1738 | |
1739 | numlen = VARSIZE_ANY(scalarVal->val.numeric); |
1740 | padlen = padBufferToInt(buffer); |
1741 | |
1742 | appendToBuffer(buffer, (char *) scalarVal->val.numeric, numlen); |
1743 | |
1744 | *jentry = JENTRY_ISNUMERIC | (padlen + numlen); |
1745 | break; |
1746 | |
1747 | case jbvBool: |
1748 | *jentry = (scalarVal->val.boolean) ? |
1749 | JENTRY_ISBOOL_TRUE : JENTRY_ISBOOL_FALSE; |
1750 | break; |
1751 | |
1752 | default: |
1753 | elog(ERROR, "invalid jsonb scalar type" ); |
1754 | } |
1755 | } |
1756 | |
1757 | /* |
1758 | * Compare two jbvString JsonbValue values, a and b. |
1759 | * |
1760 | * This is a special qsort() comparator used to sort strings in certain |
1761 | * internal contexts where it is sufficient to have a well-defined sort order. |
1762 | * In particular, object pair keys are sorted according to this criteria to |
1763 | * facilitate cheap binary searches where we don't care about lexical sort |
1764 | * order. |
1765 | * |
1766 | * a and b are first sorted based on their length. If a tie-breaker is |
1767 | * required, only then do we consider string binary equality. |
1768 | */ |
1769 | static int |
1770 | lengthCompareJsonbStringValue(const void *a, const void *b) |
1771 | { |
1772 | const JsonbValue *va = (const JsonbValue *) a; |
1773 | const JsonbValue *vb = (const JsonbValue *) b; |
1774 | int res; |
1775 | |
1776 | Assert(va->type == jbvString); |
1777 | Assert(vb->type == jbvString); |
1778 | |
1779 | if (va->val.string.len == vb->val.string.len) |
1780 | { |
1781 | res = memcmp(va->val.string.val, vb->val.string.val, va->val.string.len); |
1782 | } |
1783 | else |
1784 | { |
1785 | res = (va->val.string.len > vb->val.string.len) ? 1 : -1; |
1786 | } |
1787 | |
1788 | return res; |
1789 | } |
1790 | |
1791 | /* |
1792 | * qsort_arg() comparator to compare JsonbPair values. |
1793 | * |
1794 | * Third argument 'binequal' may point to a bool. If it's set, *binequal is set |
1795 | * to true iff a and b have full binary equality, since some callers have an |
1796 | * interest in whether the two values are equal or merely equivalent. |
1797 | * |
1798 | * N.B: String comparisons here are "length-wise" |
1799 | * |
1800 | * Pairs with equals keys are ordered such that the order field is respected. |
1801 | */ |
1802 | static int |
1803 | lengthCompareJsonbPair(const void *a, const void *b, void *binequal) |
1804 | { |
1805 | const JsonbPair *pa = (const JsonbPair *) a; |
1806 | const JsonbPair *pb = (const JsonbPair *) b; |
1807 | int res; |
1808 | |
1809 | res = lengthCompareJsonbStringValue(&pa->key, &pb->key); |
1810 | if (res == 0 && binequal) |
1811 | *((bool *) binequal) = true; |
1812 | |
1813 | /* |
1814 | * Guarantee keeping order of equal pair. Unique algorithm will prefer |
1815 | * first element as value. |
1816 | */ |
1817 | if (res == 0) |
1818 | res = (pa->order > pb->order) ? -1 : 1; |
1819 | |
1820 | return res; |
1821 | } |
1822 | |
1823 | /* |
1824 | * Sort and unique-ify pairs in JsonbValue object |
1825 | */ |
1826 | static void |
1827 | uniqueifyJsonbObject(JsonbValue *object) |
1828 | { |
1829 | bool hasNonUniq = false; |
1830 | |
1831 | Assert(object->type == jbvObject); |
1832 | |
1833 | if (object->val.object.nPairs > 1) |
1834 | qsort_arg(object->val.object.pairs, object->val.object.nPairs, sizeof(JsonbPair), |
1835 | lengthCompareJsonbPair, &hasNonUniq); |
1836 | |
1837 | if (hasNonUniq) |
1838 | { |
1839 | JsonbPair *ptr = object->val.object.pairs + 1, |
1840 | *res = object->val.object.pairs; |
1841 | |
1842 | while (ptr - object->val.object.pairs < object->val.object.nPairs) |
1843 | { |
1844 | /* Avoid copying over duplicate */ |
1845 | if (lengthCompareJsonbStringValue(ptr, res) != 0) |
1846 | { |
1847 | res++; |
1848 | if (ptr != res) |
1849 | memcpy(res, ptr, sizeof(JsonbPair)); |
1850 | } |
1851 | ptr++; |
1852 | } |
1853 | |
1854 | object->val.object.nPairs = res + 1 - object->val.object.pairs; |
1855 | } |
1856 | } |
1857 | |