1/*-------------------------------------------------------------------------
2 *
3 * jsonb_util.c
4 * converting between Jsonb and JsonbValues, and iterating.
5 *
6 * Copyright (c) 2014-2019, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/utils/adt/jsonb_util.c
11 *
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include "catalog/pg_collation.h"
17#include "miscadmin.h"
18#include "utils/builtins.h"
19#include "utils/hashutils.h"
20#include "utils/jsonb.h"
21#include "utils/memutils.h"
22#include "utils/varlena.h"
23
24/*
25 * Maximum number of elements in an array (or key/value pairs in an object).
26 * This is limited by two things: the size of the JEntry array must fit
27 * in MaxAllocSize, and the number of elements (or pairs) must fit in the bits
28 * reserved for that in the JsonbContainer.header field.
29 *
30 * (The total size of an array's or object's elements is also limited by
31 * JENTRY_OFFLENMASK, but we're not concerned about that here.)
32 */
33#define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JB_CMASK))
34#define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), JB_CMASK))
35
36static void fillJsonbValue(JsonbContainer *container, int index,
37 char *base_addr, uint32 offset,
38 JsonbValue *result);
39static bool equalsJsonbScalarValue(JsonbValue *a, JsonbValue *b);
40static int compareJsonbScalarValue(JsonbValue *a, JsonbValue *b);
41static Jsonb *convertToJsonb(JsonbValue *val);
42static void convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
43static void convertJsonbArray(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
44static void convertJsonbObject(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
45static void convertJsonbScalar(StringInfo buffer, JEntry *header, JsonbValue *scalarVal);
46
47static int reserveFromBuffer(StringInfo buffer, int len);
48static void appendToBuffer(StringInfo buffer, const char *data, int len);
49static void copyToBuffer(StringInfo buffer, int offset, const char *data, int len);
50static short padBufferToInt(StringInfo buffer);
51
52static JsonbIterator *iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent);
53static JsonbIterator *freeAndGetParent(JsonbIterator *it);
54static JsonbParseState *pushState(JsonbParseState **pstate);
55static void appendKey(JsonbParseState *pstate, JsonbValue *scalarVal);
56static void appendValue(JsonbParseState *pstate, JsonbValue *scalarVal);
57static void appendElement(JsonbParseState *pstate, JsonbValue *scalarVal);
58static int lengthCompareJsonbStringValue(const void *a, const void *b);
59static int lengthCompareJsonbPair(const void *a, const void *b, void *arg);
60static void uniqueifyJsonbObject(JsonbValue *object);
61static JsonbValue *pushJsonbValueScalar(JsonbParseState **pstate,
62 JsonbIteratorToken seq,
63 JsonbValue *scalarVal);
64
65/*
66 * Turn an in-memory JsonbValue into a Jsonb for on-disk storage.
67 *
68 * There isn't a JsonbToJsonbValue(), because generally we find it more
69 * convenient to directly iterate through the Jsonb representation and only
70 * really convert nested scalar values. JsonbIteratorNext() does this, so that
71 * clients of the iteration code don't have to directly deal with the binary
72 * representation (JsonbDeepContains() is a notable exception, although all
73 * exceptions are internal to this module). In general, functions that accept
74 * a JsonbValue argument are concerned with the manipulation of scalar values,
75 * or simple containers of scalar values, where it would be inconvenient to
76 * deal with a great amount of other state.
77 */
78Jsonb *
79JsonbValueToJsonb(JsonbValue *val)
80{
81 Jsonb *out;
82
83 if (IsAJsonbScalar(val))
84 {
85 /* Scalar value */
86 JsonbParseState *pstate = NULL;
87 JsonbValue *res;
88 JsonbValue scalarArray;
89
90 scalarArray.type = jbvArray;
91 scalarArray.val.array.rawScalar = true;
92 scalarArray.val.array.nElems = 1;
93
94 pushJsonbValue(&pstate, WJB_BEGIN_ARRAY, &scalarArray);
95 pushJsonbValue(&pstate, WJB_ELEM, val);
96 res = pushJsonbValue(&pstate, WJB_END_ARRAY, NULL);
97
98 out = convertToJsonb(res);
99 }
100 else if (val->type == jbvObject || val->type == jbvArray)
101 {
102 out = convertToJsonb(val);
103 }
104 else
105 {
106 Assert(val->type == jbvBinary);
107 out = palloc(VARHDRSZ + val->val.binary.len);
108 SET_VARSIZE(out, VARHDRSZ + val->val.binary.len);
109 memcpy(VARDATA(out), val->val.binary.data, val->val.binary.len);
110 }
111
112 return out;
113}
114
115/*
116 * Get the offset of the variable-length portion of a Jsonb node within
117 * the variable-length-data part of its container. The node is identified
118 * by index within the container's JEntry array.
119 */
120uint32
121getJsonbOffset(const JsonbContainer *jc, int index)
122{
123 uint32 offset = 0;
124 int i;
125
126 /*
127 * Start offset of this entry is equal to the end offset of the previous
128 * entry. Walk backwards to the most recent entry stored as an end
129 * offset, returning that offset plus any lengths in between.
130 */
131 for (i = index - 1; i >= 0; i--)
132 {
133 offset += JBE_OFFLENFLD(jc->children[i]);
134 if (JBE_HAS_OFF(jc->children[i]))
135 break;
136 }
137
138 return offset;
139}
140
141/*
142 * Get the length of the variable-length portion of a Jsonb node.
143 * The node is identified by index within the container's JEntry array.
144 */
145uint32
146getJsonbLength(const JsonbContainer *jc, int index)
147{
148 uint32 off;
149 uint32 len;
150
151 /*
152 * If the length is stored directly in the JEntry, just return it.
153 * Otherwise, get the begin offset of the entry, and subtract that from
154 * the stored end+1 offset.
155 */
156 if (JBE_HAS_OFF(jc->children[index]))
157 {
158 off = getJsonbOffset(jc, index);
159 len = JBE_OFFLENFLD(jc->children[index]) - off;
160 }
161 else
162 len = JBE_OFFLENFLD(jc->children[index]);
163
164 return len;
165}
166
167/*
168 * BT comparator worker function. Returns an integer less than, equal to, or
169 * greater than zero, indicating whether a is less than, equal to, or greater
170 * than b. Consistent with the requirements for a B-Tree operator class
171 *
172 * Strings are compared lexically, in contrast with other places where we use a
173 * much simpler comparator logic for searching through Strings. Since this is
174 * called from B-Tree support function 1, we're careful about not leaking
175 * memory here.
176 */
177int
178compareJsonbContainers(JsonbContainer *a, JsonbContainer *b)
179{
180 JsonbIterator *ita,
181 *itb;
182 int res = 0;
183
184 ita = JsonbIteratorInit(a);
185 itb = JsonbIteratorInit(b);
186
187 do
188 {
189 JsonbValue va,
190 vb;
191 JsonbIteratorToken ra,
192 rb;
193
194 ra = JsonbIteratorNext(&ita, &va, false);
195 rb = JsonbIteratorNext(&itb, &vb, false);
196
197 if (ra == rb)
198 {
199 if (ra == WJB_DONE)
200 {
201 /* Decisively equal */
202 break;
203 }
204
205 if (ra == WJB_END_ARRAY || ra == WJB_END_OBJECT)
206 {
207 /*
208 * There is no array or object to compare at this stage of
209 * processing. jbvArray/jbvObject values are compared
210 * initially, at the WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT
211 * tokens.
212 */
213 continue;
214 }
215
216 if (va.type == vb.type)
217 {
218 switch (va.type)
219 {
220 case jbvString:
221 case jbvNull:
222 case jbvNumeric:
223 case jbvBool:
224 res = compareJsonbScalarValue(&va, &vb);
225 break;
226 case jbvArray:
227
228 /*
229 * This could be a "raw scalar" pseudo array. That's
230 * a special case here though, since we still want the
231 * general type-based comparisons to apply, and as far
232 * as we're concerned a pseudo array is just a scalar.
233 */
234 if (va.val.array.rawScalar != vb.val.array.rawScalar)
235 res = (va.val.array.rawScalar) ? -1 : 1;
236 if (va.val.array.nElems != vb.val.array.nElems)
237 res = (va.val.array.nElems > vb.val.array.nElems) ? 1 : -1;
238 break;
239 case jbvObject:
240 if (va.val.object.nPairs != vb.val.object.nPairs)
241 res = (va.val.object.nPairs > vb.val.object.nPairs) ? 1 : -1;
242 break;
243 case jbvBinary:
244 elog(ERROR, "unexpected jbvBinary value");
245 }
246 }
247 else
248 {
249 /* Type-defined order */
250 res = (va.type > vb.type) ? 1 : -1;
251 }
252 }
253 else
254 {
255 /*
256 * It's safe to assume that the types differed, and that the va
257 * and vb values passed were set.
258 *
259 * If the two values were of the same container type, then there'd
260 * have been a chance to observe the variation in the number of
261 * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're
262 * either two heterogeneously-typed containers, or a container and
263 * some scalar type.
264 *
265 * We don't have to consider the WJB_END_ARRAY and WJB_END_OBJECT
266 * cases here, because we would have seen the corresponding
267 * WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT tokens first, and
268 * concluded that they don't match.
269 */
270 Assert(ra != WJB_END_ARRAY && ra != WJB_END_OBJECT);
271 Assert(rb != WJB_END_ARRAY && rb != WJB_END_OBJECT);
272
273 Assert(va.type != vb.type);
274 Assert(va.type != jbvBinary);
275 Assert(vb.type != jbvBinary);
276 /* Type-defined order */
277 res = (va.type > vb.type) ? 1 : -1;
278 }
279 }
280 while (res == 0);
281
282 while (ita != NULL)
283 {
284 JsonbIterator *i = ita->parent;
285
286 pfree(ita);
287 ita = i;
288 }
289 while (itb != NULL)
290 {
291 JsonbIterator *i = itb->parent;
292
293 pfree(itb);
294 itb = i;
295 }
296
297 return res;
298}
299
300/*
301 * Find value in object (i.e. the "value" part of some key/value pair in an
302 * object), or find a matching element if we're looking through an array. Do
303 * so on the basis of equality of the object keys only, or alternatively
304 * element values only, with a caller-supplied value "key". The "flags"
305 * argument allows the caller to specify which container types are of interest.
306 *
307 * This exported utility function exists to facilitate various cases concerned
308 * with "containment". If asked to look through an object, the caller had
309 * better pass a Jsonb String, because their keys can only be strings.
310 * Otherwise, for an array, any type of JsonbValue will do.
311 *
312 * In order to proceed with the search, it is necessary for callers to have
313 * both specified an interest in exactly one particular container type with an
314 * appropriate flag, as well as having the pointed-to Jsonb container be of
315 * one of those same container types at the top level. (Actually, we just do
316 * whichever makes sense to save callers the trouble of figuring it out - at
317 * most one can make sense, because the container either points to an array
318 * (possibly a "raw scalar" pseudo array) or an object.)
319 *
320 * Note that we can return a jbvBinary JsonbValue if this is called on an
321 * object, but we never do so on an array. If the caller asks to look through
322 * a container type that is not of the type pointed to by the container,
323 * immediately fall through and return NULL. If we cannot find the value,
324 * return NULL. Otherwise, return palloc()'d copy of value.
325 */
326JsonbValue *
327findJsonbValueFromContainer(JsonbContainer *container, uint32 flags,
328 JsonbValue *key)
329{
330 JEntry *children = container->children;
331 int count = JsonContainerSize(container);
332 JsonbValue *result;
333
334 Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0);
335
336 /* Quick out without a palloc cycle if object/array is empty */
337 if (count <= 0)
338 return NULL;
339
340 result = palloc(sizeof(JsonbValue));
341
342 if ((flags & JB_FARRAY) && JsonContainerIsArray(container))
343 {
344 char *base_addr = (char *) (children + count);
345 uint32 offset = 0;
346 int i;
347
348 for (i = 0; i < count; i++)
349 {
350 fillJsonbValue(container, i, base_addr, offset, result);
351
352 if (key->type == result->type)
353 {
354 if (equalsJsonbScalarValue(key, result))
355 return result;
356 }
357
358 JBE_ADVANCE_OFFSET(offset, children[i]);
359 }
360 }
361 else if ((flags & JB_FOBJECT) && JsonContainerIsObject(container))
362 {
363 /* Since this is an object, account for *Pairs* of Jentrys */
364 char *base_addr = (char *) (children + count * 2);
365 uint32 stopLow = 0,
366 stopHigh = count;
367
368 /* Object key passed by caller must be a string */
369 Assert(key->type == jbvString);
370
371 /* Binary search on object/pair keys *only* */
372 while (stopLow < stopHigh)
373 {
374 uint32 stopMiddle;
375 int difference;
376 JsonbValue candidate;
377
378 stopMiddle = stopLow + (stopHigh - stopLow) / 2;
379
380 candidate.type = jbvString;
381 candidate.val.string.val =
382 base_addr + getJsonbOffset(container, stopMiddle);
383 candidate.val.string.len = getJsonbLength(container, stopMiddle);
384
385 difference = lengthCompareJsonbStringValue(&candidate, key);
386
387 if (difference == 0)
388 {
389 /* Found our key, return corresponding value */
390 int index = stopMiddle + count;
391
392 fillJsonbValue(container, index, base_addr,
393 getJsonbOffset(container, index),
394 result);
395
396 return result;
397 }
398 else
399 {
400 if (difference < 0)
401 stopLow = stopMiddle + 1;
402 else
403 stopHigh = stopMiddle;
404 }
405 }
406 }
407
408 /* Not found */
409 pfree(result);
410 return NULL;
411}
412
413/*
414 * Get i-th value of a Jsonb array.
415 *
416 * Returns palloc()'d copy of the value, or NULL if it does not exist.
417 */
418JsonbValue *
419getIthJsonbValueFromContainer(JsonbContainer *container, uint32 i)
420{
421 JsonbValue *result;
422 char *base_addr;
423 uint32 nelements;
424
425 if (!JsonContainerIsArray(container))
426 elog(ERROR, "not a jsonb array");
427
428 nelements = JsonContainerSize(container);
429 base_addr = (char *) &container->children[nelements];
430
431 if (i >= nelements)
432 return NULL;
433
434 result = palloc(sizeof(JsonbValue));
435
436 fillJsonbValue(container, i, base_addr,
437 getJsonbOffset(container, i),
438 result);
439
440 return result;
441}
442
443/*
444 * A helper function to fill in a JsonbValue to represent an element of an
445 * array, or a key or value of an object.
446 *
447 * The node's JEntry is at container->children[index], and its variable-length
448 * data is at base_addr + offset. We make the caller determine the offset
449 * since in many cases the caller can amortize that work across multiple
450 * children. When it can't, it can just call getJsonbOffset().
451 *
452 * A nested array or object will be returned as jbvBinary, ie. it won't be
453 * expanded.
454 */
455static void
456fillJsonbValue(JsonbContainer *container, int index,
457 char *base_addr, uint32 offset,
458 JsonbValue *result)
459{
460 JEntry entry = container->children[index];
461
462 if (JBE_ISNULL(entry))
463 {
464 result->type = jbvNull;
465 }
466 else if (JBE_ISSTRING(entry))
467 {
468 result->type = jbvString;
469 result->val.string.val = base_addr + offset;
470 result->val.string.len = getJsonbLength(container, index);
471 Assert(result->val.string.len >= 0);
472 }
473 else if (JBE_ISNUMERIC(entry))
474 {
475 result->type = jbvNumeric;
476 result->val.numeric = (Numeric) (base_addr + INTALIGN(offset));
477 }
478 else if (JBE_ISBOOL_TRUE(entry))
479 {
480 result->type = jbvBool;
481 result->val.boolean = true;
482 }
483 else if (JBE_ISBOOL_FALSE(entry))
484 {
485 result->type = jbvBool;
486 result->val.boolean = false;
487 }
488 else
489 {
490 Assert(JBE_ISCONTAINER(entry));
491 result->type = jbvBinary;
492 /* Remove alignment padding from data pointer and length */
493 result->val.binary.data = (JsonbContainer *) (base_addr + INTALIGN(offset));
494 result->val.binary.len = getJsonbLength(container, index) -
495 (INTALIGN(offset) - offset);
496 }
497}
498
499/*
500 * Push JsonbValue into JsonbParseState.
501 *
502 * Used when parsing JSON tokens to form Jsonb, or when converting an in-memory
503 * JsonbValue to a Jsonb.
504 *
505 * Initial state of *JsonbParseState is NULL, since it'll be allocated here
506 * originally (caller will get JsonbParseState back by reference).
507 *
508 * Only sequential tokens pertaining to non-container types should pass a
509 * JsonbValue. There is one exception -- WJB_BEGIN_ARRAY callers may pass a
510 * "raw scalar" pseudo array to append it - the actual scalar should be passed
511 * next and it will be added as the only member of the array.
512 *
513 * Values of type jvbBinary, which are rolled up arrays and objects,
514 * are unpacked before being added to the result.
515 */
516JsonbValue *
517pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq,
518 JsonbValue *jbval)
519{
520 JsonbIterator *it;
521 JsonbValue *res = NULL;
522 JsonbValue v;
523 JsonbIteratorToken tok;
524
525 if (!jbval || (seq != WJB_ELEM && seq != WJB_VALUE) ||
526 jbval->type != jbvBinary)
527 {
528 /* drop through */
529 return pushJsonbValueScalar(pstate, seq, jbval);
530 }
531
532 /* unpack the binary and add each piece to the pstate */
533 it = JsonbIteratorInit(jbval->val.binary.data);
534 while ((tok = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
535 res = pushJsonbValueScalar(pstate, tok,
536 tok < WJB_BEGIN_ARRAY ? &v : NULL);
537
538 return res;
539}
540
541/*
542 * Do the actual pushing, with only scalar or pseudo-scalar-array values
543 * accepted.
544 */
545static JsonbValue *
546pushJsonbValueScalar(JsonbParseState **pstate, JsonbIteratorToken seq,
547 JsonbValue *scalarVal)
548{
549 JsonbValue *result = NULL;
550
551 switch (seq)
552 {
553 case WJB_BEGIN_ARRAY:
554 Assert(!scalarVal || scalarVal->val.array.rawScalar);
555 *pstate = pushState(pstate);
556 result = &(*pstate)->contVal;
557 (*pstate)->contVal.type = jbvArray;
558 (*pstate)->contVal.val.array.nElems = 0;
559 (*pstate)->contVal.val.array.rawScalar = (scalarVal &&
560 scalarVal->val.array.rawScalar);
561 if (scalarVal && scalarVal->val.array.nElems > 0)
562 {
563 /* Assume that this array is still really a scalar */
564 Assert(scalarVal->type == jbvArray);
565 (*pstate)->size = scalarVal->val.array.nElems;
566 }
567 else
568 {
569 (*pstate)->size = 4;
570 }
571 (*pstate)->contVal.val.array.elems = palloc(sizeof(JsonbValue) *
572 (*pstate)->size);
573 break;
574 case WJB_BEGIN_OBJECT:
575 Assert(!scalarVal);
576 *pstate = pushState(pstate);
577 result = &(*pstate)->contVal;
578 (*pstate)->contVal.type = jbvObject;
579 (*pstate)->contVal.val.object.nPairs = 0;
580 (*pstate)->size = 4;
581 (*pstate)->contVal.val.object.pairs = palloc(sizeof(JsonbPair) *
582 (*pstate)->size);
583 break;
584 case WJB_KEY:
585 Assert(scalarVal->type == jbvString);
586 appendKey(*pstate, scalarVal);
587 break;
588 case WJB_VALUE:
589 Assert(IsAJsonbScalar(scalarVal));
590 appendValue(*pstate, scalarVal);
591 break;
592 case WJB_ELEM:
593 Assert(IsAJsonbScalar(scalarVal));
594 appendElement(*pstate, scalarVal);
595 break;
596 case WJB_END_OBJECT:
597 uniqueifyJsonbObject(&(*pstate)->contVal);
598 /* fall through! */
599 case WJB_END_ARRAY:
600 /* Steps here common to WJB_END_OBJECT case */
601 Assert(!scalarVal);
602 result = &(*pstate)->contVal;
603
604 /*
605 * Pop stack and push current array/object as value in parent
606 * array/object
607 */
608 *pstate = (*pstate)->next;
609 if (*pstate)
610 {
611 switch ((*pstate)->contVal.type)
612 {
613 case jbvArray:
614 appendElement(*pstate, result);
615 break;
616 case jbvObject:
617 appendValue(*pstate, result);
618 break;
619 default:
620 elog(ERROR, "invalid jsonb container type");
621 }
622 }
623 break;
624 default:
625 elog(ERROR, "unrecognized jsonb sequential processing token");
626 }
627
628 return result;
629}
630
631/*
632 * pushJsonbValue() worker: Iteration-like forming of Jsonb
633 */
634static JsonbParseState *
635pushState(JsonbParseState **pstate)
636{
637 JsonbParseState *ns = palloc(sizeof(JsonbParseState));
638
639 ns->next = *pstate;
640 return ns;
641}
642
643/*
644 * pushJsonbValue() worker: Append a pair key to state when generating a Jsonb
645 */
646static void
647appendKey(JsonbParseState *pstate, JsonbValue *string)
648{
649 JsonbValue *object = &pstate->contVal;
650
651 Assert(object->type == jbvObject);
652 Assert(string->type == jbvString);
653
654 if (object->val.object.nPairs >= JSONB_MAX_PAIRS)
655 ereport(ERROR,
656 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
657 errmsg("number of jsonb object pairs exceeds the maximum allowed (%zu)",
658 JSONB_MAX_PAIRS)));
659
660 if (object->val.object.nPairs >= pstate->size)
661 {
662 pstate->size *= 2;
663 object->val.object.pairs = repalloc(object->val.object.pairs,
664 sizeof(JsonbPair) * pstate->size);
665 }
666
667 object->val.object.pairs[object->val.object.nPairs].key = *string;
668 object->val.object.pairs[object->val.object.nPairs].order = object->val.object.nPairs;
669}
670
671/*
672 * pushJsonbValue() worker: Append a pair value to state when generating a
673 * Jsonb
674 */
675static void
676appendValue(JsonbParseState *pstate, JsonbValue *scalarVal)
677{
678 JsonbValue *object = &pstate->contVal;
679
680 Assert(object->type == jbvObject);
681
682 object->val.object.pairs[object->val.object.nPairs++].value = *scalarVal;
683}
684
685/*
686 * pushJsonbValue() worker: Append an element to state when generating a Jsonb
687 */
688static void
689appendElement(JsonbParseState *pstate, JsonbValue *scalarVal)
690{
691 JsonbValue *array = &pstate->contVal;
692
693 Assert(array->type == jbvArray);
694
695 if (array->val.array.nElems >= JSONB_MAX_ELEMS)
696 ereport(ERROR,
697 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
698 errmsg("number of jsonb array elements exceeds the maximum allowed (%zu)",
699 JSONB_MAX_ELEMS)));
700
701 if (array->val.array.nElems >= pstate->size)
702 {
703 pstate->size *= 2;
704 array->val.array.elems = repalloc(array->val.array.elems,
705 sizeof(JsonbValue) * pstate->size);
706 }
707
708 array->val.array.elems[array->val.array.nElems++] = *scalarVal;
709}
710
711/*
712 * Given a JsonbContainer, expand to JsonbIterator to iterate over items
713 * fully expanded to in-memory representation for manipulation.
714 *
715 * See JsonbIteratorNext() for notes on memory management.
716 */
717JsonbIterator *
718JsonbIteratorInit(JsonbContainer *container)
719{
720 return iteratorFromContainer(container, NULL);
721}
722
723/*
724 * Get next JsonbValue while iterating
725 *
726 * Caller should initially pass their own, original iterator. They may get
727 * back a child iterator palloc()'d here instead. The function can be relied
728 * on to free those child iterators, lest the memory allocated for highly
729 * nested objects become unreasonable, but only if callers don't end iteration
730 * early (by breaking upon having found something in a search, for example).
731 *
732 * Callers in such a scenario, that are particularly sensitive to leaking
733 * memory in a long-lived context may walk the ancestral tree from the final
734 * iterator we left them with to its oldest ancestor, pfree()ing as they go.
735 * They do not have to free any other memory previously allocated for iterators
736 * but not accessible as direct ancestors of the iterator they're last passed
737 * back.
738 *
739 * Returns "Jsonb sequential processing" token value. Iterator "state"
740 * reflects the current stage of the process in a less granular fashion, and is
741 * mostly used here to track things internally with respect to particular
742 * iterators.
743 *
744 * Clients of this function should not have to handle any jbvBinary values
745 * (since recursive calls will deal with this), provided skipNested is false.
746 * It is our job to expand the jbvBinary representation without bothering them
747 * with it. However, clients should not take it upon themselves to touch array
748 * or Object element/pair buffers, since their element/pair pointers are
749 * garbage. Also, *val will not be set when returning WJB_END_ARRAY or
750 * WJB_END_OBJECT, on the assumption that it's only useful to access values
751 * when recursing in.
752 */
753JsonbIteratorToken
754JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested)
755{
756 if (*it == NULL)
757 return WJB_DONE;
758
759 /*
760 * When stepping into a nested container, we jump back here to start
761 * processing the child. We will not recurse further in one call, because
762 * processing the child will always begin in JBI_ARRAY_START or
763 * JBI_OBJECT_START state.
764 */
765recurse:
766 switch ((*it)->state)
767 {
768 case JBI_ARRAY_START:
769 /* Set v to array on first array call */
770 val->type = jbvArray;
771 val->val.array.nElems = (*it)->nElems;
772
773 /*
774 * v->val.array.elems is not actually set, because we aren't doing
775 * a full conversion
776 */
777 val->val.array.rawScalar = (*it)->isScalar;
778 (*it)->curIndex = 0;
779 (*it)->curDataOffset = 0;
780 (*it)->curValueOffset = 0; /* not actually used */
781 /* Set state for next call */
782 (*it)->state = JBI_ARRAY_ELEM;
783 return WJB_BEGIN_ARRAY;
784
785 case JBI_ARRAY_ELEM:
786 if ((*it)->curIndex >= (*it)->nElems)
787 {
788 /*
789 * All elements within array already processed. Report this
790 * to caller, and give it back original parent iterator (which
791 * independently tracks iteration progress at its level of
792 * nesting).
793 */
794 *it = freeAndGetParent(*it);
795 return WJB_END_ARRAY;
796 }
797
798 fillJsonbValue((*it)->container, (*it)->curIndex,
799 (*it)->dataProper, (*it)->curDataOffset,
800 val);
801
802 JBE_ADVANCE_OFFSET((*it)->curDataOffset,
803 (*it)->children[(*it)->curIndex]);
804 (*it)->curIndex++;
805
806 if (!IsAJsonbScalar(val) && !skipNested)
807 {
808 /* Recurse into container. */
809 *it = iteratorFromContainer(val->val.binary.data, *it);
810 goto recurse;
811 }
812 else
813 {
814 /*
815 * Scalar item in array, or a container and caller didn't want
816 * us to recurse into it.
817 */
818 return WJB_ELEM;
819 }
820
821 case JBI_OBJECT_START:
822 /* Set v to object on first object call */
823 val->type = jbvObject;
824 val->val.object.nPairs = (*it)->nElems;
825
826 /*
827 * v->val.object.pairs is not actually set, because we aren't
828 * doing a full conversion
829 */
830 (*it)->curIndex = 0;
831 (*it)->curDataOffset = 0;
832 (*it)->curValueOffset = getJsonbOffset((*it)->container,
833 (*it)->nElems);
834 /* Set state for next call */
835 (*it)->state = JBI_OBJECT_KEY;
836 return WJB_BEGIN_OBJECT;
837
838 case JBI_OBJECT_KEY:
839 if ((*it)->curIndex >= (*it)->nElems)
840 {
841 /*
842 * All pairs within object already processed. Report this to
843 * caller, and give it back original containing iterator
844 * (which independently tracks iteration progress at its level
845 * of nesting).
846 */
847 *it = freeAndGetParent(*it);
848 return WJB_END_OBJECT;
849 }
850 else
851 {
852 /* Return key of a key/value pair. */
853 fillJsonbValue((*it)->container, (*it)->curIndex,
854 (*it)->dataProper, (*it)->curDataOffset,
855 val);
856 if (val->type != jbvString)
857 elog(ERROR, "unexpected jsonb type as object key");
858
859 /* Set state for next call */
860 (*it)->state = JBI_OBJECT_VALUE;
861 return WJB_KEY;
862 }
863
864 case JBI_OBJECT_VALUE:
865 /* Set state for next call */
866 (*it)->state = JBI_OBJECT_KEY;
867
868 fillJsonbValue((*it)->container, (*it)->curIndex + (*it)->nElems,
869 (*it)->dataProper, (*it)->curValueOffset,
870 val);
871
872 JBE_ADVANCE_OFFSET((*it)->curDataOffset,
873 (*it)->children[(*it)->curIndex]);
874 JBE_ADVANCE_OFFSET((*it)->curValueOffset,
875 (*it)->children[(*it)->curIndex + (*it)->nElems]);
876 (*it)->curIndex++;
877
878 /*
879 * Value may be a container, in which case we recurse with new,
880 * child iterator (unless the caller asked not to, by passing
881 * skipNested).
882 */
883 if (!IsAJsonbScalar(val) && !skipNested)
884 {
885 *it = iteratorFromContainer(val->val.binary.data, *it);
886 goto recurse;
887 }
888 else
889 return WJB_VALUE;
890 }
891
892 elog(ERROR, "invalid iterator state");
893 return -1;
894}
895
896/*
897 * Initialize an iterator for iterating all elements in a container.
898 */
899static JsonbIterator *
900iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent)
901{
902 JsonbIterator *it;
903
904 it = palloc0(sizeof(JsonbIterator));
905 it->container = container;
906 it->parent = parent;
907 it->nElems = JsonContainerSize(container);
908
909 /* Array starts just after header */
910 it->children = container->children;
911
912 switch (container->header & (JB_FARRAY | JB_FOBJECT))
913 {
914 case JB_FARRAY:
915 it->dataProper =
916 (char *) it->children + it->nElems * sizeof(JEntry);
917 it->isScalar = JsonContainerIsScalar(container);
918 /* This is either a "raw scalar", or an array */
919 Assert(!it->isScalar || it->nElems == 1);
920
921 it->state = JBI_ARRAY_START;
922 break;
923
924 case JB_FOBJECT:
925 it->dataProper =
926 (char *) it->children + it->nElems * sizeof(JEntry) * 2;
927 it->state = JBI_OBJECT_START;
928 break;
929
930 default:
931 elog(ERROR, "unknown type of jsonb container");
932 }
933
934 return it;
935}
936
937/*
938 * JsonbIteratorNext() worker: Return parent, while freeing memory for current
939 * iterator
940 */
941static JsonbIterator *
942freeAndGetParent(JsonbIterator *it)
943{
944 JsonbIterator *v = it->parent;
945
946 pfree(it);
947 return v;
948}
949
950/*
951 * Worker for "contains" operator's function
952 *
953 * Formally speaking, containment is top-down, unordered subtree isomorphism.
954 *
955 * Takes iterators that belong to some container type. These iterators
956 * "belong" to those values in the sense that they've just been initialized in
957 * respect of them by the caller (perhaps in a nested fashion).
958 *
959 * "val" is lhs Jsonb, and mContained is rhs Jsonb when called from top level.
960 * We determine if mContained is contained within val.
961 */
962bool
963JsonbDeepContains(JsonbIterator **val, JsonbIterator **mContained)
964{
965 JsonbValue vval,
966 vcontained;
967 JsonbIteratorToken rval,
968 rcont;
969
970 /*
971 * Guard against stack overflow due to overly complex Jsonb.
972 *
973 * Functions called here independently take this precaution, but that
974 * might not be sufficient since this is also a recursive function.
975 */
976 check_stack_depth();
977
978 rval = JsonbIteratorNext(val, &vval, false);
979 rcont = JsonbIteratorNext(mContained, &vcontained, false);
980
981 if (rval != rcont)
982 {
983 /*
984 * The differing return values can immediately be taken as indicating
985 * two differing container types at this nesting level, which is
986 * sufficient reason to give up entirely (but it should be the case
987 * that they're both some container type).
988 */
989 Assert(rval == WJB_BEGIN_OBJECT || rval == WJB_BEGIN_ARRAY);
990 Assert(rcont == WJB_BEGIN_OBJECT || rcont == WJB_BEGIN_ARRAY);
991 return false;
992 }
993 else if (rcont == WJB_BEGIN_OBJECT)
994 {
995 Assert(vval.type == jbvObject);
996 Assert(vcontained.type == jbvObject);
997
998 /*
999 * If the lhs has fewer pairs than the rhs, it can't possibly contain
1000 * the rhs. (This conclusion is safe only because we de-duplicate
1001 * keys in all Jsonb objects; thus there can be no corresponding
1002 * optimization in the array case.) The case probably won't arise
1003 * often, but since it's such a cheap check we may as well make it.
1004 */
1005 if (vval.val.object.nPairs < vcontained.val.object.nPairs)
1006 return false;
1007
1008 /* Work through rhs "is it contained within?" object */
1009 for (;;)
1010 {
1011 JsonbValue *lhsVal; /* lhsVal is from pair in lhs object */
1012
1013 rcont = JsonbIteratorNext(mContained, &vcontained, false);
1014
1015 /*
1016 * When we get through caller's rhs "is it contained within?"
1017 * object without failing to find one of its values, it's
1018 * contained.
1019 */
1020 if (rcont == WJB_END_OBJECT)
1021 return true;
1022
1023 Assert(rcont == WJB_KEY);
1024
1025 /* First, find value by key... */
1026 lhsVal = findJsonbValueFromContainer((*val)->container,
1027 JB_FOBJECT,
1028 &vcontained);
1029
1030 if (!lhsVal)
1031 return false;
1032
1033 /*
1034 * ...at this stage it is apparent that there is at least a key
1035 * match for this rhs pair.
1036 */
1037 rcont = JsonbIteratorNext(mContained, &vcontained, true);
1038
1039 Assert(rcont == WJB_VALUE);
1040
1041 /*
1042 * Compare rhs pair's value with lhs pair's value just found using
1043 * key
1044 */
1045 if (lhsVal->type != vcontained.type)
1046 {
1047 return false;
1048 }
1049 else if (IsAJsonbScalar(lhsVal))
1050 {
1051 if (!equalsJsonbScalarValue(lhsVal, &vcontained))
1052 return false;
1053 }
1054 else
1055 {
1056 /* Nested container value (object or array) */
1057 JsonbIterator *nestval,
1058 *nestContained;
1059
1060 Assert(lhsVal->type == jbvBinary);
1061 Assert(vcontained.type == jbvBinary);
1062
1063 nestval = JsonbIteratorInit(lhsVal->val.binary.data);
1064 nestContained = JsonbIteratorInit(vcontained.val.binary.data);
1065
1066 /*
1067 * Match "value" side of rhs datum object's pair recursively.
1068 * It's a nested structure.
1069 *
1070 * Note that nesting still has to "match up" at the right
1071 * nesting sub-levels. However, there need only be zero or
1072 * more matching pairs (or elements) at each nesting level
1073 * (provided the *rhs* pairs/elements *all* match on each
1074 * level), which enables searching nested structures for a
1075 * single String or other primitive type sub-datum quite
1076 * effectively (provided the user constructed the rhs nested
1077 * structure such that we "know where to look").
1078 *
1079 * In other words, the mapping of container nodes in the rhs
1080 * "vcontained" Jsonb to internal nodes on the lhs is
1081 * injective, and parent-child edges on the rhs must be mapped
1082 * to parent-child edges on the lhs to satisfy the condition
1083 * of containment (plus of course the mapped nodes must be
1084 * equal).
1085 */
1086 if (!JsonbDeepContains(&nestval, &nestContained))
1087 return false;
1088 }
1089 }
1090 }
1091 else if (rcont == WJB_BEGIN_ARRAY)
1092 {
1093 JsonbValue *lhsConts = NULL;
1094 uint32 nLhsElems = vval.val.array.nElems;
1095
1096 Assert(vval.type == jbvArray);
1097 Assert(vcontained.type == jbvArray);
1098
1099 /*
1100 * Handle distinction between "raw scalar" pseudo arrays, and real
1101 * arrays.
1102 *
1103 * A raw scalar may contain another raw scalar, and an array may
1104 * contain a raw scalar, but a raw scalar may not contain an array. We
1105 * don't do something like this for the object case, since objects can
1106 * only contain pairs, never raw scalars (a pair is represented by an
1107 * rhs object argument with a single contained pair).
1108 */
1109 if (vval.val.array.rawScalar && !vcontained.val.array.rawScalar)
1110 return false;
1111
1112 /* Work through rhs "is it contained within?" array */
1113 for (;;)
1114 {
1115 rcont = JsonbIteratorNext(mContained, &vcontained, true);
1116
1117 /*
1118 * When we get through caller's rhs "is it contained within?"
1119 * array without failing to find one of its values, it's
1120 * contained.
1121 */
1122 if (rcont == WJB_END_ARRAY)
1123 return true;
1124
1125 Assert(rcont == WJB_ELEM);
1126
1127 if (IsAJsonbScalar(&vcontained))
1128 {
1129 if (!findJsonbValueFromContainer((*val)->container,
1130 JB_FARRAY,
1131 &vcontained))
1132 return false;
1133 }
1134 else
1135 {
1136 uint32 i;
1137
1138 /*
1139 * If this is first container found in rhs array (at this
1140 * depth), initialize temp lhs array of containers
1141 */
1142 if (lhsConts == NULL)
1143 {
1144 uint32 j = 0;
1145
1146 /* Make room for all possible values */
1147 lhsConts = palloc(sizeof(JsonbValue) * nLhsElems);
1148
1149 for (i = 0; i < nLhsElems; i++)
1150 {
1151 /* Store all lhs elements in temp array */
1152 rcont = JsonbIteratorNext(val, &vval, true);
1153 Assert(rcont == WJB_ELEM);
1154
1155 if (vval.type == jbvBinary)
1156 lhsConts[j++] = vval;
1157 }
1158
1159 /* No container elements in temp array, so give up now */
1160 if (j == 0)
1161 return false;
1162
1163 /* We may have only partially filled array */
1164 nLhsElems = j;
1165 }
1166
1167 /* XXX: Nested array containment is O(N^2) */
1168 for (i = 0; i < nLhsElems; i++)
1169 {
1170 /* Nested container value (object or array) */
1171 JsonbIterator *nestval,
1172 *nestContained;
1173 bool contains;
1174
1175 nestval = JsonbIteratorInit(lhsConts[i].val.binary.data);
1176 nestContained = JsonbIteratorInit(vcontained.val.binary.data);
1177
1178 contains = JsonbDeepContains(&nestval, &nestContained);
1179
1180 if (nestval)
1181 pfree(nestval);
1182 if (nestContained)
1183 pfree(nestContained);
1184 if (contains)
1185 break;
1186 }
1187
1188 /*
1189 * Report rhs container value is not contained if couldn't
1190 * match rhs container to *some* lhs cont
1191 */
1192 if (i == nLhsElems)
1193 return false;
1194 }
1195 }
1196 }
1197 else
1198 {
1199 elog(ERROR, "invalid jsonb container type");
1200 }
1201
1202 elog(ERROR, "unexpectedly fell off end of jsonb container");
1203 return false;
1204}
1205
1206/*
1207 * Hash a JsonbValue scalar value, mixing the hash value into an existing
1208 * hash provided by the caller.
1209 *
1210 * Some callers may wish to independently XOR in JB_FOBJECT and JB_FARRAY
1211 * flags.
1212 */
1213void
1214JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash)
1215{
1216 uint32 tmp;
1217
1218 /* Compute hash value for scalarVal */
1219 switch (scalarVal->type)
1220 {
1221 case jbvNull:
1222 tmp = 0x01;
1223 break;
1224 case jbvString:
1225 tmp = DatumGetUInt32(hash_any((const unsigned char *) scalarVal->val.string.val,
1226 scalarVal->val.string.len));
1227 break;
1228 case jbvNumeric:
1229 /* Must hash equal numerics to equal hash codes */
1230 tmp = DatumGetUInt32(DirectFunctionCall1(hash_numeric,
1231 NumericGetDatum(scalarVal->val.numeric)));
1232 break;
1233 case jbvBool:
1234 tmp = scalarVal->val.boolean ? 0x02 : 0x04;
1235
1236 break;
1237 default:
1238 elog(ERROR, "invalid jsonb scalar type");
1239 tmp = 0; /* keep compiler quiet */
1240 break;
1241 }
1242
1243 /*
1244 * Combine hash values of successive keys, values and elements by rotating
1245 * the previous value left 1 bit, then XOR'ing in the new
1246 * key/value/element's hash value.
1247 */
1248 *hash = (*hash << 1) | (*hash >> 31);
1249 *hash ^= tmp;
1250}
1251
1252/*
1253 * Hash a value to a 64-bit value, with a seed. Otherwise, similar to
1254 * JsonbHashScalarValue.
1255 */
1256void
1257JsonbHashScalarValueExtended(const JsonbValue *scalarVal, uint64 *hash,
1258 uint64 seed)
1259{
1260 uint64 tmp;
1261
1262 switch (scalarVal->type)
1263 {
1264 case jbvNull:
1265 tmp = seed + 0x01;
1266 break;
1267 case jbvString:
1268 tmp = DatumGetUInt64(hash_any_extended((const unsigned char *) scalarVal->val.string.val,
1269 scalarVal->val.string.len,
1270 seed));
1271 break;
1272 case jbvNumeric:
1273 tmp = DatumGetUInt64(DirectFunctionCall2(hash_numeric_extended,
1274 NumericGetDatum(scalarVal->val.numeric),
1275 UInt64GetDatum(seed)));
1276 break;
1277 case jbvBool:
1278 if (seed)
1279 tmp = DatumGetUInt64(DirectFunctionCall2(hashcharextended,
1280 BoolGetDatum(scalarVal->val.boolean),
1281 UInt64GetDatum(seed)));
1282 else
1283 tmp = scalarVal->val.boolean ? 0x02 : 0x04;
1284
1285 break;
1286 default:
1287 elog(ERROR, "invalid jsonb scalar type");
1288 break;
1289 }
1290
1291 *hash = ROTATE_HIGH_AND_LOW_32BITS(*hash);
1292 *hash ^= tmp;
1293}
1294
1295/*
1296 * Are two scalar JsonbValues of the same type a and b equal?
1297 */
1298static bool
1299equalsJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
1300{
1301 if (aScalar->type == bScalar->type)
1302 {
1303 switch (aScalar->type)
1304 {
1305 case jbvNull:
1306 return true;
1307 case jbvString:
1308 return lengthCompareJsonbStringValue(aScalar, bScalar) == 0;
1309 case jbvNumeric:
1310 return DatumGetBool(DirectFunctionCall2(numeric_eq,
1311 PointerGetDatum(aScalar->val.numeric),
1312 PointerGetDatum(bScalar->val.numeric)));
1313 case jbvBool:
1314 return aScalar->val.boolean == bScalar->val.boolean;
1315
1316 default:
1317 elog(ERROR, "invalid jsonb scalar type");
1318 }
1319 }
1320 elog(ERROR, "jsonb scalar type mismatch");
1321 return false;
1322}
1323
1324/*
1325 * Compare two scalar JsonbValues, returning -1, 0, or 1.
1326 *
1327 * Strings are compared using the default collation. Used by B-tree
1328 * operators, where a lexical sort order is generally expected.
1329 */
1330static int
1331compareJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
1332{
1333 if (aScalar->type == bScalar->type)
1334 {
1335 switch (aScalar->type)
1336 {
1337 case jbvNull:
1338 return 0;
1339 case jbvString:
1340 return varstr_cmp(aScalar->val.string.val,
1341 aScalar->val.string.len,
1342 bScalar->val.string.val,
1343 bScalar->val.string.len,
1344 DEFAULT_COLLATION_OID);
1345 case jbvNumeric:
1346 return DatumGetInt32(DirectFunctionCall2(numeric_cmp,
1347 PointerGetDatum(aScalar->val.numeric),
1348 PointerGetDatum(bScalar->val.numeric)));
1349 case jbvBool:
1350 if (aScalar->val.boolean == bScalar->val.boolean)
1351 return 0;
1352 else if (aScalar->val.boolean > bScalar->val.boolean)
1353 return 1;
1354 else
1355 return -1;
1356 default:
1357 elog(ERROR, "invalid jsonb scalar type");
1358 }
1359 }
1360 elog(ERROR, "jsonb scalar type mismatch");
1361 return -1;
1362}
1363
1364
1365/*
1366 * Functions for manipulating the resizable buffer used by convertJsonb and
1367 * its subroutines.
1368 */
1369
1370/*
1371 * Reserve 'len' bytes, at the end of the buffer, enlarging it if necessary.
1372 * Returns the offset to the reserved area. The caller is expected to fill
1373 * the reserved area later with copyToBuffer().
1374 */
1375static int
1376reserveFromBuffer(StringInfo buffer, int len)
1377{
1378 int offset;
1379
1380 /* Make more room if needed */
1381 enlargeStringInfo(buffer, len);
1382
1383 /* remember current offset */
1384 offset = buffer->len;
1385
1386 /* reserve the space */
1387 buffer->len += len;
1388
1389 /*
1390 * Keep a trailing null in place, even though it's not useful for us; it
1391 * seems best to preserve the invariants of StringInfos.
1392 */
1393 buffer->data[buffer->len] = '\0';
1394
1395 return offset;
1396}
1397
1398/*
1399 * Copy 'len' bytes to a previously reserved area in buffer.
1400 */
1401static void
1402copyToBuffer(StringInfo buffer, int offset, const char *data, int len)
1403{
1404 memcpy(buffer->data + offset, data, len);
1405}
1406
1407/*
1408 * A shorthand for reserveFromBuffer + copyToBuffer.
1409 */
1410static void
1411appendToBuffer(StringInfo buffer, const char *data, int len)
1412{
1413 int offset;
1414
1415 offset = reserveFromBuffer(buffer, len);
1416 copyToBuffer(buffer, offset, data, len);
1417}
1418
1419
1420/*
1421 * Append padding, so that the length of the StringInfo is int-aligned.
1422 * Returns the number of padding bytes appended.
1423 */
1424static short
1425padBufferToInt(StringInfo buffer)
1426{
1427 int padlen,
1428 p,
1429 offset;
1430
1431 padlen = INTALIGN(buffer->len) - buffer->len;
1432
1433 offset = reserveFromBuffer(buffer, padlen);
1434
1435 /* padlen must be small, so this is probably faster than a memset */
1436 for (p = 0; p < padlen; p++)
1437 buffer->data[offset + p] = '\0';
1438
1439 return padlen;
1440}
1441
1442/*
1443 * Given a JsonbValue, convert to Jsonb. The result is palloc'd.
1444 */
1445static Jsonb *
1446convertToJsonb(JsonbValue *val)
1447{
1448 StringInfoData buffer;
1449 JEntry jentry;
1450 Jsonb *res;
1451
1452 /* Should not already have binary representation */
1453 Assert(val->type != jbvBinary);
1454
1455 /* Allocate an output buffer. It will be enlarged as needed */
1456 initStringInfo(&buffer);
1457
1458 /* Make room for the varlena header */
1459 reserveFromBuffer(&buffer, VARHDRSZ);
1460
1461 convertJsonbValue(&buffer, &jentry, val, 0);
1462
1463 /*
1464 * Note: the JEntry of the root is discarded. Therefore the root
1465 * JsonbContainer struct must contain enough information to tell what kind
1466 * of value it is.
1467 */
1468
1469 res = (Jsonb *) buffer.data;
1470
1471 SET_VARSIZE(res, buffer.len);
1472
1473 return res;
1474}
1475
1476/*
1477 * Subroutine of convertJsonb: serialize a single JsonbValue into buffer.
1478 *
1479 * The JEntry header for this node is returned in *header. It is filled in
1480 * with the length of this value and appropriate type bits. If we wish to
1481 * store an end offset rather than a length, it is the caller's responsibility
1482 * to adjust for that.
1483 *
1484 * If the value is an array or an object, this recurses. 'level' is only used
1485 * for debugging purposes.
1486 */
1487static void
1488convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level)
1489{
1490 check_stack_depth();
1491
1492 if (!val)
1493 return;
1494
1495 /*
1496 * A JsonbValue passed as val should never have a type of jbvBinary, and
1497 * neither should any of its sub-components. Those values will be produced
1498 * by convertJsonbArray and convertJsonbObject, the results of which will
1499 * not be passed back to this function as an argument.
1500 */
1501
1502 if (IsAJsonbScalar(val))
1503 convertJsonbScalar(buffer, header, val);
1504 else if (val->type == jbvArray)
1505 convertJsonbArray(buffer, header, val, level);
1506 else if (val->type == jbvObject)
1507 convertJsonbObject(buffer, header, val, level);
1508 else
1509 elog(ERROR, "unknown type of jsonb container to convert");
1510}
1511
1512static void
1513convertJsonbArray(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
1514{
1515 int base_offset;
1516 int jentry_offset;
1517 int i;
1518 int totallen;
1519 uint32 header;
1520 int nElems = val->val.array.nElems;
1521
1522 /* Remember where in the buffer this array starts. */
1523 base_offset = buffer->len;
1524
1525 /* Align to 4-byte boundary (any padding counts as part of my data) */
1526 padBufferToInt(buffer);
1527
1528 /*
1529 * Construct the header Jentry and store it in the beginning of the
1530 * variable-length payload.
1531 */
1532 header = nElems | JB_FARRAY;
1533 if (val->val.array.rawScalar)
1534 {
1535 Assert(nElems == 1);
1536 Assert(level == 0);
1537 header |= JB_FSCALAR;
1538 }
1539
1540 appendToBuffer(buffer, (char *) &header, sizeof(uint32));
1541
1542 /* Reserve space for the JEntries of the elements. */
1543 jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nElems);
1544
1545 totallen = 0;
1546 for (i = 0; i < nElems; i++)
1547 {
1548 JsonbValue *elem = &val->val.array.elems[i];
1549 int len;
1550 JEntry meta;
1551
1552 /*
1553 * Convert element, producing a JEntry and appending its
1554 * variable-length data to buffer
1555 */
1556 convertJsonbValue(buffer, &meta, elem, level + 1);
1557
1558 len = JBE_OFFLENFLD(meta);
1559 totallen += len;
1560
1561 /*
1562 * Bail out if total variable-length data exceeds what will fit in a
1563 * JEntry length field. We check this in each iteration, not just
1564 * once at the end, to forestall possible integer overflow.
1565 */
1566 if (totallen > JENTRY_OFFLENMASK)
1567 ereport(ERROR,
1568 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1569 errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
1570 JENTRY_OFFLENMASK)));
1571
1572 /*
1573 * Convert each JB_OFFSET_STRIDE'th length to an offset.
1574 */
1575 if ((i % JB_OFFSET_STRIDE) == 0)
1576 meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1577
1578 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1579 jentry_offset += sizeof(JEntry);
1580 }
1581
1582 /* Total data size is everything we've appended to buffer */
1583 totallen = buffer->len - base_offset;
1584
1585 /* Check length again, since we didn't include the metadata above */
1586 if (totallen > JENTRY_OFFLENMASK)
1587 ereport(ERROR,
1588 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1589 errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
1590 JENTRY_OFFLENMASK)));
1591
1592 /* Initialize the header of this node in the container's JEntry array */
1593 *pheader = JENTRY_ISCONTAINER | totallen;
1594}
1595
1596static void
1597convertJsonbObject(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
1598{
1599 int base_offset;
1600 int jentry_offset;
1601 int i;
1602 int totallen;
1603 uint32 header;
1604 int nPairs = val->val.object.nPairs;
1605
1606 /* Remember where in the buffer this object starts. */
1607 base_offset = buffer->len;
1608
1609 /* Align to 4-byte boundary (any padding counts as part of my data) */
1610 padBufferToInt(buffer);
1611
1612 /*
1613 * Construct the header Jentry and store it in the beginning of the
1614 * variable-length payload.
1615 */
1616 header = nPairs | JB_FOBJECT;
1617 appendToBuffer(buffer, (char *) &header, sizeof(uint32));
1618
1619 /* Reserve space for the JEntries of the keys and values. */
1620 jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2);
1621
1622 /*
1623 * Iterate over the keys, then over the values, since that is the ordering
1624 * we want in the on-disk representation.
1625 */
1626 totallen = 0;
1627 for (i = 0; i < nPairs; i++)
1628 {
1629 JsonbPair *pair = &val->val.object.pairs[i];
1630 int len;
1631 JEntry meta;
1632
1633 /*
1634 * Convert key, producing a JEntry and appending its variable-length
1635 * data to buffer
1636 */
1637 convertJsonbScalar(buffer, &meta, &pair->key);
1638
1639 len = JBE_OFFLENFLD(meta);
1640 totallen += len;
1641
1642 /*
1643 * Bail out if total variable-length data exceeds what will fit in a
1644 * JEntry length field. We check this in each iteration, not just
1645 * once at the end, to forestall possible integer overflow.
1646 */
1647 if (totallen > JENTRY_OFFLENMASK)
1648 ereport(ERROR,
1649 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1650 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1651 JENTRY_OFFLENMASK)));
1652
1653 /*
1654 * Convert each JB_OFFSET_STRIDE'th length to an offset.
1655 */
1656 if ((i % JB_OFFSET_STRIDE) == 0)
1657 meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1658
1659 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1660 jentry_offset += sizeof(JEntry);
1661 }
1662 for (i = 0; i < nPairs; i++)
1663 {
1664 JsonbPair *pair = &val->val.object.pairs[i];
1665 int len;
1666 JEntry meta;
1667
1668 /*
1669 * Convert value, producing a JEntry and appending its variable-length
1670 * data to buffer
1671 */
1672 convertJsonbValue(buffer, &meta, &pair->value, level + 1);
1673
1674 len = JBE_OFFLENFLD(meta);
1675 totallen += len;
1676
1677 /*
1678 * Bail out if total variable-length data exceeds what will fit in a
1679 * JEntry length field. We check this in each iteration, not just
1680 * once at the end, to forestall possible integer overflow.
1681 */
1682 if (totallen > JENTRY_OFFLENMASK)
1683 ereport(ERROR,
1684 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1685 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1686 JENTRY_OFFLENMASK)));
1687
1688 /*
1689 * Convert each JB_OFFSET_STRIDE'th length to an offset.
1690 */
1691 if (((i + nPairs) % JB_OFFSET_STRIDE) == 0)
1692 meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
1693
1694 copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
1695 jentry_offset += sizeof(JEntry);
1696 }
1697
1698 /* Total data size is everything we've appended to buffer */
1699 totallen = buffer->len - base_offset;
1700
1701 /* Check length again, since we didn't include the metadata above */
1702 if (totallen > JENTRY_OFFLENMASK)
1703 ereport(ERROR,
1704 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1705 errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
1706 JENTRY_OFFLENMASK)));
1707
1708 /* Initialize the header of this node in the container's JEntry array */
1709 *pheader = JENTRY_ISCONTAINER | totallen;
1710}
1711
1712static void
1713convertJsonbScalar(StringInfo buffer, JEntry *jentry, JsonbValue *scalarVal)
1714{
1715 int numlen;
1716 short padlen;
1717
1718 switch (scalarVal->type)
1719 {
1720 case jbvNull:
1721 *jentry = JENTRY_ISNULL;
1722 break;
1723
1724 case jbvString:
1725 appendToBuffer(buffer, scalarVal->val.string.val, scalarVal->val.string.len);
1726
1727 *jentry = scalarVal->val.string.len;
1728 break;
1729
1730 case jbvNumeric:
1731 /* replace numeric NaN with string "NaN" */
1732 if (numeric_is_nan(scalarVal->val.numeric))
1733 {
1734 appendToBuffer(buffer, "NaN", 3);
1735 *jentry = 3;
1736 break;
1737 }
1738
1739 numlen = VARSIZE_ANY(scalarVal->val.numeric);
1740 padlen = padBufferToInt(buffer);
1741
1742 appendToBuffer(buffer, (char *) scalarVal->val.numeric, numlen);
1743
1744 *jentry = JENTRY_ISNUMERIC | (padlen + numlen);
1745 break;
1746
1747 case jbvBool:
1748 *jentry = (scalarVal->val.boolean) ?
1749 JENTRY_ISBOOL_TRUE : JENTRY_ISBOOL_FALSE;
1750 break;
1751
1752 default:
1753 elog(ERROR, "invalid jsonb scalar type");
1754 }
1755}
1756
1757/*
1758 * Compare two jbvString JsonbValue values, a and b.
1759 *
1760 * This is a special qsort() comparator used to sort strings in certain
1761 * internal contexts where it is sufficient to have a well-defined sort order.
1762 * In particular, object pair keys are sorted according to this criteria to
1763 * facilitate cheap binary searches where we don't care about lexical sort
1764 * order.
1765 *
1766 * a and b are first sorted based on their length. If a tie-breaker is
1767 * required, only then do we consider string binary equality.
1768 */
1769static int
1770lengthCompareJsonbStringValue(const void *a, const void *b)
1771{
1772 const JsonbValue *va = (const JsonbValue *) a;
1773 const JsonbValue *vb = (const JsonbValue *) b;
1774 int res;
1775
1776 Assert(va->type == jbvString);
1777 Assert(vb->type == jbvString);
1778
1779 if (va->val.string.len == vb->val.string.len)
1780 {
1781 res = memcmp(va->val.string.val, vb->val.string.val, va->val.string.len);
1782 }
1783 else
1784 {
1785 res = (va->val.string.len > vb->val.string.len) ? 1 : -1;
1786 }
1787
1788 return res;
1789}
1790
1791/*
1792 * qsort_arg() comparator to compare JsonbPair values.
1793 *
1794 * Third argument 'binequal' may point to a bool. If it's set, *binequal is set
1795 * to true iff a and b have full binary equality, since some callers have an
1796 * interest in whether the two values are equal or merely equivalent.
1797 *
1798 * N.B: String comparisons here are "length-wise"
1799 *
1800 * Pairs with equals keys are ordered such that the order field is respected.
1801 */
1802static int
1803lengthCompareJsonbPair(const void *a, const void *b, void *binequal)
1804{
1805 const JsonbPair *pa = (const JsonbPair *) a;
1806 const JsonbPair *pb = (const JsonbPair *) b;
1807 int res;
1808
1809 res = lengthCompareJsonbStringValue(&pa->key, &pb->key);
1810 if (res == 0 && binequal)
1811 *((bool *) binequal) = true;
1812
1813 /*
1814 * Guarantee keeping order of equal pair. Unique algorithm will prefer
1815 * first element as value.
1816 */
1817 if (res == 0)
1818 res = (pa->order > pb->order) ? -1 : 1;
1819
1820 return res;
1821}
1822
1823/*
1824 * Sort and unique-ify pairs in JsonbValue object
1825 */
1826static void
1827uniqueifyJsonbObject(JsonbValue *object)
1828{
1829 bool hasNonUniq = false;
1830
1831 Assert(object->type == jbvObject);
1832
1833 if (object->val.object.nPairs > 1)
1834 qsort_arg(object->val.object.pairs, object->val.object.nPairs, sizeof(JsonbPair),
1835 lengthCompareJsonbPair, &hasNonUniq);
1836
1837 if (hasNonUniq)
1838 {
1839 JsonbPair *ptr = object->val.object.pairs + 1,
1840 *res = object->val.object.pairs;
1841
1842 while (ptr - object->val.object.pairs < object->val.object.nPairs)
1843 {
1844 /* Avoid copying over duplicate */
1845 if (lengthCompareJsonbStringValue(ptr, res) != 0)
1846 {
1847 res++;
1848 if (ptr != res)
1849 memcpy(res, ptr, sizeof(JsonbPair));
1850 }
1851 ptr++;
1852 }
1853
1854 object->val.object.nPairs = res + 1 - object->val.object.pairs;
1855 }
1856}
1857