1/*-------------------------------------------------------------------------
2 *
3 * jsonb_gin.c
4 * GIN support functions for jsonb
5 *
6 * Copyright (c) 2014-2019, PostgreSQL Global Development Group
7 *
8 * We provide two opclasses for jsonb indexing: jsonb_ops and jsonb_path_ops.
9 * For their description see json.sgml and comments in jsonb.h.
10 *
11 * The operators support, among the others, "jsonb @? jsonpath" and
12 * "jsonb @@ jsonpath". Expressions containing these operators are easily
13 * expressed through each other.
14 *
15 * jb @? 'path' <=> jb @@ 'EXISTS(path)'
16 * jb @@ 'expr' <=> jb @? '$ ? (expr)'
17 *
 * Thus, we're going to consider only the @@ operator; everything said about
 * it applies to the @? operator as well, through jb @@ 'EXISTS(path)'.
20 *
21 * Result of jsonpath query extraction is a tree, which leaf nodes are index
22 * entries and non-leaf nodes are AND/OR logical expressions. Basically we
23 * extract following statements out of jsonpath:
24 *
25 * 1) "accessors_chain = const",
26 * 2) "EXISTS(accessors_chain)".
27 *
28 * Accessors chain may consist of .key, [*] and [index] accessors. jsonb_ops
29 * additionally supports .* and .**.
30 *
 * For now, both jsonb_ops and jsonb_path_ops support only statements of
 * the 1st kind.  jsonb_ops might also support statements of the 2nd kind,
 * but given that we have no statistics, keys extracted from an accessors
 * chain are likely non-selective.  Therefore, we choose not to confuse the
 * optimizer and skip statements of the 2nd kind altogether.  In future
 * versions that might be changed.
37 *
38 * In jsonb_ops statement of the 1st kind is split into expression of AND'ed
39 * keys and const. Sometimes const might be interpreted as both value or key
40 * in jsonb_ops. Then statement of 1st kind is decomposed into the expression
41 * below.
42 *
43 * key1 AND key2 AND ... AND keyN AND (const_as_value OR const_as_key)
44 *
45 * jsonb_path_ops transforms each statement of the 1st kind into single hash
46 * entry below.
47 *
48 * HASH(key1, key2, ... , keyN, const)
49 *
 * Although statements of the 2nd kind are supported by neither jsonb_ops nor
 * jsonb_path_ops, EXISTS(path) expressions might still be supported when
 * statements of the 1st kind can be extracted out of their filters.
53 *
54 * IDENTIFICATION
55 * src/backend/utils/adt/jsonb_gin.c
56 *
57 *-------------------------------------------------------------------------
58 */
59
60#include "postgres.h"
61
62#include "access/gin.h"
63#include "access/stratnum.h"
64#include "catalog/pg_collation.h"
65#include "catalog/pg_type.h"
66#include "miscadmin.h"
67#include "utils/builtins.h"
68#include "utils/hashutils.h"
69#include "utils/jsonb.h"
70#include "utils/jsonpath.h"
71#include "utils/varlena.h"
72
73typedef struct PathHashStack
74{
75 uint32 hash;
76 struct PathHashStack *parent;
77} PathHashStack;
78
79/* Buffer for GIN entries */
80typedef struct GinEntries
81{
82 Datum *buf;
83 int count;
84 int allocated;
85} GinEntries;
86
/* Kinds of nodes in the expression tree extracted from a jsonpath query */
typedef enum JsonPathGinNodeType
{
	JSP_GIN_OR = 0,				/* logical OR of the child nodes */
	JSP_GIN_AND,				/* logical AND of the child nodes */
	JSP_GIN_ENTRY				/* leaf node: a single index entry */
} JsonPathGinNodeType;
93
94typedef struct JsonPathGinNode JsonPathGinNode;
95
96/* Node in jsonpath expression tree */
97struct JsonPathGinNode
98{
99 JsonPathGinNodeType type;
100 union
101 {
102 int nargs; /* valid for OR and AND nodes */
103 int entryIndex; /* index in GinEntries array, valid for ENTRY
104 * nodes after entries output */
105 Datum entryDatum; /* path hash or key name/scalar, valid for
106 * ENTRY nodes before entries output */
107 } val;
108 JsonPathGinNode *args[FLEXIBLE_ARRAY_MEMBER]; /* valid for OR and AND
109 * nodes */
110};
111
112/*
113 * jsonb_ops entry extracted from jsonpath item. Corresponding path item
114 * may be: '.key', '.*', '.**', '[index]' or '[*]'.
115 * Entry type is stored in 'type' field.
116 */
117typedef struct JsonPathGinPathItem
118{
119 struct JsonPathGinPathItem *parent;
120 Datum keyName; /* key name (for '.key' path item) or NULL */
121 JsonPathItemType type; /* type of jsonpath item */
122} JsonPathGinPathItem;
123
124/* GIN representation of the extracted json path */
125typedef union JsonPathGinPath
126{
127 JsonPathGinPathItem *items; /* list of path items (jsonb_ops) */
128 uint32 hash; /* hash of the path (jsonb_path_ops) */
129} JsonPathGinPath;
130
131typedef struct JsonPathGinContext JsonPathGinContext;
132
133/* Callback, which stores information about path item into JsonPathGinPath */
134typedef bool (*JsonPathGinAddPathItemFunc) (JsonPathGinPath *path,
135 JsonPathItem *jsp);
136
137/*
138 * Callback, which extracts set of nodes from statement of 1st kind
139 * (scalar != NULL) or statement of 2nd kind (scalar == NULL).
140 */
141typedef List *(*JsonPathGinExtractNodesFunc) (JsonPathGinContext *cxt,
142 JsonPathGinPath path,
143 JsonbValue *scalar,
144 List *nodes);
145
146/* Context for jsonpath entries extraction */
147struct JsonPathGinContext
148{
149 JsonPathGinAddPathItemFunc add_path_item;
150 JsonPathGinExtractNodesFunc extract_nodes;
151 bool lax;
152};
153
154static Datum make_text_key(char flag, const char *str, int len);
155static Datum make_scalar_key(const JsonbValue *scalarVal, bool is_key);
156
157static JsonPathGinNode *extract_jsp_bool_expr(JsonPathGinContext *cxt,
158 JsonPathGinPath path, JsonPathItem *jsp, bool not);
159
160
161/* Initialize GinEntries struct */
162static void
163init_gin_entries(GinEntries *entries, int preallocated)
164{
165 entries->allocated = preallocated;
166 entries->buf = preallocated ? palloc(sizeof(Datum) * preallocated) : NULL;
167 entries->count = 0;
168}
169
170/* Add new entry to GinEntries */
171static int
172add_gin_entry(GinEntries *entries, Datum entry)
173{
174 int id = entries->count;
175
176 if (entries->count >= entries->allocated)
177 {
178 if (entries->allocated)
179 {
180 entries->allocated *= 2;
181 entries->buf = repalloc(entries->buf,
182 sizeof(Datum) * entries->allocated);
183 }
184 else
185 {
186 entries->allocated = 8;
187 entries->buf = palloc(sizeof(Datum) * entries->allocated);
188 }
189 }
190
191 entries->buf[entries->count++] = entry;
192
193 return id;
194}
195
196/*
197 *
198 * jsonb_ops GIN opclass support functions
199 *
200 */
201
202Datum
203gin_compare_jsonb(PG_FUNCTION_ARGS)
204{
205 text *arg1 = PG_GETARG_TEXT_PP(0);
206 text *arg2 = PG_GETARG_TEXT_PP(1);
207 int32 result;
208 char *a1p,
209 *a2p;
210 int len1,
211 len2;
212
213 a1p = VARDATA_ANY(arg1);
214 a2p = VARDATA_ANY(arg2);
215
216 len1 = VARSIZE_ANY_EXHDR(arg1);
217 len2 = VARSIZE_ANY_EXHDR(arg2);
218
219 /* Compare text as bttextcmp does, but always using C collation */
220 result = varstr_cmp(a1p, len1, a2p, len2, C_COLLATION_OID);
221
222 PG_FREE_IF_COPY(arg1, 0);
223 PG_FREE_IF_COPY(arg2, 1);
224
225 PG_RETURN_INT32(result);
226}
227
228Datum
229gin_extract_jsonb(PG_FUNCTION_ARGS)
230{
231 Jsonb *jb = (Jsonb *) PG_GETARG_JSONB_P(0);
232 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
233 int total = JB_ROOT_COUNT(jb);
234 JsonbIterator *it;
235 JsonbValue v;
236 JsonbIteratorToken r;
237 GinEntries entries;
238
239 /* If the root level is empty, we certainly have no keys */
240 if (total == 0)
241 {
242 *nentries = 0;
243 PG_RETURN_POINTER(NULL);
244 }
245
246 /* Otherwise, use 2 * root count as initial estimate of result size */
247 init_gin_entries(&entries, 2 * total);
248
249 it = JsonbIteratorInit(&jb->root);
250
251 while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
252 {
253 switch (r)
254 {
255 case WJB_KEY:
256 add_gin_entry(&entries, make_scalar_key(&v, true));
257 break;
258 case WJB_ELEM:
259 /* Pretend string array elements are keys, see jsonb.h */
260 add_gin_entry(&entries, make_scalar_key(&v, v.type == jbvString));
261 break;
262 case WJB_VALUE:
263 add_gin_entry(&entries, make_scalar_key(&v, false));
264 break;
265 default:
266 /* we can ignore structural items */
267 break;
268 }
269 }
270
271 *nentries = entries.count;
272
273 PG_RETURN_POINTER(entries.buf);
274}
275
276/* Append JsonPathGinPathItem to JsonPathGinPath (jsonb_ops) */
277static bool
278jsonb_ops__add_path_item(JsonPathGinPath *path, JsonPathItem *jsp)
279{
280 JsonPathGinPathItem *pentry;
281 Datum keyName;
282
283 switch (jsp->type)
284 {
285 case jpiRoot:
286 path->items = NULL; /* reset path */
287 return true;
288
289 case jpiKey:
290 {
291 int len;
292 char *key = jspGetString(jsp, &len);
293
294 keyName = make_text_key(JGINFLAG_KEY, key, len);
295 break;
296 }
297
298 case jpiAny:
299 case jpiAnyKey:
300 case jpiAnyArray:
301 case jpiIndexArray:
302 keyName = PointerGetDatum(NULL);
303 break;
304
305 default:
306 /* other path items like item methods are not supported */
307 return false;
308 }
309
310 pentry = palloc(sizeof(*pentry));
311
312 pentry->type = jsp->type;
313 pentry->keyName = keyName;
314 pentry->parent = path->items;
315
316 path->items = pentry;
317
318 return true;
319}
320
321/* Combine existing path hash with next key hash (jsonb_path_ops) */
322static bool
323jsonb_path_ops__add_path_item(JsonPathGinPath *path, JsonPathItem *jsp)
324{
325 switch (jsp->type)
326 {
327 case jpiRoot:
328 path->hash = 0; /* reset path hash */
329 return true;
330
331 case jpiKey:
332 {
333 JsonbValue jbv;
334
335 jbv.type = jbvString;
336 jbv.val.string.val = jspGetString(jsp, &jbv.val.string.len);
337
338 JsonbHashScalarValue(&jbv, &path->hash);
339 return true;
340 }
341
342 case jpiIndexArray:
343 case jpiAnyArray:
344 return true; /* path hash is unchanged */
345
346 default:
347 /* other items (wildcard paths, item methods) are not supported */
348 return false;
349 }
350}
351
352static JsonPathGinNode *
353make_jsp_entry_node(Datum entry)
354{
355 JsonPathGinNode *node = palloc(offsetof(JsonPathGinNode, args));
356
357 node->type = JSP_GIN_ENTRY;
358 node->val.entryDatum = entry;
359
360 return node;
361}
362
363static JsonPathGinNode *
364make_jsp_entry_node_scalar(JsonbValue *scalar, bool iskey)
365{
366 return make_jsp_entry_node(make_scalar_key(scalar, iskey));
367}
368
369static JsonPathGinNode *
370make_jsp_expr_node(JsonPathGinNodeType type, int nargs)
371{
372 JsonPathGinNode *node = palloc(offsetof(JsonPathGinNode, args) +
373 sizeof(node->args[0]) * nargs);
374
375 node->type = type;
376 node->val.nargs = nargs;
377
378 return node;
379}
380
381static JsonPathGinNode *
382make_jsp_expr_node_args(JsonPathGinNodeType type, List *args)
383{
384 JsonPathGinNode *node = make_jsp_expr_node(type, list_length(args));
385 ListCell *lc;
386 int i = 0;
387
388 foreach(lc, args)
389 node->args[i++] = lfirst(lc);
390
391 return node;
392}
393
394static JsonPathGinNode *
395make_jsp_expr_node_binary(JsonPathGinNodeType type,
396 JsonPathGinNode *arg1, JsonPathGinNode *arg2)
397{
398 JsonPathGinNode *node = make_jsp_expr_node(type, 2);
399
400 node->args[0] = arg1;
401 node->args[1] = arg2;
402
403 return node;
404}
405
406/* Append a list of nodes from the jsonpath (jsonb_ops). */
407static List *
408jsonb_ops__extract_nodes(JsonPathGinContext *cxt, JsonPathGinPath path,
409 JsonbValue *scalar, List *nodes)
410{
411 JsonPathGinPathItem *pentry;
412
413 if (scalar)
414 {
415 JsonPathGinNode *node;
416
417 /*
418 * Append path entry nodes only if scalar is provided. See header
419 * comment for details.
420 */
421 for (pentry = path.items; pentry; pentry = pentry->parent)
422 {
423 if (pentry->type == jpiKey) /* only keys are indexed */
424 nodes = lappend(nodes, make_jsp_entry_node(pentry->keyName));
425 }
426
427 /* Append scalar node for equality queries. */
428 if (scalar->type == jbvString)
429 {
430 JsonPathGinPathItem *last = path.items;
431 GinTernaryValue key_entry;
432
433 /*
434 * Assuming that jsonb_ops interprets string array elements as
435 * keys, we may extract key or non-key entry or even both. In the
436 * latter case we create OR-node. It is possible in lax mode
437 * where arrays are automatically unwrapped, or in strict mode for
438 * jpiAny items.
439 */
440
441 if (cxt->lax)
442 key_entry = GIN_MAYBE;
443 else if (!last) /* root ($) */
444 key_entry = GIN_FALSE;
445 else if (last->type == jpiAnyArray || last->type == jpiIndexArray)
446 key_entry = GIN_TRUE;
447 else if (last->type == jpiAny)
448 key_entry = GIN_MAYBE;
449 else
450 key_entry = GIN_FALSE;
451
452 if (key_entry == GIN_MAYBE)
453 {
454 JsonPathGinNode *n1 = make_jsp_entry_node_scalar(scalar, true);
455 JsonPathGinNode *n2 = make_jsp_entry_node_scalar(scalar, false);
456
457 node = make_jsp_expr_node_binary(JSP_GIN_OR, n1, n2);
458 }
459 else
460 {
461 node = make_jsp_entry_node_scalar(scalar,
462 key_entry == GIN_TRUE);
463 }
464 }
465 else
466 {
467 node = make_jsp_entry_node_scalar(scalar, false);
468 }
469
470 nodes = lappend(nodes, node);
471 }
472
473 return nodes;
474}
475
476/* Append a list of nodes from the jsonpath (jsonb_path_ops). */
477static List *
478jsonb_path_ops__extract_nodes(JsonPathGinContext *cxt, JsonPathGinPath path,
479 JsonbValue *scalar, List *nodes)
480{
481 if (scalar)
482 {
483 /* append path hash node for equality queries */
484 uint32 hash = path.hash;
485
486 JsonbHashScalarValue(scalar, &hash);
487
488 return lappend(nodes,
489 make_jsp_entry_node(UInt32GetDatum(hash)));
490 }
491 else
492 {
493 /* jsonb_path_ops doesn't support EXISTS queries => nothing to append */
494 return nodes;
495 }
496}
497
498/*
499 * Extract a list of expression nodes that need to be AND-ed by the caller.
500 * Extracted expression is 'path == scalar' if 'scalar' is non-NULL, and
501 * 'EXISTS(path)' otherwise.
502 */
503static List *
504extract_jsp_path_expr_nodes(JsonPathGinContext *cxt, JsonPathGinPath path,
505 JsonPathItem *jsp, JsonbValue *scalar)
506{
507 JsonPathItem next;
508 List *nodes = NIL;
509
510 for (;;)
511 {
512 switch (jsp->type)
513 {
514 case jpiCurrent:
515 break;
516
517 case jpiFilter:
518 {
519 JsonPathItem arg;
520 JsonPathGinNode *filter;
521
522 jspGetArg(jsp, &arg);
523
524 filter = extract_jsp_bool_expr(cxt, path, &arg, false);
525
526 if (filter)
527 nodes = lappend(nodes, filter);
528
529 break;
530 }
531
532 default:
533 if (!cxt->add_path_item(&path, jsp))
534
535 /*
536 * Path is not supported by the index opclass, return only
537 * the extracted filter nodes.
538 */
539 return nodes;
540 break;
541 }
542
543 if (!jspGetNext(jsp, &next))
544 break;
545
546 jsp = &next;
547 }
548
549 /*
550 * Append nodes from the path expression itself to the already extracted
551 * list of filter nodes.
552 */
553 return cxt->extract_nodes(cxt, path, scalar, nodes);
554}
555
556/*
557 * Extract an expression node from one of following jsonpath path expressions:
558 * EXISTS(jsp) (when 'scalar' is NULL)
559 * jsp == scalar (when 'scalar' is not NULL).
560 *
561 * The current path (@) is passed in 'path'.
562 */
563static JsonPathGinNode *
564extract_jsp_path_expr(JsonPathGinContext *cxt, JsonPathGinPath path,
565 JsonPathItem *jsp, JsonbValue *scalar)
566{
567 /* extract a list of nodes to be AND-ed */
568 List *nodes = extract_jsp_path_expr_nodes(cxt, path, jsp, scalar);
569
570 if (list_length(nodes) <= 0)
571 /* no nodes were extracted => full scan is needed for this path */
572 return NULL;
573
574 if (list_length(nodes) == 1)
575 return linitial(nodes); /* avoid extra AND-node */
576
577 /* construct AND-node for path with filters */
578 return make_jsp_expr_node_args(JSP_GIN_AND, nodes);
579}
580
581/* Recursively extract nodes from the boolean jsonpath expression. */
582static JsonPathGinNode *
583extract_jsp_bool_expr(JsonPathGinContext *cxt, JsonPathGinPath path,
584 JsonPathItem *jsp, bool not)
585{
586 check_stack_depth();
587
588 switch (jsp->type)
589 {
590 case jpiAnd: /* expr && expr */
591 case jpiOr: /* expr || expr */
592 {
593 JsonPathItem arg;
594 JsonPathGinNode *larg;
595 JsonPathGinNode *rarg;
596 JsonPathGinNodeType type;
597
598 jspGetLeftArg(jsp, &arg);
599 larg = extract_jsp_bool_expr(cxt, path, &arg, not);
600
601 jspGetRightArg(jsp, &arg);
602 rarg = extract_jsp_bool_expr(cxt, path, &arg, not);
603
604 if (!larg || !rarg)
605 {
606 if (jsp->type == jpiOr)
607 return NULL;
608
609 return larg ? larg : rarg;
610 }
611
612 type = not ^ (jsp->type == jpiAnd) ? JSP_GIN_AND : JSP_GIN_OR;
613
614 return make_jsp_expr_node_binary(type, larg, rarg);
615 }
616
617 case jpiNot: /* !expr */
618 {
619 JsonPathItem arg;
620
621 jspGetArg(jsp, &arg);
622
623 /* extract child expression inverting 'not' flag */
624 return extract_jsp_bool_expr(cxt, path, &arg, !not);
625 }
626
627 case jpiExists: /* EXISTS(path) */
628 {
629 JsonPathItem arg;
630
631 if (not)
632 return NULL; /* NOT EXISTS is not supported */
633
634 jspGetArg(jsp, &arg);
635
636 return extract_jsp_path_expr(cxt, path, &arg, NULL);
637 }
638
639 case jpiNotEqual:
640
641 /*
642 * 'not' == true case is not supported here because '!(path !=
643 * scalar)' is not equivalent to 'path == scalar' in the general
644 * case because of sequence comparison semantics: 'path == scalar'
645 * === 'EXISTS (path, @ == scalar)', '!(path != scalar)' ===
646 * 'FOR_ALL(path, @ == scalar)'. So, we should translate '!(path
647 * != scalar)' into GIN query 'path == scalar || EMPTY(path)', but
648 * 'EMPTY(path)' queries are not supported by the both jsonb
649 * opclasses. However in strict mode we could omit 'EMPTY(path)'
650 * part if the path can return exactly one item (it does not
651 * contain wildcard accessors or item methods like .keyvalue()
652 * etc.).
653 */
654 return NULL;
655
656 case jpiEqual: /* path == scalar */
657 {
658 JsonPathItem left_item;
659 JsonPathItem right_item;
660 JsonPathItem *path_item;
661 JsonPathItem *scalar_item;
662 JsonbValue scalar;
663
664 if (not)
665 return NULL;
666
667 jspGetLeftArg(jsp, &left_item);
668 jspGetRightArg(jsp, &right_item);
669
670 if (jspIsScalar(left_item.type))
671 {
672 scalar_item = &left_item;
673 path_item = &right_item;
674 }
675 else if (jspIsScalar(right_item.type))
676 {
677 scalar_item = &right_item;
678 path_item = &left_item;
679 }
680 else
681 return NULL; /* at least one operand should be a scalar */
682
683 switch (scalar_item->type)
684 {
685 case jpiNull:
686 scalar.type = jbvNull;
687 break;
688 case jpiBool:
689 scalar.type = jbvBool;
690 scalar.val.boolean = !!*scalar_item->content.value.data;
691 break;
692 case jpiNumeric:
693 scalar.type = jbvNumeric;
694 scalar.val.numeric =
695 (Numeric) scalar_item->content.value.data;
696 break;
697 case jpiString:
698 scalar.type = jbvString;
699 scalar.val.string.val = scalar_item->content.value.data;
700 scalar.val.string.len =
701 scalar_item->content.value.datalen;
702 break;
703 default:
704 elog(ERROR, "invalid scalar jsonpath item type: %d",
705 scalar_item->type);
706 return NULL;
707 }
708
709 return extract_jsp_path_expr(cxt, path, path_item, &scalar);
710 }
711
712 default:
713 return NULL; /* not a boolean expression */
714 }
715}
716
717/* Recursively emit all GIN entries found in the node tree */
718static void
719emit_jsp_gin_entries(JsonPathGinNode *node, GinEntries *entries)
720{
721 check_stack_depth();
722
723 switch (node->type)
724 {
725 case JSP_GIN_ENTRY:
726 /* replace datum with its index in the array */
727 node->val.entryIndex = add_gin_entry(entries, node->val.entryDatum);
728 break;
729
730 case JSP_GIN_OR:
731 case JSP_GIN_AND:
732 {
733 int i;
734
735 for (i = 0; i < node->val.nargs; i++)
736 emit_jsp_gin_entries(node->args[i], entries);
737
738 break;
739 }
740 }
741}
742
743/*
744 * Recursively extract GIN entries from jsonpath query.
745 * Root expression node is put into (*extra_data)[0].
746 */
747static Datum *
748extract_jsp_query(JsonPath *jp, StrategyNumber strat, bool pathOps,
749 int32 *nentries, Pointer **extra_data)
750{
751 JsonPathGinContext cxt;
752 JsonPathItem root;
753 JsonPathGinNode *node;
754 JsonPathGinPath path = {0};
755 GinEntries entries = {0};
756
757 cxt.lax = (jp->header & JSONPATH_LAX) != 0;
758
759 if (pathOps)
760 {
761 cxt.add_path_item = jsonb_path_ops__add_path_item;
762 cxt.extract_nodes = jsonb_path_ops__extract_nodes;
763 }
764 else
765 {
766 cxt.add_path_item = jsonb_ops__add_path_item;
767 cxt.extract_nodes = jsonb_ops__extract_nodes;
768 }
769
770 jspInit(&root, jp);
771
772 node = strat == JsonbJsonpathExistsStrategyNumber
773 ? extract_jsp_path_expr(&cxt, path, &root, NULL)
774 : extract_jsp_bool_expr(&cxt, path, &root, false);
775
776 if (!node)
777 {
778 *nentries = 0;
779 return NULL;
780 }
781
782 emit_jsp_gin_entries(node, &entries);
783
784 *nentries = entries.count;
785 if (!*nentries)
786 return NULL;
787
788 *extra_data = palloc0(sizeof(**extra_data) * entries.count);
789 **extra_data = (Pointer) node;
790
791 return entries.buf;
792}
793
794/*
795 * Recursively execute jsonpath expression.
796 * 'check' is a bool[] or a GinTernaryValue[] depending on 'ternary' flag.
797 */
798static GinTernaryValue
799execute_jsp_gin_node(JsonPathGinNode *node, void *check, bool ternary)
800{
801 GinTernaryValue res;
802 GinTernaryValue v;
803 int i;
804
805 switch (node->type)
806 {
807 case JSP_GIN_AND:
808 res = GIN_TRUE;
809 for (i = 0; i < node->val.nargs; i++)
810 {
811 v = execute_jsp_gin_node(node->args[i], check, ternary);
812 if (v == GIN_FALSE)
813 return GIN_FALSE;
814 else if (v == GIN_MAYBE)
815 res = GIN_MAYBE;
816 }
817 return res;
818
819 case JSP_GIN_OR:
820 res = GIN_FALSE;
821 for (i = 0; i < node->val.nargs; i++)
822 {
823 v = execute_jsp_gin_node(node->args[i], check, ternary);
824 if (v == GIN_TRUE)
825 return GIN_TRUE;
826 else if (v == GIN_MAYBE)
827 res = GIN_MAYBE;
828 }
829 return res;
830
831 case JSP_GIN_ENTRY:
832 {
833 int index = node->val.entryIndex;
834
835 if (ternary)
836 return ((GinTernaryValue *) check)[index];
837 else
838 return ((bool *) check)[index] ? GIN_TRUE : GIN_FALSE;
839 }
840
841 default:
842 elog(ERROR, "invalid jsonpath gin node type: %d", node->type);
843 return GIN_FALSE; /* keep compiler quiet */
844 }
845}
846
847Datum
848gin_extract_jsonb_query(PG_FUNCTION_ARGS)
849{
850 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
851 StrategyNumber strategy = PG_GETARG_UINT16(2);
852 int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
853 Datum *entries;
854
855 if (strategy == JsonbContainsStrategyNumber)
856 {
857 /* Query is a jsonb, so just apply gin_extract_jsonb... */
858 entries = (Datum *)
859 DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb,
860 PG_GETARG_DATUM(0),
861 PointerGetDatum(nentries)));
862 /* ...although "contains {}" requires a full index scan */
863 if (*nentries == 0)
864 *searchMode = GIN_SEARCH_MODE_ALL;
865 }
866 else if (strategy == JsonbExistsStrategyNumber)
867 {
868 /* Query is a text string, which we treat as a key */
869 text *query = PG_GETARG_TEXT_PP(0);
870
871 *nentries = 1;
872 entries = (Datum *) palloc(sizeof(Datum));
873 entries[0] = make_text_key(JGINFLAG_KEY,
874 VARDATA_ANY(query),
875 VARSIZE_ANY_EXHDR(query));
876 }
877 else if (strategy == JsonbExistsAnyStrategyNumber ||
878 strategy == JsonbExistsAllStrategyNumber)
879 {
880 /* Query is a text array; each element is treated as a key */
881 ArrayType *query = PG_GETARG_ARRAYTYPE_P(0);
882 Datum *key_datums;
883 bool *key_nulls;
884 int key_count;
885 int i,
886 j;
887
888 deconstruct_array(query,
889 TEXTOID, -1, false, 'i',
890 &key_datums, &key_nulls, &key_count);
891
892 entries = (Datum *) palloc(sizeof(Datum) * key_count);
893
894 for (i = 0, j = 0; i < key_count; i++)
895 {
896 /* Nulls in the array are ignored */
897 if (key_nulls[i])
898 continue;
899 entries[j++] = make_text_key(JGINFLAG_KEY,
900 VARDATA(key_datums[i]),
901 VARSIZE(key_datums[i]) - VARHDRSZ);
902 }
903
904 *nentries = j;
905 /* ExistsAll with no keys should match everything */
906 if (j == 0 && strategy == JsonbExistsAllStrategyNumber)
907 *searchMode = GIN_SEARCH_MODE_ALL;
908 }
909 else if (strategy == JsonbJsonpathPredicateStrategyNumber ||
910 strategy == JsonbJsonpathExistsStrategyNumber)
911 {
912 JsonPath *jp = PG_GETARG_JSONPATH_P(0);
913 Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
914
915 entries = extract_jsp_query(jp, strategy, false, nentries, extra_data);
916
917 if (!entries)
918 *searchMode = GIN_SEARCH_MODE_ALL;
919 }
920 else
921 {
922 elog(ERROR, "unrecognized strategy number: %d", strategy);
923 entries = NULL; /* keep compiler quiet */
924 }
925
926 PG_RETURN_POINTER(entries);
927}
928
929Datum
930gin_consistent_jsonb(PG_FUNCTION_ARGS)
931{
932 bool *check = (bool *) PG_GETARG_POINTER(0);
933 StrategyNumber strategy = PG_GETARG_UINT16(1);
934
935 /* Jsonb *query = PG_GETARG_JSONB_P(2); */
936 int32 nkeys = PG_GETARG_INT32(3);
937
938 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
939 bool *recheck = (bool *) PG_GETARG_POINTER(5);
940 bool res = true;
941 int32 i;
942
943 if (strategy == JsonbContainsStrategyNumber)
944 {
945 /*
946 * We must always recheck, since we can't tell from the index whether
947 * the positions of the matched items match the structure of the query
948 * object. (Even if we could, we'd also have to worry about hashed
949 * keys and the index's failure to distinguish keys from string array
950 * elements.) However, the tuple certainly doesn't match unless it
951 * contains all the query keys.
952 */
953 *recheck = true;
954 for (i = 0; i < nkeys; i++)
955 {
956 if (!check[i])
957 {
958 res = false;
959 break;
960 }
961 }
962 }
963 else if (strategy == JsonbExistsStrategyNumber)
964 {
965 /*
966 * Although the key is certainly present in the index, we must recheck
967 * because (1) the key might be hashed, and (2) the index match might
968 * be for a key that's not at top level of the JSON object. For (1),
969 * we could look at the query key to see if it's hashed and not
970 * recheck if not, but the index lacks enough info to tell about (2).
971 */
972 *recheck = true;
973 res = true;
974 }
975 else if (strategy == JsonbExistsAnyStrategyNumber)
976 {
977 /* As for plain exists, we must recheck */
978 *recheck = true;
979 res = true;
980 }
981 else if (strategy == JsonbExistsAllStrategyNumber)
982 {
983 /* As for plain exists, we must recheck */
984 *recheck = true;
985 /* ... but unless all the keys are present, we can say "false" */
986 for (i = 0; i < nkeys; i++)
987 {
988 if (!check[i])
989 {
990 res = false;
991 break;
992 }
993 }
994 }
995 else if (strategy == JsonbJsonpathPredicateStrategyNumber ||
996 strategy == JsonbJsonpathExistsStrategyNumber)
997 {
998 *recheck = true;
999
1000 if (nkeys > 0)
1001 {
1002 Assert(extra_data && extra_data[0]);
1003 res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check,
1004 false) != GIN_FALSE;
1005 }
1006 }
1007 else
1008 elog(ERROR, "unrecognized strategy number: %d", strategy);
1009
1010 PG_RETURN_BOOL(res);
1011}
1012
1013Datum
1014gin_triconsistent_jsonb(PG_FUNCTION_ARGS)
1015{
1016 GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
1017 StrategyNumber strategy = PG_GETARG_UINT16(1);
1018
1019 /* Jsonb *query = PG_GETARG_JSONB_P(2); */
1020 int32 nkeys = PG_GETARG_INT32(3);
1021 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
1022 GinTernaryValue res = GIN_MAYBE;
1023 int32 i;
1024
1025 /*
1026 * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this
1027 * corresponds to always forcing recheck in the regular consistent
1028 * function, for the reasons listed there.
1029 */
1030 if (strategy == JsonbContainsStrategyNumber ||
1031 strategy == JsonbExistsAllStrategyNumber)
1032 {
1033 /* All extracted keys must be present */
1034 for (i = 0; i < nkeys; i++)
1035 {
1036 if (check[i] == GIN_FALSE)
1037 {
1038 res = GIN_FALSE;
1039 break;
1040 }
1041 }
1042 }
1043 else if (strategy == JsonbExistsStrategyNumber ||
1044 strategy == JsonbExistsAnyStrategyNumber)
1045 {
1046 /* At least one extracted key must be present */
1047 res = GIN_FALSE;
1048 for (i = 0; i < nkeys; i++)
1049 {
1050 if (check[i] == GIN_TRUE ||
1051 check[i] == GIN_MAYBE)
1052 {
1053 res = GIN_MAYBE;
1054 break;
1055 }
1056 }
1057 }
1058 else if (strategy == JsonbJsonpathPredicateStrategyNumber ||
1059 strategy == JsonbJsonpathExistsStrategyNumber)
1060 {
1061 if (nkeys > 0)
1062 {
1063 Assert(extra_data && extra_data[0]);
1064 res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check,
1065 true);
1066
1067 /* Should always recheck the result */
1068 if (res == GIN_TRUE)
1069 res = GIN_MAYBE;
1070 }
1071 }
1072 else
1073 elog(ERROR, "unrecognized strategy number: %d", strategy);
1074
1075 PG_RETURN_GIN_TERNARY_VALUE(res);
1076}
1077
1078/*
1079 *
1080 * jsonb_path_ops GIN opclass support functions
1081 *
1082 * In a jsonb_path_ops index, the GIN keys are uint32 hashes, one per JSON
1083 * value; but the JSON key(s) leading to each value are also included in its
1084 * hash computation. This means we can only support containment queries,
1085 * but the index can distinguish, for example, {"foo": 42} from {"bar": 42}
1086 * since different hashes will be generated.
1087 *
1088 */
1089
1090Datum
1091gin_extract_jsonb_path(PG_FUNCTION_ARGS)
1092{
1093 Jsonb *jb = PG_GETARG_JSONB_P(0);
1094 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
1095 int total = JB_ROOT_COUNT(jb);
1096 JsonbIterator *it;
1097 JsonbValue v;
1098 JsonbIteratorToken r;
1099 PathHashStack tail;
1100 PathHashStack *stack;
1101 GinEntries entries;
1102
1103 /* If the root level is empty, we certainly have no keys */
1104 if (total == 0)
1105 {
1106 *nentries = 0;
1107 PG_RETURN_POINTER(NULL);
1108 }
1109
1110 /* Otherwise, use 2 * root count as initial estimate of result size */
1111 init_gin_entries(&entries, 2 * total);
1112
1113 /* We keep a stack of partial hashes corresponding to parent key levels */
1114 tail.parent = NULL;
1115 tail.hash = 0;
1116 stack = &tail;
1117
1118 it = JsonbIteratorInit(&jb->root);
1119
1120 while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
1121 {
1122 PathHashStack *parent;
1123
1124 switch (r)
1125 {
1126 case WJB_BEGIN_ARRAY:
1127 case WJB_BEGIN_OBJECT:
1128 /* Push a stack level for this object */
1129 parent = stack;
1130 stack = (PathHashStack *) palloc(sizeof(PathHashStack));
1131
1132 /*
1133 * We pass forward hashes from outer nesting levels so that
1134 * the hashes for nested values will include outer keys as
1135 * well as their own keys.
1136 *
1137 * Nesting an array within another array will not alter
1138 * innermost scalar element hash values, but that seems
1139 * inconsequential.
1140 */
1141 stack->hash = parent->hash;
1142 stack->parent = parent;
1143 break;
1144 case WJB_KEY:
1145 /* mix this key into the current outer hash */
1146 JsonbHashScalarValue(&v, &stack->hash);
1147 /* hash is now ready to incorporate the value */
1148 break;
1149 case WJB_ELEM:
1150 case WJB_VALUE:
1151 /* mix the element or value's hash into the prepared hash */
1152 JsonbHashScalarValue(&v, &stack->hash);
1153 /* and emit an index entry */
1154 add_gin_entry(&entries, UInt32GetDatum(stack->hash));
1155 /* reset hash for next key, value, or sub-object */
1156 stack->hash = stack->parent->hash;
1157 break;
1158 case WJB_END_ARRAY:
1159 case WJB_END_OBJECT:
1160 /* Pop the stack */
1161 parent = stack->parent;
1162 pfree(stack);
1163 stack = parent;
1164 /* reset hash for next key, value, or sub-object */
1165 if (stack->parent)
1166 stack->hash = stack->parent->hash;
1167 else
1168 stack->hash = 0;
1169 break;
1170 default:
1171 elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r);
1172 }
1173 }
1174
1175 *nentries = entries.count;
1176
1177 PG_RETURN_POINTER(entries.buf);
1178}
1179
1180Datum
1181gin_extract_jsonb_query_path(PG_FUNCTION_ARGS)
1182{
1183 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
1184 StrategyNumber strategy = PG_GETARG_UINT16(2);
1185 int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
1186 Datum *entries;
1187
1188 if (strategy == JsonbContainsStrategyNumber)
1189 {
1190 /* Query is a jsonb, so just apply gin_extract_jsonb_path ... */
1191 entries = (Datum *)
1192 DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_path,
1193 PG_GETARG_DATUM(0),
1194 PointerGetDatum(nentries)));
1195
1196 /* ... although "contains {}" requires a full index scan */
1197 if (*nentries == 0)
1198 *searchMode = GIN_SEARCH_MODE_ALL;
1199 }
1200 else if (strategy == JsonbJsonpathPredicateStrategyNumber ||
1201 strategy == JsonbJsonpathExistsStrategyNumber)
1202 {
1203 JsonPath *jp = PG_GETARG_JSONPATH_P(0);
1204 Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
1205
1206 entries = extract_jsp_query(jp, strategy, true, nentries, extra_data);
1207
1208 if (!entries)
1209 *searchMode = GIN_SEARCH_MODE_ALL;
1210 }
1211 else
1212 {
1213 elog(ERROR, "unrecognized strategy number: %d", strategy);
1214 entries = NULL;
1215 }
1216
1217 PG_RETURN_POINTER(entries);
1218}
1219
1220Datum
1221gin_consistent_jsonb_path(PG_FUNCTION_ARGS)
1222{
1223 bool *check = (bool *) PG_GETARG_POINTER(0);
1224 StrategyNumber strategy = PG_GETARG_UINT16(1);
1225
1226 /* Jsonb *query = PG_GETARG_JSONB_P(2); */
1227 int32 nkeys = PG_GETARG_INT32(3);
1228 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
1229 bool *recheck = (bool *) PG_GETARG_POINTER(5);
1230 bool res = true;
1231 int32 i;
1232
1233 if (strategy == JsonbContainsStrategyNumber)
1234 {
1235 /*
1236 * jsonb_path_ops is necessarily lossy, not only because of hash
1237 * collisions but also because it doesn't preserve complete
1238 * information about the structure of the JSON object. Besides, there
1239 * are some special rules around the containment of raw scalars in
1240 * arrays that are not handled here. So we must always recheck a
1241 * match. However, if not all of the keys are present, the tuple
1242 * certainly doesn't match.
1243 */
1244 *recheck = true;
1245 for (i = 0; i < nkeys; i++)
1246 {
1247 if (!check[i])
1248 {
1249 res = false;
1250 break;
1251 }
1252 }
1253 }
1254 else if (strategy == JsonbJsonpathPredicateStrategyNumber ||
1255 strategy == JsonbJsonpathExistsStrategyNumber)
1256 {
1257 *recheck = true;
1258
1259 if (nkeys > 0)
1260 {
1261 Assert(extra_data && extra_data[0]);
1262 res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check,
1263 false) != GIN_FALSE;
1264 }
1265 }
1266 else
1267 elog(ERROR, "unrecognized strategy number: %d", strategy);
1268
1269 PG_RETURN_BOOL(res);
1270}
1271
1272Datum
1273gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS)
1274{
1275 GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
1276 StrategyNumber strategy = PG_GETARG_UINT16(1);
1277
1278 /* Jsonb *query = PG_GETARG_JSONB_P(2); */
1279 int32 nkeys = PG_GETARG_INT32(3);
1280 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
1281 GinTernaryValue res = GIN_MAYBE;
1282 int32 i;
1283
1284 if (strategy == JsonbContainsStrategyNumber)
1285 {
1286 /*
1287 * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE;
1288 * this corresponds to always forcing recheck in the regular
1289 * consistent function, for the reasons listed there.
1290 */
1291 for (i = 0; i < nkeys; i++)
1292 {
1293 if (check[i] == GIN_FALSE)
1294 {
1295 res = GIN_FALSE;
1296 break;
1297 }
1298 }
1299 }
1300 else if (strategy == JsonbJsonpathPredicateStrategyNumber ||
1301 strategy == JsonbJsonpathExistsStrategyNumber)
1302 {
1303 if (nkeys > 0)
1304 {
1305 Assert(extra_data && extra_data[0]);
1306 res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check,
1307 true);
1308
1309 /* Should always recheck the result */
1310 if (res == GIN_TRUE)
1311 res = GIN_MAYBE;
1312 }
1313 }
1314 else
1315 elog(ERROR, "unrecognized strategy number: %d", strategy);
1316
1317 PG_RETURN_GIN_TERNARY_VALUE(res);
1318}
1319
1320/*
1321 * Construct a jsonb_ops GIN key from a flag byte and a textual representation
1322 * (which need not be null-terminated). This function is responsible
1323 * for hashing overlength text representations; it will add the
1324 * JGINFLAG_HASHED bit to the flag value if it does that.
1325 */
1326static Datum
1327make_text_key(char flag, const char *str, int len)
1328{
1329 text *item;
1330 char hashbuf[10];
1331
1332 if (len > JGIN_MAXLENGTH)
1333 {
1334 uint32 hashval;
1335
1336 hashval = DatumGetUInt32(hash_any((const unsigned char *) str, len));
1337 snprintf(hashbuf, sizeof(hashbuf), "%08x", hashval);
1338 str = hashbuf;
1339 len = 8;
1340 flag |= JGINFLAG_HASHED;
1341 }
1342
1343 /*
1344 * Now build the text Datum. For simplicity we build a 4-byte-header
1345 * varlena text Datum here, but we expect it will get converted to short
1346 * header format when stored in the index.
1347 */
1348 item = (text *) palloc(VARHDRSZ + len + 1);
1349 SET_VARSIZE(item, VARHDRSZ + len + 1);
1350
1351 *VARDATA(item) = flag;
1352
1353 memcpy(VARDATA(item) + 1, str, len);
1354
1355 return PointerGetDatum(item);
1356}
1357
1358/*
1359 * Create a textual representation of a JsonbValue that will serve as a GIN
1360 * key in a jsonb_ops index. is_key is true if the JsonbValue is a key,
1361 * or if it is a string array element (since we pretend those are keys,
1362 * see jsonb.h).
1363 */
1364static Datum
1365make_scalar_key(const JsonbValue *scalarVal, bool is_key)
1366{
1367 Datum item;
1368 char *cstr;
1369
1370 switch (scalarVal->type)
1371 {
1372 case jbvNull:
1373 Assert(!is_key);
1374 item = make_text_key(JGINFLAG_NULL, "", 0);
1375 break;
1376 case jbvBool:
1377 Assert(!is_key);
1378 item = make_text_key(JGINFLAG_BOOL,
1379 scalarVal->val.boolean ? "t" : "f", 1);
1380 break;
1381 case jbvNumeric:
1382 Assert(!is_key);
1383
1384 /*
1385 * A normalized textual representation, free of trailing zeroes,
1386 * is required so that numerically equal values will produce equal
1387 * strings.
1388 *
1389 * It isn't ideal that numerics are stored in a relatively bulky
1390 * textual format. However, it's a notationally convenient way of
1391 * storing a "union" type in the GIN B-Tree, and indexing Jsonb
1392 * strings takes precedence.
1393 */
1394 cstr = numeric_normalize(scalarVal->val.numeric);
1395 item = make_text_key(JGINFLAG_NUM, cstr, strlen(cstr));
1396 pfree(cstr);
1397 break;
1398 case jbvString:
1399 item = make_text_key(is_key ? JGINFLAG_KEY : JGINFLAG_STR,
1400 scalarVal->val.string.val,
1401 scalarVal->val.string.len);
1402 break;
1403 default:
1404 elog(ERROR, "unrecognized jsonb scalar type: %d", scalarVal->type);
1405 item = 0; /* keep compiler quiet */
1406 break;
1407 }
1408
1409 return item;
1410}
1411