| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * jsonb_gin.c |
| 4 | * GIN support functions for jsonb |
| 5 | * |
| 6 | * Copyright (c) 2014-2019, PostgreSQL Global Development Group |
| 7 | * |
| 8 | * We provide two opclasses for jsonb indexing: jsonb_ops and jsonb_path_ops. |
| 9 | * For their description see json.sgml and comments in jsonb.h. |
| 10 | * |
| 11 | * The operators support, among the others, "jsonb @? jsonpath" and |
| 12 | * "jsonb @@ jsonpath". Expressions containing these operators are easily |
| 13 | * expressed through each other. |
| 14 | * |
| 15 | * jb @? 'path' <=> jb @@ 'EXISTS(path)' |
| 16 | * jb @@ 'expr' <=> jb @? '$ ? (expr)' |
| 17 | * |
| 18 | * Thus, we're going to consider only @@ operator, while regarding @? operator |
| 19 | * the same is true for jb @@ 'EXISTS(path)'. |
| 20 | * |
| 21 | * Result of jsonpath query extraction is a tree, which leaf nodes are index |
| 22 | * entries and non-leaf nodes are AND/OR logical expressions. Basically we |
| 23 | * extract following statements out of jsonpath: |
| 24 | * |
| 25 | * 1) "accessors_chain = const", |
| 26 | * 2) "EXISTS(accessors_chain)". |
| 27 | * |
| 28 | * Accessors chain may consist of .key, [*] and [index] accessors. jsonb_ops |
| 29 | * additionally supports .* and .**. |
| 30 | * |
| 31 | * For now, both jsonb_ops and jsonb_path_ops support only statements of |
| 32 | * the 1st kind. jsonb_ops might also support statements of the 2nd kind, |
| 33 | * but given we have no statistics, keys extracted from accessors chain |
| 34 | * are likely non-selective. Therefore, we choose to not confuse optimizer |
| 35 | * and skip statements of the 2nd kind altogether. In future versions that |
| 36 | * might be changed. |
| 37 | * |
| 38 | * In jsonb_ops statement of the 1st kind is split into expression of AND'ed |
| 39 | * keys and const. Sometimes const might be interpreted as both value or key |
| 40 | * in jsonb_ops. Then statement of 1st kind is decomposed into the expression |
| 41 | * below. |
| 42 | * |
| 43 | * key1 AND key2 AND ... AND keyN AND (const_as_value OR const_as_key) |
| 44 | * |
| 45 | * jsonb_path_ops transforms each statement of the 1st kind into single hash |
| 46 | * entry below. |
| 47 | * |
| 48 | * HASH(key1, key2, ... , keyN, const) |
| 49 | * |
| 50 | * Although statements of the 2nd kind are supported by neither jsonb_ops nor |
| 51 | * jsonb_path_ops, EXISTS(path) expressions might still be supported |
| 52 | * when statements of the 1st kind can be extracted out of their filters. |
| 53 | * |
| 54 | * IDENTIFICATION |
| 55 | * src/backend/utils/adt/jsonb_gin.c |
| 56 | * |
| 57 | *------------------------------------------------------------------------- |
| 58 | */ |
| 59 | |
| 60 | #include "postgres.h" |
| 61 | |
| 62 | #include "access/gin.h" |
| 63 | #include "access/stratnum.h" |
| 64 | #include "catalog/pg_collation.h" |
| 65 | #include "catalog/pg_type.h" |
| 66 | #include "miscadmin.h" |
| 67 | #include "utils/builtins.h" |
| 68 | #include "utils/hashutils.h" |
| 69 | #include "utils/jsonb.h" |
| 70 | #include "utils/jsonpath.h" |
| 71 | #include "utils/varlena.h" |
| 72 | |
| 73 | typedef struct PathHashStack |
| 74 | { |
| 75 | uint32 hash; |
| 76 | struct PathHashStack *parent; |
| 77 | } PathHashStack; |
| 78 | |
| 79 | /* Buffer for GIN entries */ |
| 80 | typedef struct GinEntries |
| 81 | { |
| 82 | Datum *buf; |
| 83 | int count; |
| 84 | int allocated; |
| 85 | } GinEntries; |
| 86 | |
/* Kinds of nodes in the expression tree extracted from a jsonpath query */
typedef enum JsonPathGinNodeType
{
    JSP_GIN_OR,                 /* logical OR of child nodes */
    JSP_GIN_AND,                /* logical AND of child nodes */
    JSP_GIN_ENTRY               /* leaf: a single index entry */
} JsonPathGinNodeType;
| 93 | |
| 94 | typedef struct JsonPathGinNode JsonPathGinNode; |
| 95 | |
| 96 | /* Node in jsonpath expression tree */ |
| 97 | struct JsonPathGinNode |
| 98 | { |
| 99 | JsonPathGinNodeType type; |
| 100 | union |
| 101 | { |
| 102 | int nargs; /* valid for OR and AND nodes */ |
| 103 | int entryIndex; /* index in GinEntries array, valid for ENTRY |
| 104 | * nodes after entries output */ |
| 105 | Datum entryDatum; /* path hash or key name/scalar, valid for |
| 106 | * ENTRY nodes before entries output */ |
| 107 | } val; |
| 108 | JsonPathGinNode *args[FLEXIBLE_ARRAY_MEMBER]; /* valid for OR and AND |
| 109 | * nodes */ |
| 110 | }; |
| 111 | |
| 112 | /* |
| 113 | * jsonb_ops entry extracted from jsonpath item. Corresponding path item |
| 114 | * may be: '.key', '.*', '.**', '[index]' or '[*]'. |
| 115 | * Entry type is stored in 'type' field. |
| 116 | */ |
| 117 | typedef struct JsonPathGinPathItem |
| 118 | { |
| 119 | struct JsonPathGinPathItem *parent; |
| 120 | Datum keyName; /* key name (for '.key' path item) or NULL */ |
| 121 | JsonPathItemType type; /* type of jsonpath item */ |
| 122 | } JsonPathGinPathItem; |
| 123 | |
| 124 | /* GIN representation of the extracted json path */ |
| 125 | typedef union JsonPathGinPath |
| 126 | { |
| 127 | JsonPathGinPathItem *items; /* list of path items (jsonb_ops) */ |
| 128 | uint32 hash; /* hash of the path (jsonb_path_ops) */ |
| 129 | } JsonPathGinPath; |
| 130 | |
| 131 | typedef struct JsonPathGinContext JsonPathGinContext; |
| 132 | |
| 133 | /* Callback, which stores information about path item into JsonPathGinPath */ |
| 134 | typedef bool (*JsonPathGinAddPathItemFunc) (JsonPathGinPath *path, |
| 135 | JsonPathItem *jsp); |
| 136 | |
| 137 | /* |
| 138 | * Callback, which extracts set of nodes from statement of 1st kind |
| 139 | * (scalar != NULL) or statement of 2nd kind (scalar == NULL). |
| 140 | */ |
| 141 | typedef List *(*) (JsonPathGinContext *cxt, |
| 142 | JsonPathGinPath path, |
| 143 | JsonbValue *scalar, |
| 144 | List *nodes); |
| 145 | |
| 146 | /* Context for jsonpath entries extraction */ |
| 147 | struct JsonPathGinContext |
| 148 | { |
| 149 | JsonPathGinAddPathItemFunc add_path_item; |
| 150 | JsonPathGinExtractNodesFunc ; |
| 151 | bool lax; |
| 152 | }; |
| 153 | |
| 154 | static Datum make_text_key(char flag, const char *str, int len); |
| 155 | static Datum make_scalar_key(const JsonbValue *scalarVal, bool is_key); |
| 156 | |
| 157 | static JsonPathGinNode *extract_jsp_bool_expr(JsonPathGinContext *cxt, |
| 158 | JsonPathGinPath path, JsonPathItem *jsp, bool not); |
| 159 | |
| 160 | |
| 161 | /* Initialize GinEntries struct */ |
| 162 | static void |
| 163 | init_gin_entries(GinEntries *entries, int preallocated) |
| 164 | { |
| 165 | entries->allocated = preallocated; |
| 166 | entries->buf = preallocated ? palloc(sizeof(Datum) * preallocated) : NULL; |
| 167 | entries->count = 0; |
| 168 | } |
| 169 | |
| 170 | /* Add new entry to GinEntries */ |
| 171 | static int |
| 172 | add_gin_entry(GinEntries *entries, Datum entry) |
| 173 | { |
| 174 | int id = entries->count; |
| 175 | |
| 176 | if (entries->count >= entries->allocated) |
| 177 | { |
| 178 | if (entries->allocated) |
| 179 | { |
| 180 | entries->allocated *= 2; |
| 181 | entries->buf = repalloc(entries->buf, |
| 182 | sizeof(Datum) * entries->allocated); |
| 183 | } |
| 184 | else |
| 185 | { |
| 186 | entries->allocated = 8; |
| 187 | entries->buf = palloc(sizeof(Datum) * entries->allocated); |
| 188 | } |
| 189 | } |
| 190 | |
| 191 | entries->buf[entries->count++] = entry; |
| 192 | |
| 193 | return id; |
| 194 | } |
| 195 | |
| 196 | /* |
| 197 | * |
| 198 | * jsonb_ops GIN opclass support functions |
| 199 | * |
| 200 | */ |
| 201 | |
| 202 | Datum |
| 203 | gin_compare_jsonb(PG_FUNCTION_ARGS) |
| 204 | { |
| 205 | text *arg1 = PG_GETARG_TEXT_PP(0); |
| 206 | text *arg2 = PG_GETARG_TEXT_PP(1); |
| 207 | int32 result; |
| 208 | char *a1p, |
| 209 | *a2p; |
| 210 | int len1, |
| 211 | len2; |
| 212 | |
| 213 | a1p = VARDATA_ANY(arg1); |
| 214 | a2p = VARDATA_ANY(arg2); |
| 215 | |
| 216 | len1 = VARSIZE_ANY_EXHDR(arg1); |
| 217 | len2 = VARSIZE_ANY_EXHDR(arg2); |
| 218 | |
| 219 | /* Compare text as bttextcmp does, but always using C collation */ |
| 220 | result = varstr_cmp(a1p, len1, a2p, len2, C_COLLATION_OID); |
| 221 | |
| 222 | PG_FREE_IF_COPY(arg1, 0); |
| 223 | PG_FREE_IF_COPY(arg2, 1); |
| 224 | |
| 225 | PG_RETURN_INT32(result); |
| 226 | } |
| 227 | |
| 228 | Datum |
| 229 | (PG_FUNCTION_ARGS) |
| 230 | { |
| 231 | Jsonb *jb = (Jsonb *) PG_GETARG_JSONB_P(0); |
| 232 | int32 *nentries = (int32 *) PG_GETARG_POINTER(1); |
| 233 | int total = JB_ROOT_COUNT(jb); |
| 234 | JsonbIterator *it; |
| 235 | JsonbValue v; |
| 236 | JsonbIteratorToken r; |
| 237 | GinEntries entries; |
| 238 | |
| 239 | /* If the root level is empty, we certainly have no keys */ |
| 240 | if (total == 0) |
| 241 | { |
| 242 | *nentries = 0; |
| 243 | PG_RETURN_POINTER(NULL); |
| 244 | } |
| 245 | |
| 246 | /* Otherwise, use 2 * root count as initial estimate of result size */ |
| 247 | init_gin_entries(&entries, 2 * total); |
| 248 | |
| 249 | it = JsonbIteratorInit(&jb->root); |
| 250 | |
| 251 | while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) |
| 252 | { |
| 253 | switch (r) |
| 254 | { |
| 255 | case WJB_KEY: |
| 256 | add_gin_entry(&entries, make_scalar_key(&v, true)); |
| 257 | break; |
| 258 | case WJB_ELEM: |
| 259 | /* Pretend string array elements are keys, see jsonb.h */ |
| 260 | add_gin_entry(&entries, make_scalar_key(&v, v.type == jbvString)); |
| 261 | break; |
| 262 | case WJB_VALUE: |
| 263 | add_gin_entry(&entries, make_scalar_key(&v, false)); |
| 264 | break; |
| 265 | default: |
| 266 | /* we can ignore structural items */ |
| 267 | break; |
| 268 | } |
| 269 | } |
| 270 | |
| 271 | *nentries = entries.count; |
| 272 | |
| 273 | PG_RETURN_POINTER(entries.buf); |
| 274 | } |
| 275 | |
| 276 | /* Append JsonPathGinPathItem to JsonPathGinPath (jsonb_ops) */ |
| 277 | static bool |
| 278 | jsonb_ops__add_path_item(JsonPathGinPath *path, JsonPathItem *jsp) |
| 279 | { |
| 280 | JsonPathGinPathItem *pentry; |
| 281 | Datum keyName; |
| 282 | |
| 283 | switch (jsp->type) |
| 284 | { |
| 285 | case jpiRoot: |
| 286 | path->items = NULL; /* reset path */ |
| 287 | return true; |
| 288 | |
| 289 | case jpiKey: |
| 290 | { |
| 291 | int len; |
| 292 | char *key = jspGetString(jsp, &len); |
| 293 | |
| 294 | keyName = make_text_key(JGINFLAG_KEY, key, len); |
| 295 | break; |
| 296 | } |
| 297 | |
| 298 | case jpiAny: |
| 299 | case jpiAnyKey: |
| 300 | case jpiAnyArray: |
| 301 | case jpiIndexArray: |
| 302 | keyName = PointerGetDatum(NULL); |
| 303 | break; |
| 304 | |
| 305 | default: |
| 306 | /* other path items like item methods are not supported */ |
| 307 | return false; |
| 308 | } |
| 309 | |
| 310 | pentry = palloc(sizeof(*pentry)); |
| 311 | |
| 312 | pentry->type = jsp->type; |
| 313 | pentry->keyName = keyName; |
| 314 | pentry->parent = path->items; |
| 315 | |
| 316 | path->items = pentry; |
| 317 | |
| 318 | return true; |
| 319 | } |
| 320 | |
| 321 | /* Combine existing path hash with next key hash (jsonb_path_ops) */ |
| 322 | static bool |
| 323 | jsonb_path_ops__add_path_item(JsonPathGinPath *path, JsonPathItem *jsp) |
| 324 | { |
| 325 | switch (jsp->type) |
| 326 | { |
| 327 | case jpiRoot: |
| 328 | path->hash = 0; /* reset path hash */ |
| 329 | return true; |
| 330 | |
| 331 | case jpiKey: |
| 332 | { |
| 333 | JsonbValue jbv; |
| 334 | |
| 335 | jbv.type = jbvString; |
| 336 | jbv.val.string.val = jspGetString(jsp, &jbv.val.string.len); |
| 337 | |
| 338 | JsonbHashScalarValue(&jbv, &path->hash); |
| 339 | return true; |
| 340 | } |
| 341 | |
| 342 | case jpiIndexArray: |
| 343 | case jpiAnyArray: |
| 344 | return true; /* path hash is unchanged */ |
| 345 | |
| 346 | default: |
| 347 | /* other items (wildcard paths, item methods) are not supported */ |
| 348 | return false; |
| 349 | } |
| 350 | } |
| 351 | |
| 352 | static JsonPathGinNode * |
| 353 | make_jsp_entry_node(Datum entry) |
| 354 | { |
| 355 | JsonPathGinNode *node = palloc(offsetof(JsonPathGinNode, args)); |
| 356 | |
| 357 | node->type = JSP_GIN_ENTRY; |
| 358 | node->val.entryDatum = entry; |
| 359 | |
| 360 | return node; |
| 361 | } |
| 362 | |
| 363 | static JsonPathGinNode * |
| 364 | make_jsp_entry_node_scalar(JsonbValue *scalar, bool iskey) |
| 365 | { |
| 366 | return make_jsp_entry_node(make_scalar_key(scalar, iskey)); |
| 367 | } |
| 368 | |
| 369 | static JsonPathGinNode * |
| 370 | make_jsp_expr_node(JsonPathGinNodeType type, int nargs) |
| 371 | { |
| 372 | JsonPathGinNode *node = palloc(offsetof(JsonPathGinNode, args) + |
| 373 | sizeof(node->args[0]) * nargs); |
| 374 | |
| 375 | node->type = type; |
| 376 | node->val.nargs = nargs; |
| 377 | |
| 378 | return node; |
| 379 | } |
| 380 | |
| 381 | static JsonPathGinNode * |
| 382 | make_jsp_expr_node_args(JsonPathGinNodeType type, List *args) |
| 383 | { |
| 384 | JsonPathGinNode *node = make_jsp_expr_node(type, list_length(args)); |
| 385 | ListCell *lc; |
| 386 | int i = 0; |
| 387 | |
| 388 | foreach(lc, args) |
| 389 | node->args[i++] = lfirst(lc); |
| 390 | |
| 391 | return node; |
| 392 | } |
| 393 | |
| 394 | static JsonPathGinNode * |
| 395 | make_jsp_expr_node_binary(JsonPathGinNodeType type, |
| 396 | JsonPathGinNode *arg1, JsonPathGinNode *arg2) |
| 397 | { |
| 398 | JsonPathGinNode *node = make_jsp_expr_node(type, 2); |
| 399 | |
| 400 | node->args[0] = arg1; |
| 401 | node->args[1] = arg2; |
| 402 | |
| 403 | return node; |
| 404 | } |
| 405 | |
| 406 | /* Append a list of nodes from the jsonpath (jsonb_ops). */ |
| 407 | static List * |
| 408 | (JsonPathGinContext *cxt, JsonPathGinPath path, |
| 409 | JsonbValue *scalar, List *nodes) |
| 410 | { |
| 411 | JsonPathGinPathItem *pentry; |
| 412 | |
| 413 | if (scalar) |
| 414 | { |
| 415 | JsonPathGinNode *node; |
| 416 | |
| 417 | /* |
| 418 | * Append path entry nodes only if scalar is provided. See header |
| 419 | * comment for details. |
| 420 | */ |
| 421 | for (pentry = path.items; pentry; pentry = pentry->parent) |
| 422 | { |
| 423 | if (pentry->type == jpiKey) /* only keys are indexed */ |
| 424 | nodes = lappend(nodes, make_jsp_entry_node(pentry->keyName)); |
| 425 | } |
| 426 | |
| 427 | /* Append scalar node for equality queries. */ |
| 428 | if (scalar->type == jbvString) |
| 429 | { |
| 430 | JsonPathGinPathItem *last = path.items; |
| 431 | GinTernaryValue key_entry; |
| 432 | |
| 433 | /* |
| 434 | * Assuming that jsonb_ops interprets string array elements as |
| 435 | * keys, we may extract key or non-key entry or even both. In the |
| 436 | * latter case we create OR-node. It is possible in lax mode |
| 437 | * where arrays are automatically unwrapped, or in strict mode for |
| 438 | * jpiAny items. |
| 439 | */ |
| 440 | |
| 441 | if (cxt->lax) |
| 442 | key_entry = GIN_MAYBE; |
| 443 | else if (!last) /* root ($) */ |
| 444 | key_entry = GIN_FALSE; |
| 445 | else if (last->type == jpiAnyArray || last->type == jpiIndexArray) |
| 446 | key_entry = GIN_TRUE; |
| 447 | else if (last->type == jpiAny) |
| 448 | key_entry = GIN_MAYBE; |
| 449 | else |
| 450 | key_entry = GIN_FALSE; |
| 451 | |
| 452 | if (key_entry == GIN_MAYBE) |
| 453 | { |
| 454 | JsonPathGinNode *n1 = make_jsp_entry_node_scalar(scalar, true); |
| 455 | JsonPathGinNode *n2 = make_jsp_entry_node_scalar(scalar, false); |
| 456 | |
| 457 | node = make_jsp_expr_node_binary(JSP_GIN_OR, n1, n2); |
| 458 | } |
| 459 | else |
| 460 | { |
| 461 | node = make_jsp_entry_node_scalar(scalar, |
| 462 | key_entry == GIN_TRUE); |
| 463 | } |
| 464 | } |
| 465 | else |
| 466 | { |
| 467 | node = make_jsp_entry_node_scalar(scalar, false); |
| 468 | } |
| 469 | |
| 470 | nodes = lappend(nodes, node); |
| 471 | } |
| 472 | |
| 473 | return nodes; |
| 474 | } |
| 475 | |
| 476 | /* Append a list of nodes from the jsonpath (jsonb_path_ops). */ |
| 477 | static List * |
| 478 | (JsonPathGinContext *cxt, JsonPathGinPath path, |
| 479 | JsonbValue *scalar, List *nodes) |
| 480 | { |
| 481 | if (scalar) |
| 482 | { |
| 483 | /* append path hash node for equality queries */ |
| 484 | uint32 hash = path.hash; |
| 485 | |
| 486 | JsonbHashScalarValue(scalar, &hash); |
| 487 | |
| 488 | return lappend(nodes, |
| 489 | make_jsp_entry_node(UInt32GetDatum(hash))); |
| 490 | } |
| 491 | else |
| 492 | { |
| 493 | /* jsonb_path_ops doesn't support EXISTS queries => nothing to append */ |
| 494 | return nodes; |
| 495 | } |
| 496 | } |
| 497 | |
| 498 | /* |
| 499 | * Extract a list of expression nodes that need to be AND-ed by the caller. |
| 500 | * Extracted expression is 'path == scalar' if 'scalar' is non-NULL, and |
| 501 | * 'EXISTS(path)' otherwise. |
| 502 | */ |
| 503 | static List * |
| 504 | (JsonPathGinContext *cxt, JsonPathGinPath path, |
| 505 | JsonPathItem *jsp, JsonbValue *scalar) |
| 506 | { |
| 507 | JsonPathItem next; |
| 508 | List *nodes = NIL; |
| 509 | |
| 510 | for (;;) |
| 511 | { |
| 512 | switch (jsp->type) |
| 513 | { |
| 514 | case jpiCurrent: |
| 515 | break; |
| 516 | |
| 517 | case jpiFilter: |
| 518 | { |
| 519 | JsonPathItem arg; |
| 520 | JsonPathGinNode *filter; |
| 521 | |
| 522 | jspGetArg(jsp, &arg); |
| 523 | |
| 524 | filter = extract_jsp_bool_expr(cxt, path, &arg, false); |
| 525 | |
| 526 | if (filter) |
| 527 | nodes = lappend(nodes, filter); |
| 528 | |
| 529 | break; |
| 530 | } |
| 531 | |
| 532 | default: |
| 533 | if (!cxt->add_path_item(&path, jsp)) |
| 534 | |
| 535 | /* |
| 536 | * Path is not supported by the index opclass, return only |
| 537 | * the extracted filter nodes. |
| 538 | */ |
| 539 | return nodes; |
| 540 | break; |
| 541 | } |
| 542 | |
| 543 | if (!jspGetNext(jsp, &next)) |
| 544 | break; |
| 545 | |
| 546 | jsp = &next; |
| 547 | } |
| 548 | |
| 549 | /* |
| 550 | * Append nodes from the path expression itself to the already extracted |
| 551 | * list of filter nodes. |
| 552 | */ |
| 553 | return cxt->extract_nodes(cxt, path, scalar, nodes); |
| 554 | } |
| 555 | |
| 556 | /* |
| 557 | * Extract an expression node from one of following jsonpath path expressions: |
| 558 | * EXISTS(jsp) (when 'scalar' is NULL) |
| 559 | * jsp == scalar (when 'scalar' is not NULL). |
| 560 | * |
| 561 | * The current path (@) is passed in 'path'. |
| 562 | */ |
| 563 | static JsonPathGinNode * |
| 564 | (JsonPathGinContext *cxt, JsonPathGinPath path, |
| 565 | JsonPathItem *jsp, JsonbValue *scalar) |
| 566 | { |
| 567 | /* extract a list of nodes to be AND-ed */ |
| 568 | List *nodes = extract_jsp_path_expr_nodes(cxt, path, jsp, scalar); |
| 569 | |
| 570 | if (list_length(nodes) <= 0) |
| 571 | /* no nodes were extracted => full scan is needed for this path */ |
| 572 | return NULL; |
| 573 | |
| 574 | if (list_length(nodes) == 1) |
| 575 | return linitial(nodes); /* avoid extra AND-node */ |
| 576 | |
| 577 | /* construct AND-node for path with filters */ |
| 578 | return make_jsp_expr_node_args(JSP_GIN_AND, nodes); |
| 579 | } |
| 580 | |
| 581 | /* Recursively extract nodes from the boolean jsonpath expression. */ |
| 582 | static JsonPathGinNode * |
| 583 | (JsonPathGinContext *cxt, JsonPathGinPath path, |
| 584 | JsonPathItem *jsp, bool not) |
| 585 | { |
| 586 | check_stack_depth(); |
| 587 | |
| 588 | switch (jsp->type) |
| 589 | { |
| 590 | case jpiAnd: /* expr && expr */ |
| 591 | case jpiOr: /* expr || expr */ |
| 592 | { |
| 593 | JsonPathItem arg; |
| 594 | JsonPathGinNode *larg; |
| 595 | JsonPathGinNode *rarg; |
| 596 | JsonPathGinNodeType type; |
| 597 | |
| 598 | jspGetLeftArg(jsp, &arg); |
| 599 | larg = extract_jsp_bool_expr(cxt, path, &arg, not); |
| 600 | |
| 601 | jspGetRightArg(jsp, &arg); |
| 602 | rarg = extract_jsp_bool_expr(cxt, path, &arg, not); |
| 603 | |
| 604 | if (!larg || !rarg) |
| 605 | { |
| 606 | if (jsp->type == jpiOr) |
| 607 | return NULL; |
| 608 | |
| 609 | return larg ? larg : rarg; |
| 610 | } |
| 611 | |
| 612 | type = not ^ (jsp->type == jpiAnd) ? JSP_GIN_AND : JSP_GIN_OR; |
| 613 | |
| 614 | return make_jsp_expr_node_binary(type, larg, rarg); |
| 615 | } |
| 616 | |
| 617 | case jpiNot: /* !expr */ |
| 618 | { |
| 619 | JsonPathItem arg; |
| 620 | |
| 621 | jspGetArg(jsp, &arg); |
| 622 | |
| 623 | /* extract child expression inverting 'not' flag */ |
| 624 | return extract_jsp_bool_expr(cxt, path, &arg, !not); |
| 625 | } |
| 626 | |
| 627 | case jpiExists: /* EXISTS(path) */ |
| 628 | { |
| 629 | JsonPathItem arg; |
| 630 | |
| 631 | if (not) |
| 632 | return NULL; /* NOT EXISTS is not supported */ |
| 633 | |
| 634 | jspGetArg(jsp, &arg); |
| 635 | |
| 636 | return extract_jsp_path_expr(cxt, path, &arg, NULL); |
| 637 | } |
| 638 | |
| 639 | case jpiNotEqual: |
| 640 | |
| 641 | /* |
| 642 | * 'not' == true case is not supported here because '!(path != |
| 643 | * scalar)' is not equivalent to 'path == scalar' in the general |
| 644 | * case because of sequence comparison semantics: 'path == scalar' |
| 645 | * === 'EXISTS (path, @ == scalar)', '!(path != scalar)' === |
| 646 | * 'FOR_ALL(path, @ == scalar)'. So, we should translate '!(path |
| 647 | * != scalar)' into GIN query 'path == scalar || EMPTY(path)', but |
| 648 | * 'EMPTY(path)' queries are not supported by the both jsonb |
| 649 | * opclasses. However in strict mode we could omit 'EMPTY(path)' |
| 650 | * part if the path can return exactly one item (it does not |
| 651 | * contain wildcard accessors or item methods like .keyvalue() |
| 652 | * etc.). |
| 653 | */ |
| 654 | return NULL; |
| 655 | |
| 656 | case jpiEqual: /* path == scalar */ |
| 657 | { |
| 658 | JsonPathItem left_item; |
| 659 | JsonPathItem right_item; |
| 660 | JsonPathItem *path_item; |
| 661 | JsonPathItem *scalar_item; |
| 662 | JsonbValue scalar; |
| 663 | |
| 664 | if (not) |
| 665 | return NULL; |
| 666 | |
| 667 | jspGetLeftArg(jsp, &left_item); |
| 668 | jspGetRightArg(jsp, &right_item); |
| 669 | |
| 670 | if (jspIsScalar(left_item.type)) |
| 671 | { |
| 672 | scalar_item = &left_item; |
| 673 | path_item = &right_item; |
| 674 | } |
| 675 | else if (jspIsScalar(right_item.type)) |
| 676 | { |
| 677 | scalar_item = &right_item; |
| 678 | path_item = &left_item; |
| 679 | } |
| 680 | else |
| 681 | return NULL; /* at least one operand should be a scalar */ |
| 682 | |
| 683 | switch (scalar_item->type) |
| 684 | { |
| 685 | case jpiNull: |
| 686 | scalar.type = jbvNull; |
| 687 | break; |
| 688 | case jpiBool: |
| 689 | scalar.type = jbvBool; |
| 690 | scalar.val.boolean = !!*scalar_item->content.value.data; |
| 691 | break; |
| 692 | case jpiNumeric: |
| 693 | scalar.type = jbvNumeric; |
| 694 | scalar.val.numeric = |
| 695 | (Numeric) scalar_item->content.value.data; |
| 696 | break; |
| 697 | case jpiString: |
| 698 | scalar.type = jbvString; |
| 699 | scalar.val.string.val = scalar_item->content.value.data; |
| 700 | scalar.val.string.len = |
| 701 | scalar_item->content.value.datalen; |
| 702 | break; |
| 703 | default: |
| 704 | elog(ERROR, "invalid scalar jsonpath item type: %d" , |
| 705 | scalar_item->type); |
| 706 | return NULL; |
| 707 | } |
| 708 | |
| 709 | return extract_jsp_path_expr(cxt, path, path_item, &scalar); |
| 710 | } |
| 711 | |
| 712 | default: |
| 713 | return NULL; /* not a boolean expression */ |
| 714 | } |
| 715 | } |
| 716 | |
| 717 | /* Recursively emit all GIN entries found in the node tree */ |
| 718 | static void |
| 719 | emit_jsp_gin_entries(JsonPathGinNode *node, GinEntries *entries) |
| 720 | { |
| 721 | check_stack_depth(); |
| 722 | |
| 723 | switch (node->type) |
| 724 | { |
| 725 | case JSP_GIN_ENTRY: |
| 726 | /* replace datum with its index in the array */ |
| 727 | node->val.entryIndex = add_gin_entry(entries, node->val.entryDatum); |
| 728 | break; |
| 729 | |
| 730 | case JSP_GIN_OR: |
| 731 | case JSP_GIN_AND: |
| 732 | { |
| 733 | int i; |
| 734 | |
| 735 | for (i = 0; i < node->val.nargs; i++) |
| 736 | emit_jsp_gin_entries(node->args[i], entries); |
| 737 | |
| 738 | break; |
| 739 | } |
| 740 | } |
| 741 | } |
| 742 | |
| 743 | /* |
| 744 | * Recursively extract GIN entries from jsonpath query. |
| 745 | * Root expression node is put into (*extra_data)[0]. |
| 746 | */ |
| 747 | static Datum * |
| 748 | (JsonPath *jp, StrategyNumber strat, bool pathOps, |
| 749 | int32 *nentries, Pointer **) |
| 750 | { |
| 751 | JsonPathGinContext cxt; |
| 752 | JsonPathItem root; |
| 753 | JsonPathGinNode *node; |
| 754 | JsonPathGinPath path = {0}; |
| 755 | GinEntries entries = {0}; |
| 756 | |
| 757 | cxt.lax = (jp->header & JSONPATH_LAX) != 0; |
| 758 | |
| 759 | if (pathOps) |
| 760 | { |
| 761 | cxt.add_path_item = jsonb_path_ops__add_path_item; |
| 762 | cxt.extract_nodes = jsonb_path_ops__extract_nodes; |
| 763 | } |
| 764 | else |
| 765 | { |
| 766 | cxt.add_path_item = jsonb_ops__add_path_item; |
| 767 | cxt.extract_nodes = jsonb_ops__extract_nodes; |
| 768 | } |
| 769 | |
| 770 | jspInit(&root, jp); |
| 771 | |
| 772 | node = strat == JsonbJsonpathExistsStrategyNumber |
| 773 | ? extract_jsp_path_expr(&cxt, path, &root, NULL) |
| 774 | : extract_jsp_bool_expr(&cxt, path, &root, false); |
| 775 | |
| 776 | if (!node) |
| 777 | { |
| 778 | *nentries = 0; |
| 779 | return NULL; |
| 780 | } |
| 781 | |
| 782 | emit_jsp_gin_entries(node, &entries); |
| 783 | |
| 784 | *nentries = entries.count; |
| 785 | if (!*nentries) |
| 786 | return NULL; |
| 787 | |
| 788 | *extra_data = palloc0(sizeof(**extra_data) * entries.count); |
| 789 | **extra_data = (Pointer) node; |
| 790 | |
| 791 | return entries.buf; |
| 792 | } |
| 793 | |
| 794 | /* |
| 795 | * Recursively execute jsonpath expression. |
| 796 | * 'check' is a bool[] or a GinTernaryValue[] depending on 'ternary' flag. |
| 797 | */ |
| 798 | static GinTernaryValue |
| 799 | execute_jsp_gin_node(JsonPathGinNode *node, void *check, bool ternary) |
| 800 | { |
| 801 | GinTernaryValue res; |
| 802 | GinTernaryValue v; |
| 803 | int i; |
| 804 | |
| 805 | switch (node->type) |
| 806 | { |
| 807 | case JSP_GIN_AND: |
| 808 | res = GIN_TRUE; |
| 809 | for (i = 0; i < node->val.nargs; i++) |
| 810 | { |
| 811 | v = execute_jsp_gin_node(node->args[i], check, ternary); |
| 812 | if (v == GIN_FALSE) |
| 813 | return GIN_FALSE; |
| 814 | else if (v == GIN_MAYBE) |
| 815 | res = GIN_MAYBE; |
| 816 | } |
| 817 | return res; |
| 818 | |
| 819 | case JSP_GIN_OR: |
| 820 | res = GIN_FALSE; |
| 821 | for (i = 0; i < node->val.nargs; i++) |
| 822 | { |
| 823 | v = execute_jsp_gin_node(node->args[i], check, ternary); |
| 824 | if (v == GIN_TRUE) |
| 825 | return GIN_TRUE; |
| 826 | else if (v == GIN_MAYBE) |
| 827 | res = GIN_MAYBE; |
| 828 | } |
| 829 | return res; |
| 830 | |
| 831 | case JSP_GIN_ENTRY: |
| 832 | { |
| 833 | int index = node->val.entryIndex; |
| 834 | |
| 835 | if (ternary) |
| 836 | return ((GinTernaryValue *) check)[index]; |
| 837 | else |
| 838 | return ((bool *) check)[index] ? GIN_TRUE : GIN_FALSE; |
| 839 | } |
| 840 | |
| 841 | default: |
| 842 | elog(ERROR, "invalid jsonpath gin node type: %d" , node->type); |
| 843 | return GIN_FALSE; /* keep compiler quiet */ |
| 844 | } |
| 845 | } |
| 846 | |
| 847 | Datum |
| 848 | (PG_FUNCTION_ARGS) |
| 849 | { |
| 850 | int32 *nentries = (int32 *) PG_GETARG_POINTER(1); |
| 851 | StrategyNumber strategy = PG_GETARG_UINT16(2); |
| 852 | int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); |
| 853 | Datum *entries; |
| 854 | |
| 855 | if (strategy == JsonbContainsStrategyNumber) |
| 856 | { |
| 857 | /* Query is a jsonb, so just apply gin_extract_jsonb... */ |
| 858 | entries = (Datum *) |
| 859 | DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb, |
| 860 | PG_GETARG_DATUM(0), |
| 861 | PointerGetDatum(nentries))); |
| 862 | /* ...although "contains {}" requires a full index scan */ |
| 863 | if (*nentries == 0) |
| 864 | *searchMode = GIN_SEARCH_MODE_ALL; |
| 865 | } |
| 866 | else if (strategy == JsonbExistsStrategyNumber) |
| 867 | { |
| 868 | /* Query is a text string, which we treat as a key */ |
| 869 | text *query = PG_GETARG_TEXT_PP(0); |
| 870 | |
| 871 | *nentries = 1; |
| 872 | entries = (Datum *) palloc(sizeof(Datum)); |
| 873 | entries[0] = make_text_key(JGINFLAG_KEY, |
| 874 | VARDATA_ANY(query), |
| 875 | VARSIZE_ANY_EXHDR(query)); |
| 876 | } |
| 877 | else if (strategy == JsonbExistsAnyStrategyNumber || |
| 878 | strategy == JsonbExistsAllStrategyNumber) |
| 879 | { |
| 880 | /* Query is a text array; each element is treated as a key */ |
| 881 | ArrayType *query = PG_GETARG_ARRAYTYPE_P(0); |
| 882 | Datum *key_datums; |
| 883 | bool *key_nulls; |
| 884 | int key_count; |
| 885 | int i, |
| 886 | j; |
| 887 | |
| 888 | deconstruct_array(query, |
| 889 | TEXTOID, -1, false, 'i', |
| 890 | &key_datums, &key_nulls, &key_count); |
| 891 | |
| 892 | entries = (Datum *) palloc(sizeof(Datum) * key_count); |
| 893 | |
| 894 | for (i = 0, j = 0; i < key_count; i++) |
| 895 | { |
| 896 | /* Nulls in the array are ignored */ |
| 897 | if (key_nulls[i]) |
| 898 | continue; |
| 899 | entries[j++] = make_text_key(JGINFLAG_KEY, |
| 900 | VARDATA(key_datums[i]), |
| 901 | VARSIZE(key_datums[i]) - VARHDRSZ); |
| 902 | } |
| 903 | |
| 904 | *nentries = j; |
| 905 | /* ExistsAll with no keys should match everything */ |
| 906 | if (j == 0 && strategy == JsonbExistsAllStrategyNumber) |
| 907 | *searchMode = GIN_SEARCH_MODE_ALL; |
| 908 | } |
| 909 | else if (strategy == JsonbJsonpathPredicateStrategyNumber || |
| 910 | strategy == JsonbJsonpathExistsStrategyNumber) |
| 911 | { |
| 912 | JsonPath *jp = PG_GETARG_JSONPATH_P(0); |
| 913 | Pointer ** = (Pointer **) PG_GETARG_POINTER(4); |
| 914 | |
| 915 | entries = extract_jsp_query(jp, strategy, false, nentries, extra_data); |
| 916 | |
| 917 | if (!entries) |
| 918 | *searchMode = GIN_SEARCH_MODE_ALL; |
| 919 | } |
| 920 | else |
| 921 | { |
| 922 | elog(ERROR, "unrecognized strategy number: %d" , strategy); |
| 923 | entries = NULL; /* keep compiler quiet */ |
| 924 | } |
| 925 | |
| 926 | PG_RETURN_POINTER(entries); |
| 927 | } |
| 928 | |
| 929 | Datum |
| 930 | gin_consistent_jsonb(PG_FUNCTION_ARGS) |
| 931 | { |
| 932 | bool *check = (bool *) PG_GETARG_POINTER(0); |
| 933 | StrategyNumber strategy = PG_GETARG_UINT16(1); |
| 934 | |
| 935 | /* Jsonb *query = PG_GETARG_JSONB_P(2); */ |
| 936 | int32 nkeys = PG_GETARG_INT32(3); |
| 937 | |
| 938 | Pointer * = (Pointer *) PG_GETARG_POINTER(4); |
| 939 | bool *recheck = (bool *) PG_GETARG_POINTER(5); |
| 940 | bool res = true; |
| 941 | int32 i; |
| 942 | |
| 943 | if (strategy == JsonbContainsStrategyNumber) |
| 944 | { |
| 945 | /* |
| 946 | * We must always recheck, since we can't tell from the index whether |
| 947 | * the positions of the matched items match the structure of the query |
| 948 | * object. (Even if we could, we'd also have to worry about hashed |
| 949 | * keys and the index's failure to distinguish keys from string array |
| 950 | * elements.) However, the tuple certainly doesn't match unless it |
| 951 | * contains all the query keys. |
| 952 | */ |
| 953 | *recheck = true; |
| 954 | for (i = 0; i < nkeys; i++) |
| 955 | { |
| 956 | if (!check[i]) |
| 957 | { |
| 958 | res = false; |
| 959 | break; |
| 960 | } |
| 961 | } |
| 962 | } |
| 963 | else if (strategy == JsonbExistsStrategyNumber) |
| 964 | { |
| 965 | /* |
| 966 | * Although the key is certainly present in the index, we must recheck |
| 967 | * because (1) the key might be hashed, and (2) the index match might |
| 968 | * be for a key that's not at top level of the JSON object. For (1), |
| 969 | * we could look at the query key to see if it's hashed and not |
| 970 | * recheck if not, but the index lacks enough info to tell about (2). |
| 971 | */ |
| 972 | *recheck = true; |
| 973 | res = true; |
| 974 | } |
| 975 | else if (strategy == JsonbExistsAnyStrategyNumber) |
| 976 | { |
| 977 | /* As for plain exists, we must recheck */ |
| 978 | *recheck = true; |
| 979 | res = true; |
| 980 | } |
| 981 | else if (strategy == JsonbExistsAllStrategyNumber) |
| 982 | { |
| 983 | /* As for plain exists, we must recheck */ |
| 984 | *recheck = true; |
| 985 | /* ... but unless all the keys are present, we can say "false" */ |
| 986 | for (i = 0; i < nkeys; i++) |
| 987 | { |
| 988 | if (!check[i]) |
| 989 | { |
| 990 | res = false; |
| 991 | break; |
| 992 | } |
| 993 | } |
| 994 | } |
| 995 | else if (strategy == JsonbJsonpathPredicateStrategyNumber || |
| 996 | strategy == JsonbJsonpathExistsStrategyNumber) |
| 997 | { |
| 998 | *recheck = true; |
| 999 | |
| 1000 | if (nkeys > 0) |
| 1001 | { |
| 1002 | Assert(extra_data && extra_data[0]); |
| 1003 | res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check, |
| 1004 | false) != GIN_FALSE; |
| 1005 | } |
| 1006 | } |
| 1007 | else |
| 1008 | elog(ERROR, "unrecognized strategy number: %d" , strategy); |
| 1009 | |
| 1010 | PG_RETURN_BOOL(res); |
| 1011 | } |
| 1012 | |
| 1013 | Datum |
| 1014 | gin_triconsistent_jsonb(PG_FUNCTION_ARGS) |
| 1015 | { |
| 1016 | GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); |
| 1017 | StrategyNumber strategy = PG_GETARG_UINT16(1); |
| 1018 | |
| 1019 | /* Jsonb *query = PG_GETARG_JSONB_P(2); */ |
| 1020 | int32 nkeys = PG_GETARG_INT32(3); |
| 1021 | Pointer * = (Pointer *) PG_GETARG_POINTER(4); |
| 1022 | GinTernaryValue res = GIN_MAYBE; |
| 1023 | int32 i; |
| 1024 | |
| 1025 | /* |
| 1026 | * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this |
| 1027 | * corresponds to always forcing recheck in the regular consistent |
| 1028 | * function, for the reasons listed there. |
| 1029 | */ |
| 1030 | if (strategy == JsonbContainsStrategyNumber || |
| 1031 | strategy == JsonbExistsAllStrategyNumber) |
| 1032 | { |
| 1033 | /* All extracted keys must be present */ |
| 1034 | for (i = 0; i < nkeys; i++) |
| 1035 | { |
| 1036 | if (check[i] == GIN_FALSE) |
| 1037 | { |
| 1038 | res = GIN_FALSE; |
| 1039 | break; |
| 1040 | } |
| 1041 | } |
| 1042 | } |
| 1043 | else if (strategy == JsonbExistsStrategyNumber || |
| 1044 | strategy == JsonbExistsAnyStrategyNumber) |
| 1045 | { |
| 1046 | /* At least one extracted key must be present */ |
| 1047 | res = GIN_FALSE; |
| 1048 | for (i = 0; i < nkeys; i++) |
| 1049 | { |
| 1050 | if (check[i] == GIN_TRUE || |
| 1051 | check[i] == GIN_MAYBE) |
| 1052 | { |
| 1053 | res = GIN_MAYBE; |
| 1054 | break; |
| 1055 | } |
| 1056 | } |
| 1057 | } |
| 1058 | else if (strategy == JsonbJsonpathPredicateStrategyNumber || |
| 1059 | strategy == JsonbJsonpathExistsStrategyNumber) |
| 1060 | { |
| 1061 | if (nkeys > 0) |
| 1062 | { |
| 1063 | Assert(extra_data && extra_data[0]); |
| 1064 | res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check, |
| 1065 | true); |
| 1066 | |
| 1067 | /* Should always recheck the result */ |
| 1068 | if (res == GIN_TRUE) |
| 1069 | res = GIN_MAYBE; |
| 1070 | } |
| 1071 | } |
| 1072 | else |
| 1073 | elog(ERROR, "unrecognized strategy number: %d" , strategy); |
| 1074 | |
| 1075 | PG_RETURN_GIN_TERNARY_VALUE(res); |
| 1076 | } |
| 1077 | |
/*
 *
 * jsonb_path_ops GIN opclass support functions
 *
 * In a jsonb_path_ops index, the GIN keys are uint32 hashes, one per JSON
 * value; but the JSON key(s) leading to each value are also included in its
 * hash computation.  This means we can only support containment queries,
 * but the index can distinguish, for example, {"foo": 42} from {"bar": 42}
 * since different hashes will be generated.
 *
 */
| 1089 | |
| 1090 | Datum |
| 1091 | (PG_FUNCTION_ARGS) |
| 1092 | { |
| 1093 | Jsonb *jb = PG_GETARG_JSONB_P(0); |
| 1094 | int32 *nentries = (int32 *) PG_GETARG_POINTER(1); |
| 1095 | int total = JB_ROOT_COUNT(jb); |
| 1096 | JsonbIterator *it; |
| 1097 | JsonbValue v; |
| 1098 | JsonbIteratorToken r; |
| 1099 | PathHashStack tail; |
| 1100 | PathHashStack *stack; |
| 1101 | GinEntries entries; |
| 1102 | |
| 1103 | /* If the root level is empty, we certainly have no keys */ |
| 1104 | if (total == 0) |
| 1105 | { |
| 1106 | *nentries = 0; |
| 1107 | PG_RETURN_POINTER(NULL); |
| 1108 | } |
| 1109 | |
| 1110 | /* Otherwise, use 2 * root count as initial estimate of result size */ |
| 1111 | init_gin_entries(&entries, 2 * total); |
| 1112 | |
| 1113 | /* We keep a stack of partial hashes corresponding to parent key levels */ |
| 1114 | tail.parent = NULL; |
| 1115 | tail.hash = 0; |
| 1116 | stack = &tail; |
| 1117 | |
| 1118 | it = JsonbIteratorInit(&jb->root); |
| 1119 | |
| 1120 | while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) |
| 1121 | { |
| 1122 | PathHashStack *parent; |
| 1123 | |
| 1124 | switch (r) |
| 1125 | { |
| 1126 | case WJB_BEGIN_ARRAY: |
| 1127 | case WJB_BEGIN_OBJECT: |
| 1128 | /* Push a stack level for this object */ |
| 1129 | parent = stack; |
| 1130 | stack = (PathHashStack *) palloc(sizeof(PathHashStack)); |
| 1131 | |
| 1132 | /* |
| 1133 | * We pass forward hashes from outer nesting levels so that |
| 1134 | * the hashes for nested values will include outer keys as |
| 1135 | * well as their own keys. |
| 1136 | * |
| 1137 | * Nesting an array within another array will not alter |
| 1138 | * innermost scalar element hash values, but that seems |
| 1139 | * inconsequential. |
| 1140 | */ |
| 1141 | stack->hash = parent->hash; |
| 1142 | stack->parent = parent; |
| 1143 | break; |
| 1144 | case WJB_KEY: |
| 1145 | /* mix this key into the current outer hash */ |
| 1146 | JsonbHashScalarValue(&v, &stack->hash); |
| 1147 | /* hash is now ready to incorporate the value */ |
| 1148 | break; |
| 1149 | case WJB_ELEM: |
| 1150 | case WJB_VALUE: |
| 1151 | /* mix the element or value's hash into the prepared hash */ |
| 1152 | JsonbHashScalarValue(&v, &stack->hash); |
| 1153 | /* and emit an index entry */ |
| 1154 | add_gin_entry(&entries, UInt32GetDatum(stack->hash)); |
| 1155 | /* reset hash for next key, value, or sub-object */ |
| 1156 | stack->hash = stack->parent->hash; |
| 1157 | break; |
| 1158 | case WJB_END_ARRAY: |
| 1159 | case WJB_END_OBJECT: |
| 1160 | /* Pop the stack */ |
| 1161 | parent = stack->parent; |
| 1162 | pfree(stack); |
| 1163 | stack = parent; |
| 1164 | /* reset hash for next key, value, or sub-object */ |
| 1165 | if (stack->parent) |
| 1166 | stack->hash = stack->parent->hash; |
| 1167 | else |
| 1168 | stack->hash = 0; |
| 1169 | break; |
| 1170 | default: |
| 1171 | elog(ERROR, "invalid JsonbIteratorNext rc: %d" , (int) r); |
| 1172 | } |
| 1173 | } |
| 1174 | |
| 1175 | *nentries = entries.count; |
| 1176 | |
| 1177 | PG_RETURN_POINTER(entries.buf); |
| 1178 | } |
| 1179 | |
| 1180 | Datum |
| 1181 | (PG_FUNCTION_ARGS) |
| 1182 | { |
| 1183 | int32 *nentries = (int32 *) PG_GETARG_POINTER(1); |
| 1184 | StrategyNumber strategy = PG_GETARG_UINT16(2); |
| 1185 | int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); |
| 1186 | Datum *entries; |
| 1187 | |
| 1188 | if (strategy == JsonbContainsStrategyNumber) |
| 1189 | { |
| 1190 | /* Query is a jsonb, so just apply gin_extract_jsonb_path ... */ |
| 1191 | entries = (Datum *) |
| 1192 | DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_path, |
| 1193 | PG_GETARG_DATUM(0), |
| 1194 | PointerGetDatum(nentries))); |
| 1195 | |
| 1196 | /* ... although "contains {}" requires a full index scan */ |
| 1197 | if (*nentries == 0) |
| 1198 | *searchMode = GIN_SEARCH_MODE_ALL; |
| 1199 | } |
| 1200 | else if (strategy == JsonbJsonpathPredicateStrategyNumber || |
| 1201 | strategy == JsonbJsonpathExistsStrategyNumber) |
| 1202 | { |
| 1203 | JsonPath *jp = PG_GETARG_JSONPATH_P(0); |
| 1204 | Pointer ** = (Pointer **) PG_GETARG_POINTER(4); |
| 1205 | |
| 1206 | entries = extract_jsp_query(jp, strategy, true, nentries, extra_data); |
| 1207 | |
| 1208 | if (!entries) |
| 1209 | *searchMode = GIN_SEARCH_MODE_ALL; |
| 1210 | } |
| 1211 | else |
| 1212 | { |
| 1213 | elog(ERROR, "unrecognized strategy number: %d" , strategy); |
| 1214 | entries = NULL; |
| 1215 | } |
| 1216 | |
| 1217 | PG_RETURN_POINTER(entries); |
| 1218 | } |
| 1219 | |
| 1220 | Datum |
| 1221 | gin_consistent_jsonb_path(PG_FUNCTION_ARGS) |
| 1222 | { |
| 1223 | bool *check = (bool *) PG_GETARG_POINTER(0); |
| 1224 | StrategyNumber strategy = PG_GETARG_UINT16(1); |
| 1225 | |
| 1226 | /* Jsonb *query = PG_GETARG_JSONB_P(2); */ |
| 1227 | int32 nkeys = PG_GETARG_INT32(3); |
| 1228 | Pointer * = (Pointer *) PG_GETARG_POINTER(4); |
| 1229 | bool *recheck = (bool *) PG_GETARG_POINTER(5); |
| 1230 | bool res = true; |
| 1231 | int32 i; |
| 1232 | |
| 1233 | if (strategy == JsonbContainsStrategyNumber) |
| 1234 | { |
| 1235 | /* |
| 1236 | * jsonb_path_ops is necessarily lossy, not only because of hash |
| 1237 | * collisions but also because it doesn't preserve complete |
| 1238 | * information about the structure of the JSON object. Besides, there |
| 1239 | * are some special rules around the containment of raw scalars in |
| 1240 | * arrays that are not handled here. So we must always recheck a |
| 1241 | * match. However, if not all of the keys are present, the tuple |
| 1242 | * certainly doesn't match. |
| 1243 | */ |
| 1244 | *recheck = true; |
| 1245 | for (i = 0; i < nkeys; i++) |
| 1246 | { |
| 1247 | if (!check[i]) |
| 1248 | { |
| 1249 | res = false; |
| 1250 | break; |
| 1251 | } |
| 1252 | } |
| 1253 | } |
| 1254 | else if (strategy == JsonbJsonpathPredicateStrategyNumber || |
| 1255 | strategy == JsonbJsonpathExistsStrategyNumber) |
| 1256 | { |
| 1257 | *recheck = true; |
| 1258 | |
| 1259 | if (nkeys > 0) |
| 1260 | { |
| 1261 | Assert(extra_data && extra_data[0]); |
| 1262 | res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check, |
| 1263 | false) != GIN_FALSE; |
| 1264 | } |
| 1265 | } |
| 1266 | else |
| 1267 | elog(ERROR, "unrecognized strategy number: %d" , strategy); |
| 1268 | |
| 1269 | PG_RETURN_BOOL(res); |
| 1270 | } |
| 1271 | |
| 1272 | Datum |
| 1273 | gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS) |
| 1274 | { |
| 1275 | GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); |
| 1276 | StrategyNumber strategy = PG_GETARG_UINT16(1); |
| 1277 | |
| 1278 | /* Jsonb *query = PG_GETARG_JSONB_P(2); */ |
| 1279 | int32 nkeys = PG_GETARG_INT32(3); |
| 1280 | Pointer * = (Pointer *) PG_GETARG_POINTER(4); |
| 1281 | GinTernaryValue res = GIN_MAYBE; |
| 1282 | int32 i; |
| 1283 | |
| 1284 | if (strategy == JsonbContainsStrategyNumber) |
| 1285 | { |
| 1286 | /* |
| 1287 | * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; |
| 1288 | * this corresponds to always forcing recheck in the regular |
| 1289 | * consistent function, for the reasons listed there. |
| 1290 | */ |
| 1291 | for (i = 0; i < nkeys; i++) |
| 1292 | { |
| 1293 | if (check[i] == GIN_FALSE) |
| 1294 | { |
| 1295 | res = GIN_FALSE; |
| 1296 | break; |
| 1297 | } |
| 1298 | } |
| 1299 | } |
| 1300 | else if (strategy == JsonbJsonpathPredicateStrategyNumber || |
| 1301 | strategy == JsonbJsonpathExistsStrategyNumber) |
| 1302 | { |
| 1303 | if (nkeys > 0) |
| 1304 | { |
| 1305 | Assert(extra_data && extra_data[0]); |
| 1306 | res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check, |
| 1307 | true); |
| 1308 | |
| 1309 | /* Should always recheck the result */ |
| 1310 | if (res == GIN_TRUE) |
| 1311 | res = GIN_MAYBE; |
| 1312 | } |
| 1313 | } |
| 1314 | else |
| 1315 | elog(ERROR, "unrecognized strategy number: %d" , strategy); |
| 1316 | |
| 1317 | PG_RETURN_GIN_TERNARY_VALUE(res); |
| 1318 | } |
| 1319 | |
| 1320 | /* |
| 1321 | * Construct a jsonb_ops GIN key from a flag byte and a textual representation |
| 1322 | * (which need not be null-terminated). This function is responsible |
| 1323 | * for hashing overlength text representations; it will add the |
| 1324 | * JGINFLAG_HASHED bit to the flag value if it does that. |
| 1325 | */ |
| 1326 | static Datum |
| 1327 | make_text_key(char flag, const char *str, int len) |
| 1328 | { |
| 1329 | text *item; |
| 1330 | char hashbuf[10]; |
| 1331 | |
| 1332 | if (len > JGIN_MAXLENGTH) |
| 1333 | { |
| 1334 | uint32 hashval; |
| 1335 | |
| 1336 | hashval = DatumGetUInt32(hash_any((const unsigned char *) str, len)); |
| 1337 | snprintf(hashbuf, sizeof(hashbuf), "%08x" , hashval); |
| 1338 | str = hashbuf; |
| 1339 | len = 8; |
| 1340 | flag |= JGINFLAG_HASHED; |
| 1341 | } |
| 1342 | |
| 1343 | /* |
| 1344 | * Now build the text Datum. For simplicity we build a 4-byte-header |
| 1345 | * varlena text Datum here, but we expect it will get converted to short |
| 1346 | * header format when stored in the index. |
| 1347 | */ |
| 1348 | item = (text *) palloc(VARHDRSZ + len + 1); |
| 1349 | SET_VARSIZE(item, VARHDRSZ + len + 1); |
| 1350 | |
| 1351 | *VARDATA(item) = flag; |
| 1352 | |
| 1353 | memcpy(VARDATA(item) + 1, str, len); |
| 1354 | |
| 1355 | return PointerGetDatum(item); |
| 1356 | } |
| 1357 | |
| 1358 | /* |
| 1359 | * Create a textual representation of a JsonbValue that will serve as a GIN |
| 1360 | * key in a jsonb_ops index. is_key is true if the JsonbValue is a key, |
| 1361 | * or if it is a string array element (since we pretend those are keys, |
| 1362 | * see jsonb.h). |
| 1363 | */ |
| 1364 | static Datum |
| 1365 | make_scalar_key(const JsonbValue *scalarVal, bool is_key) |
| 1366 | { |
| 1367 | Datum item; |
| 1368 | char *cstr; |
| 1369 | |
| 1370 | switch (scalarVal->type) |
| 1371 | { |
| 1372 | case jbvNull: |
| 1373 | Assert(!is_key); |
| 1374 | item = make_text_key(JGINFLAG_NULL, "" , 0); |
| 1375 | break; |
| 1376 | case jbvBool: |
| 1377 | Assert(!is_key); |
| 1378 | item = make_text_key(JGINFLAG_BOOL, |
| 1379 | scalarVal->val.boolean ? "t" : "f" , 1); |
| 1380 | break; |
| 1381 | case jbvNumeric: |
| 1382 | Assert(!is_key); |
| 1383 | |
| 1384 | /* |
| 1385 | * A normalized textual representation, free of trailing zeroes, |
| 1386 | * is required so that numerically equal values will produce equal |
| 1387 | * strings. |
| 1388 | * |
| 1389 | * It isn't ideal that numerics are stored in a relatively bulky |
| 1390 | * textual format. However, it's a notationally convenient way of |
| 1391 | * storing a "union" type in the GIN B-Tree, and indexing Jsonb |
| 1392 | * strings takes precedence. |
| 1393 | */ |
| 1394 | cstr = numeric_normalize(scalarVal->val.numeric); |
| 1395 | item = make_text_key(JGINFLAG_NUM, cstr, strlen(cstr)); |
| 1396 | pfree(cstr); |
| 1397 | break; |
| 1398 | case jbvString: |
| 1399 | item = make_text_key(is_key ? JGINFLAG_KEY : JGINFLAG_STR, |
| 1400 | scalarVal->val.string.val, |
| 1401 | scalarVal->val.string.len); |
| 1402 | break; |
| 1403 | default: |
| 1404 | elog(ERROR, "unrecognized jsonb scalar type: %d" , scalarVal->type); |
| 1405 | item = 0; /* keep compiler quiet */ |
| 1406 | break; |
| 1407 | } |
| 1408 | |
| 1409 | return item; |
| 1410 | } |
| 1411 | |