| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * spgtextproc.c |
| 4 | * implementation of radix tree (compressed trie) over text |
| 5 | * |
| 6 | * In a text_ops SPGiST index, inner tuples can have a prefix which is the |
| 7 | * common prefix of all strings indexed under that tuple. The node labels |
| 8 | * represent the next byte of the string(s) after the prefix. Assuming we |
| 9 | * always use the longest possible prefix, we will get more than one node |
| 10 | * label unless the prefix length is restricted by SPGIST_MAX_PREFIX_LENGTH. |
| 11 | * |
| 12 | * To reconstruct the indexed string for any index entry, concatenate the |
| 13 | * inner-tuple prefixes and node labels starting at the root and working |
| 14 | * down to the leaf entry, then append the datum in the leaf entry. |
| 15 | * (While descending the tree, "level" is the number of bytes reconstructed |
| 16 | * so far.) |
| 17 | * |
| 18 | * However, there are two special cases for node labels: -1 indicates that |
| 19 | * there are no more bytes after the prefix-so-far, and -2 indicates that we |
| 20 | * had to split an existing allTheSame tuple (in such a case we have to create |
| 21 | * a node label that doesn't correspond to any string byte). In either case, |
| 22 | * the node label does not contribute anything to the reconstructed string. |
| 23 | * |
| 24 | * Previously, we used a node label of zero for both special cases, but |
| 25 | * this was problematic because one can't tell whether a string ending at |
| 26 | * the current level can be pushed down into such a child node. For |
| 27 | * backwards compatibility, we still support such node labels for reading; |
| 28 | * but no new entries will ever be pushed down into a zero-labeled child. |
| 29 | * No new entries ever get pushed into a -2-labeled child, either. |
| 30 | * |
| 31 | * |
| 32 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 33 | * Portions Copyright (c) 1994, Regents of the University of California |
| 34 | * |
| 35 | * IDENTIFICATION |
| 36 | * src/backend/access/spgist/spgtextproc.c |
| 37 | * |
| 38 | *------------------------------------------------------------------------- |
| 39 | */ |
| 40 | #include "postgres.h" |
| 41 | |
| 42 | #include "access/spgist.h" |
| 43 | #include "catalog/pg_type.h" |
| 44 | #include "mb/pg_wchar.h" |
| 45 | #include "utils/builtins.h" |
| 46 | #include "utils/datum.h" |
| 47 | #include "utils/pg_locale.h" |
| 48 | #include "utils/varlena.h" |
| 49 | |
| 50 | |
| 51 | /* |
| 52 | * In the worst case, an inner tuple in a text radix tree could have as many |
| 53 | * as 258 nodes (one for each possible byte value, plus the two special |
| 54 | * cases). Each node can take 16 bytes on MAXALIGN=8 machines. The inner |
| 55 | * tuple must fit on an index page of size BLCKSZ. Rather than assuming we |
| 56 | * know the exact amount of overhead imposed by page headers, tuple headers, |
| 57 | * etc, we leave 100 bytes for that (the actual overhead should be no more |
| 58 | * than 56 bytes at this writing, so there is slop in this number). |
| 59 | * So we can safely create prefixes up to BLCKSZ - 258 * 16 - 100 bytes long. |
| 60 | * Unfortunately, because 258 * 16 is over 4K, there is no safe prefix length |
| 61 | * when BLCKSZ is less than 8K; it is always possible to get "SPGiST inner |
| 62 | * tuple size exceeds maximum" if there are too many distinct next-byte values |
| 63 | * at a given place in the tree. Since use of nonstandard block sizes appears |
| 64 | * to be negligible in the field, we just live with that fact for now, |
| 65 | * choosing a max prefix size of 32 bytes when BLCKSZ is configured smaller |
| 66 | * than default. |
| 67 | */ |
| 68 | #define SPGIST_MAX_PREFIX_LENGTH Max((int) (BLCKSZ - 258 * 16 - 100), 32) |
| 69 | |
| 70 | /* |
| 71 | * Strategy for collation aware operator on text is equal to btree strategy |
| 72 | * plus value of 10. |
| 73 | * |
| 74 | * Current collation aware strategies and their corresponding btree strategies: |
| 75 | * 11 BTLessStrategyNumber |
| 76 | * 12 BTLessEqualStrategyNumber |
| 77 | * 14 BTGreaterEqualStrategyNumber |
| 78 | * 15 BTGreaterStrategyNumber |
| 79 | */ |
| 80 | #define SPG_STRATEGY_ADDITION (10) |
| 81 | #define SPG_IS_COLLATION_AWARE_STRATEGY(s) ((s) > SPG_STRATEGY_ADDITION \ |
| 82 | && (s) != RTPrefixStrategyNumber) |
| 83 | |
| 84 | /* Struct for sorting values in picksplit */ |
| 85 | typedef struct spgNodePtr |
| 86 | { |
| 87 | Datum d; |
| 88 | int i; |
| 89 | int16 c; |
| 90 | } spgNodePtr; |
| 91 | |
| 92 | |
| 93 | Datum |
| 94 | spg_text_config(PG_FUNCTION_ARGS) |
| 95 | { |
| 96 | /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */ |
| 97 | spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1); |
| 98 | |
| 99 | cfg->prefixType = TEXTOID; |
| 100 | cfg->labelType = INT2OID; |
| 101 | cfg->canReturnData = true; |
| 102 | cfg->longValuesOK = true; /* suffixing will shorten long values */ |
| 103 | PG_RETURN_VOID(); |
| 104 | } |
| 105 | |
| 106 | /* |
| 107 | * Form a text datum from the given not-necessarily-null-terminated string, |
| 108 | * using short varlena header format if possible |
| 109 | */ |
| 110 | static Datum |
| 111 | formTextDatum(const char *data, int datalen) |
| 112 | { |
| 113 | char *p; |
| 114 | |
| 115 | p = (char *) palloc(datalen + VARHDRSZ); |
| 116 | |
| 117 | if (datalen + VARHDRSZ_SHORT <= VARATT_SHORT_MAX) |
| 118 | { |
| 119 | SET_VARSIZE_SHORT(p, datalen + VARHDRSZ_SHORT); |
| 120 | if (datalen) |
| 121 | memcpy(p + VARHDRSZ_SHORT, data, datalen); |
| 122 | } |
| 123 | else |
| 124 | { |
| 125 | SET_VARSIZE(p, datalen + VARHDRSZ); |
| 126 | memcpy(p + VARHDRSZ, data, datalen); |
| 127 | } |
| 128 | |
| 129 | return PointerGetDatum(p); |
| 130 | } |
| 131 | |
| 132 | /* |
| 133 | * Find the length of the common prefix of a and b |
| 134 | */ |
| 135 | static int |
| 136 | commonPrefix(const char *a, const char *b, int lena, int lenb) |
| 137 | { |
| 138 | int i = 0; |
| 139 | |
| 140 | while (i < lena && i < lenb && *a == *b) |
| 141 | { |
| 142 | a++; |
| 143 | b++; |
| 144 | i++; |
| 145 | } |
| 146 | |
| 147 | return i; |
| 148 | } |
| 149 | |
| 150 | /* |
| 151 | * Binary search an array of int16 datums for a match to c |
| 152 | * |
| 153 | * On success, *i gets the match location; on failure, it gets where to insert |
| 154 | */ |
| 155 | static bool |
| 156 | searchChar(Datum *nodeLabels, int nNodes, int16 c, int *i) |
| 157 | { |
| 158 | int StopLow = 0, |
| 159 | StopHigh = nNodes; |
| 160 | |
| 161 | while (StopLow < StopHigh) |
| 162 | { |
| 163 | int StopMiddle = (StopLow + StopHigh) >> 1; |
| 164 | int16 middle = DatumGetInt16(nodeLabels[StopMiddle]); |
| 165 | |
| 166 | if (c < middle) |
| 167 | StopHigh = StopMiddle; |
| 168 | else if (c > middle) |
| 169 | StopLow = StopMiddle + 1; |
| 170 | else |
| 171 | { |
| 172 | *i = StopMiddle; |
| 173 | return true; |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | *i = StopHigh; |
| 178 | return false; |
| 179 | } |
| 180 | |
| 181 | Datum |
| 182 | spg_text_choose(PG_FUNCTION_ARGS) |
| 183 | { |
| 184 | spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0); |
| 185 | spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1); |
| 186 | text *inText = DatumGetTextPP(in->datum); |
| 187 | char *inStr = VARDATA_ANY(inText); |
| 188 | int inSize = VARSIZE_ANY_EXHDR(inText); |
| 189 | char *prefixStr = NULL; |
| 190 | int prefixSize = 0; |
| 191 | int commonLen = 0; |
| 192 | int16 nodeChar = 0; |
| 193 | int i = 0; |
| 194 | |
| 195 | /* Check for prefix match, set nodeChar to first byte after prefix */ |
| 196 | if (in->hasPrefix) |
| 197 | { |
| 198 | text *prefixText = DatumGetTextPP(in->prefixDatum); |
| 199 | |
| 200 | prefixStr = VARDATA_ANY(prefixText); |
| 201 | prefixSize = VARSIZE_ANY_EXHDR(prefixText); |
| 202 | |
| 203 | commonLen = commonPrefix(inStr + in->level, |
| 204 | prefixStr, |
| 205 | inSize - in->level, |
| 206 | prefixSize); |
| 207 | |
| 208 | if (commonLen == prefixSize) |
| 209 | { |
| 210 | if (inSize - in->level > commonLen) |
| 211 | nodeChar = *(unsigned char *) (inStr + in->level + commonLen); |
| 212 | else |
| 213 | nodeChar = -1; |
| 214 | } |
| 215 | else |
| 216 | { |
| 217 | /* Must split tuple because incoming value doesn't match prefix */ |
| 218 | out->resultType = spgSplitTuple; |
| 219 | |
| 220 | if (commonLen == 0) |
| 221 | { |
| 222 | out->result.splitTuple.prefixHasPrefix = false; |
| 223 | } |
| 224 | else |
| 225 | { |
| 226 | out->result.splitTuple.prefixHasPrefix = true; |
| 227 | out->result.splitTuple.prefixPrefixDatum = |
| 228 | formTextDatum(prefixStr, commonLen); |
| 229 | } |
| 230 | out->result.splitTuple.prefixNNodes = 1; |
| 231 | out->result.splitTuple.prefixNodeLabels = |
| 232 | (Datum *) palloc(sizeof(Datum)); |
| 233 | out->result.splitTuple.prefixNodeLabels[0] = |
| 234 | Int16GetDatum(*(unsigned char *) (prefixStr + commonLen)); |
| 235 | |
| 236 | out->result.splitTuple.childNodeN = 0; |
| 237 | |
| 238 | if (prefixSize - commonLen == 1) |
| 239 | { |
| 240 | out->result.splitTuple.postfixHasPrefix = false; |
| 241 | } |
| 242 | else |
| 243 | { |
| 244 | out->result.splitTuple.postfixHasPrefix = true; |
| 245 | out->result.splitTuple.postfixPrefixDatum = |
| 246 | formTextDatum(prefixStr + commonLen + 1, |
| 247 | prefixSize - commonLen - 1); |
| 248 | } |
| 249 | |
| 250 | PG_RETURN_VOID(); |
| 251 | } |
| 252 | } |
| 253 | else if (inSize > in->level) |
| 254 | { |
| 255 | nodeChar = *(unsigned char *) (inStr + in->level); |
| 256 | } |
| 257 | else |
| 258 | { |
| 259 | nodeChar = -1; |
| 260 | } |
| 261 | |
| 262 | /* Look up nodeChar in the node label array */ |
| 263 | if (searchChar(in->nodeLabels, in->nNodes, nodeChar, &i)) |
| 264 | { |
| 265 | /* |
| 266 | * Descend to existing node. (If in->allTheSame, the core code will |
| 267 | * ignore our nodeN specification here, but that's OK. We still have |
| 268 | * to provide the correct levelAdd and restDatum values, and those are |
| 269 | * the same regardless of which node gets chosen by core.) |
| 270 | */ |
| 271 | int levelAdd; |
| 272 | |
| 273 | out->resultType = spgMatchNode; |
| 274 | out->result.matchNode.nodeN = i; |
| 275 | levelAdd = commonLen; |
| 276 | if (nodeChar >= 0) |
| 277 | levelAdd++; |
| 278 | out->result.matchNode.levelAdd = levelAdd; |
| 279 | if (inSize - in->level - levelAdd > 0) |
| 280 | out->result.matchNode.restDatum = |
| 281 | formTextDatum(inStr + in->level + levelAdd, |
| 282 | inSize - in->level - levelAdd); |
| 283 | else |
| 284 | out->result.matchNode.restDatum = |
| 285 | formTextDatum(NULL, 0); |
| 286 | } |
| 287 | else if (in->allTheSame) |
| 288 | { |
| 289 | /* |
| 290 | * Can't use AddNode action, so split the tuple. The upper tuple has |
| 291 | * the same prefix as before and uses a dummy node label -2 for the |
| 292 | * lower tuple. The lower tuple has no prefix and the same node |
| 293 | * labels as the original tuple. |
| 294 | * |
| 295 | * Note: it might seem tempting to shorten the upper tuple's prefix, |
| 296 | * if it has one, then use its last byte as label for the lower tuple. |
| 297 | * But that doesn't win since we know the incoming value matches the |
| 298 | * whole prefix: we'd just end up splitting the lower tuple again. |
| 299 | */ |
| 300 | out->resultType = spgSplitTuple; |
| 301 | out->result.splitTuple.prefixHasPrefix = in->hasPrefix; |
| 302 | out->result.splitTuple.prefixPrefixDatum = in->prefixDatum; |
| 303 | out->result.splitTuple.prefixNNodes = 1; |
| 304 | out->result.splitTuple.prefixNodeLabels = (Datum *) palloc(sizeof(Datum)); |
| 305 | out->result.splitTuple.prefixNodeLabels[0] = Int16GetDatum(-2); |
| 306 | out->result.splitTuple.childNodeN = 0; |
| 307 | out->result.splitTuple.postfixHasPrefix = false; |
| 308 | } |
| 309 | else |
| 310 | { |
| 311 | /* Add a node for the not-previously-seen nodeChar value */ |
| 312 | out->resultType = spgAddNode; |
| 313 | out->result.addNode.nodeLabel = Int16GetDatum(nodeChar); |
| 314 | out->result.addNode.nodeN = i; |
| 315 | } |
| 316 | |
| 317 | PG_RETURN_VOID(); |
| 318 | } |
| 319 | |
| 320 | /* qsort comparator to sort spgNodePtr structs by "c" */ |
| 321 | static int |
| 322 | cmpNodePtr(const void *a, const void *b) |
| 323 | { |
| 324 | const spgNodePtr *aa = (const spgNodePtr *) a; |
| 325 | const spgNodePtr *bb = (const spgNodePtr *) b; |
| 326 | |
| 327 | return aa->c - bb->c; |
| 328 | } |
| 329 | |
| 330 | Datum |
| 331 | spg_text_picksplit(PG_FUNCTION_ARGS) |
| 332 | { |
| 333 | spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0); |
| 334 | spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1); |
| 335 | text *text0 = DatumGetTextPP(in->datums[0]); |
| 336 | int i, |
| 337 | commonLen; |
| 338 | spgNodePtr *nodes; |
| 339 | |
| 340 | /* Identify longest common prefix, if any */ |
| 341 | commonLen = VARSIZE_ANY_EXHDR(text0); |
| 342 | for (i = 1; i < in->nTuples && commonLen > 0; i++) |
| 343 | { |
| 344 | text *texti = DatumGetTextPP(in->datums[i]); |
| 345 | int tmp = commonPrefix(VARDATA_ANY(text0), |
| 346 | VARDATA_ANY(texti), |
| 347 | VARSIZE_ANY_EXHDR(text0), |
| 348 | VARSIZE_ANY_EXHDR(texti)); |
| 349 | |
| 350 | if (tmp < commonLen) |
| 351 | commonLen = tmp; |
| 352 | } |
| 353 | |
| 354 | /* |
| 355 | * Limit the prefix length, if necessary, to ensure that the resulting |
| 356 | * inner tuple will fit on a page. |
| 357 | */ |
| 358 | commonLen = Min(commonLen, SPGIST_MAX_PREFIX_LENGTH); |
| 359 | |
| 360 | /* Set node prefix to be that string, if it's not empty */ |
| 361 | if (commonLen == 0) |
| 362 | { |
| 363 | out->hasPrefix = false; |
| 364 | } |
| 365 | else |
| 366 | { |
| 367 | out->hasPrefix = true; |
| 368 | out->prefixDatum = formTextDatum(VARDATA_ANY(text0), commonLen); |
| 369 | } |
| 370 | |
| 371 | /* Extract the node label (first non-common byte) from each value */ |
| 372 | nodes = (spgNodePtr *) palloc(sizeof(spgNodePtr) * in->nTuples); |
| 373 | |
| 374 | for (i = 0; i < in->nTuples; i++) |
| 375 | { |
| 376 | text *texti = DatumGetTextPP(in->datums[i]); |
| 377 | |
| 378 | if (commonLen < VARSIZE_ANY_EXHDR(texti)) |
| 379 | nodes[i].c = *(unsigned char *) (VARDATA_ANY(texti) + commonLen); |
| 380 | else |
| 381 | nodes[i].c = -1; /* use -1 if string is all common */ |
| 382 | nodes[i].i = i; |
| 383 | nodes[i].d = in->datums[i]; |
| 384 | } |
| 385 | |
| 386 | /* |
| 387 | * Sort by label values so that we can group the values into nodes. This |
| 388 | * also ensures that the nodes are ordered by label value, allowing the |
| 389 | * use of binary search in searchChar. |
| 390 | */ |
| 391 | qsort(nodes, in->nTuples, sizeof(*nodes), cmpNodePtr); |
| 392 | |
| 393 | /* And emit results */ |
| 394 | out->nNodes = 0; |
| 395 | out->nodeLabels = (Datum *) palloc(sizeof(Datum) * in->nTuples); |
| 396 | out->mapTuplesToNodes = (int *) palloc(sizeof(int) * in->nTuples); |
| 397 | out->leafTupleDatums = (Datum *) palloc(sizeof(Datum) * in->nTuples); |
| 398 | |
| 399 | for (i = 0; i < in->nTuples; i++) |
| 400 | { |
| 401 | text *texti = DatumGetTextPP(nodes[i].d); |
| 402 | Datum leafD; |
| 403 | |
| 404 | if (i == 0 || nodes[i].c != nodes[i - 1].c) |
| 405 | { |
| 406 | out->nodeLabels[out->nNodes] = Int16GetDatum(nodes[i].c); |
| 407 | out->nNodes++; |
| 408 | } |
| 409 | |
| 410 | if (commonLen < VARSIZE_ANY_EXHDR(texti)) |
| 411 | leafD = formTextDatum(VARDATA_ANY(texti) + commonLen + 1, |
| 412 | VARSIZE_ANY_EXHDR(texti) - commonLen - 1); |
| 413 | else |
| 414 | leafD = formTextDatum(NULL, 0); |
| 415 | |
| 416 | out->leafTupleDatums[nodes[i].i] = leafD; |
| 417 | out->mapTuplesToNodes[nodes[i].i] = out->nNodes - 1; |
| 418 | } |
| 419 | |
| 420 | PG_RETURN_VOID(); |
| 421 | } |
| 422 | |
| 423 | Datum |
| 424 | spg_text_inner_consistent(PG_FUNCTION_ARGS) |
| 425 | { |
| 426 | spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0); |
| 427 | spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1); |
| 428 | bool collate_is_c = lc_collate_is_c(PG_GET_COLLATION()); |
| 429 | text *reconstructedValue; |
| 430 | text *reconstrText; |
| 431 | int maxReconstrLen; |
| 432 | text *prefixText = NULL; |
| 433 | int prefixSize = 0; |
| 434 | int i; |
| 435 | |
| 436 | /* |
| 437 | * Reconstruct values represented at this tuple, including parent data, |
| 438 | * prefix of this tuple if any, and the node label if it's non-dummy. |
| 439 | * in->level should be the length of the previously reconstructed value, |
| 440 | * and the number of bytes added here is prefixSize or prefixSize + 1. |
| 441 | * |
| 442 | * Note: we assume that in->reconstructedValue isn't toasted and doesn't |
| 443 | * have a short varlena header. This is okay because it must have been |
| 444 | * created by a previous invocation of this routine, and we always emit |
| 445 | * long-format reconstructed values. |
| 446 | */ |
| 447 | reconstructedValue = (text *) DatumGetPointer(in->reconstructedValue); |
| 448 | Assert(reconstructedValue == NULL ? in->level == 0 : |
| 449 | VARSIZE_ANY_EXHDR(reconstructedValue) == in->level); |
| 450 | |
| 451 | maxReconstrLen = in->level + 1; |
| 452 | if (in->hasPrefix) |
| 453 | { |
| 454 | prefixText = DatumGetTextPP(in->prefixDatum); |
| 455 | prefixSize = VARSIZE_ANY_EXHDR(prefixText); |
| 456 | maxReconstrLen += prefixSize; |
| 457 | } |
| 458 | |
| 459 | reconstrText = palloc(VARHDRSZ + maxReconstrLen); |
| 460 | SET_VARSIZE(reconstrText, VARHDRSZ + maxReconstrLen); |
| 461 | |
| 462 | if (in->level) |
| 463 | memcpy(VARDATA(reconstrText), |
| 464 | VARDATA(reconstructedValue), |
| 465 | in->level); |
| 466 | if (prefixSize) |
| 467 | memcpy(((char *) VARDATA(reconstrText)) + in->level, |
| 468 | VARDATA_ANY(prefixText), |
| 469 | prefixSize); |
| 470 | /* last byte of reconstrText will be filled in below */ |
| 471 | |
| 472 | /* |
| 473 | * Scan the child nodes. For each one, complete the reconstructed value |
| 474 | * and see if it's consistent with the query. If so, emit an entry into |
| 475 | * the output arrays. |
| 476 | */ |
| 477 | out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes); |
| 478 | out->levelAdds = (int *) palloc(sizeof(int) * in->nNodes); |
| 479 | out->reconstructedValues = (Datum *) palloc(sizeof(Datum) * in->nNodes); |
| 480 | out->nNodes = 0; |
| 481 | |
| 482 | for (i = 0; i < in->nNodes; i++) |
| 483 | { |
| 484 | int16 nodeChar = DatumGetInt16(in->nodeLabels[i]); |
| 485 | int thisLen; |
| 486 | bool res = true; |
| 487 | int j; |
| 488 | |
| 489 | /* If nodeChar is a dummy value, don't include it in data */ |
| 490 | if (nodeChar <= 0) |
| 491 | thisLen = maxReconstrLen - 1; |
| 492 | else |
| 493 | { |
| 494 | ((unsigned char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar; |
| 495 | thisLen = maxReconstrLen; |
| 496 | } |
| 497 | |
| 498 | for (j = 0; j < in->nkeys; j++) |
| 499 | { |
| 500 | StrategyNumber strategy = in->scankeys[j].sk_strategy; |
| 501 | text *inText; |
| 502 | int inSize; |
| 503 | int r; |
| 504 | |
| 505 | /* |
| 506 | * If it's a collation-aware operator, but the collation is C, we |
| 507 | * can treat it as non-collation-aware. With non-C collation we |
| 508 | * need to traverse whole tree :-( so there's no point in making |
| 509 | * any check here. (Note also that our reconstructed value may |
| 510 | * well end with a partial multibyte character, so that applying |
| 511 | * any encoding-sensitive test to it would be risky anyhow.) |
| 512 | */ |
| 513 | if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy)) |
| 514 | { |
| 515 | if (collate_is_c) |
| 516 | strategy -= SPG_STRATEGY_ADDITION; |
| 517 | else |
| 518 | continue; |
| 519 | } |
| 520 | |
| 521 | inText = DatumGetTextPP(in->scankeys[j].sk_argument); |
| 522 | inSize = VARSIZE_ANY_EXHDR(inText); |
| 523 | |
| 524 | r = memcmp(VARDATA(reconstrText), VARDATA_ANY(inText), |
| 525 | Min(inSize, thisLen)); |
| 526 | |
| 527 | switch (strategy) |
| 528 | { |
| 529 | case BTLessStrategyNumber: |
| 530 | case BTLessEqualStrategyNumber: |
| 531 | if (r > 0) |
| 532 | res = false; |
| 533 | break; |
| 534 | case BTEqualStrategyNumber: |
| 535 | if (r != 0 || inSize < thisLen) |
| 536 | res = false; |
| 537 | break; |
| 538 | case BTGreaterEqualStrategyNumber: |
| 539 | case BTGreaterStrategyNumber: |
| 540 | if (r < 0) |
| 541 | res = false; |
| 542 | break; |
| 543 | case RTPrefixStrategyNumber: |
| 544 | if (r != 0) |
| 545 | res = false; |
| 546 | break; |
| 547 | default: |
| 548 | elog(ERROR, "unrecognized strategy number: %d" , |
| 549 | in->scankeys[j].sk_strategy); |
| 550 | break; |
| 551 | } |
| 552 | |
| 553 | if (!res) |
| 554 | break; /* no need to consider remaining conditions */ |
| 555 | } |
| 556 | |
| 557 | if (res) |
| 558 | { |
| 559 | out->nodeNumbers[out->nNodes] = i; |
| 560 | out->levelAdds[out->nNodes] = thisLen - in->level; |
| 561 | SET_VARSIZE(reconstrText, VARHDRSZ + thisLen); |
| 562 | out->reconstructedValues[out->nNodes] = |
| 563 | datumCopy(PointerGetDatum(reconstrText), false, -1); |
| 564 | out->nNodes++; |
| 565 | } |
| 566 | } |
| 567 | |
| 568 | PG_RETURN_VOID(); |
| 569 | } |
| 570 | |
| 571 | Datum |
| 572 | spg_text_leaf_consistent(PG_FUNCTION_ARGS) |
| 573 | { |
| 574 | spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0); |
| 575 | spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1); |
| 576 | int level = in->level; |
| 577 | text *leafValue, |
| 578 | *reconstrValue = NULL; |
| 579 | char *fullValue; |
| 580 | int fullLen; |
| 581 | bool res; |
| 582 | int j; |
| 583 | |
| 584 | /* all tests are exact */ |
| 585 | out->recheck = false; |
| 586 | |
| 587 | leafValue = DatumGetTextPP(in->leafDatum); |
| 588 | |
| 589 | /* As above, in->reconstructedValue isn't toasted or short. */ |
| 590 | if (DatumGetPointer(in->reconstructedValue)) |
| 591 | reconstrValue = (text *) DatumGetPointer(in->reconstructedValue); |
| 592 | |
| 593 | Assert(reconstrValue == NULL ? level == 0 : |
| 594 | VARSIZE_ANY_EXHDR(reconstrValue) == level); |
| 595 | |
| 596 | /* Reconstruct the full string represented by this leaf tuple */ |
| 597 | fullLen = level + VARSIZE_ANY_EXHDR(leafValue); |
| 598 | if (VARSIZE_ANY_EXHDR(leafValue) == 0 && level > 0) |
| 599 | { |
| 600 | fullValue = VARDATA(reconstrValue); |
| 601 | out->leafValue = PointerGetDatum(reconstrValue); |
| 602 | } |
| 603 | else |
| 604 | { |
| 605 | text *fullText = palloc(VARHDRSZ + fullLen); |
| 606 | |
| 607 | SET_VARSIZE(fullText, VARHDRSZ + fullLen); |
| 608 | fullValue = VARDATA(fullText); |
| 609 | if (level) |
| 610 | memcpy(fullValue, VARDATA(reconstrValue), level); |
| 611 | if (VARSIZE_ANY_EXHDR(leafValue) > 0) |
| 612 | memcpy(fullValue + level, VARDATA_ANY(leafValue), |
| 613 | VARSIZE_ANY_EXHDR(leafValue)); |
| 614 | out->leafValue = PointerGetDatum(fullText); |
| 615 | } |
| 616 | |
| 617 | /* Perform the required comparison(s) */ |
| 618 | res = true; |
| 619 | for (j = 0; j < in->nkeys; j++) |
| 620 | { |
| 621 | StrategyNumber strategy = in->scankeys[j].sk_strategy; |
| 622 | text *query = DatumGetTextPP(in->scankeys[j].sk_argument); |
| 623 | int queryLen = VARSIZE_ANY_EXHDR(query); |
| 624 | int r; |
| 625 | |
| 626 | if (strategy == RTPrefixStrategyNumber) |
| 627 | { |
| 628 | /* |
| 629 | * if level >= length of query then reconstrValue must begin with |
| 630 | * query (prefix) string, so we don't need to check it again. |
| 631 | */ |
| 632 | res = (level >= queryLen) || |
| 633 | DatumGetBool(DirectFunctionCall2Coll(text_starts_with, |
| 634 | PG_GET_COLLATION(), |
| 635 | out->leafValue, |
| 636 | PointerGetDatum(query))); |
| 637 | |
| 638 | if (!res) /* no need to consider remaining conditions */ |
| 639 | break; |
| 640 | |
| 641 | continue; |
| 642 | } |
| 643 | |
| 644 | if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy)) |
| 645 | { |
| 646 | /* Collation-aware comparison */ |
| 647 | strategy -= SPG_STRATEGY_ADDITION; |
| 648 | |
| 649 | /* If asserts enabled, verify encoding of reconstructed string */ |
| 650 | Assert(pg_verifymbstr(fullValue, fullLen, false)); |
| 651 | |
| 652 | r = varstr_cmp(fullValue, fullLen, |
| 653 | VARDATA_ANY(query), queryLen, |
| 654 | PG_GET_COLLATION()); |
| 655 | } |
| 656 | else |
| 657 | { |
| 658 | /* Non-collation-aware comparison */ |
| 659 | r = memcmp(fullValue, VARDATA_ANY(query), Min(queryLen, fullLen)); |
| 660 | |
| 661 | if (r == 0) |
| 662 | { |
| 663 | if (queryLen > fullLen) |
| 664 | r = -1; |
| 665 | else if (queryLen < fullLen) |
| 666 | r = 1; |
| 667 | } |
| 668 | } |
| 669 | |
| 670 | switch (strategy) |
| 671 | { |
| 672 | case BTLessStrategyNumber: |
| 673 | res = (r < 0); |
| 674 | break; |
| 675 | case BTLessEqualStrategyNumber: |
| 676 | res = (r <= 0); |
| 677 | break; |
| 678 | case BTEqualStrategyNumber: |
| 679 | res = (r == 0); |
| 680 | break; |
| 681 | case BTGreaterEqualStrategyNumber: |
| 682 | res = (r >= 0); |
| 683 | break; |
| 684 | case BTGreaterStrategyNumber: |
| 685 | res = (r > 0); |
| 686 | break; |
| 687 | default: |
| 688 | elog(ERROR, "unrecognized strategy number: %d" , |
| 689 | in->scankeys[j].sk_strategy); |
| 690 | res = false; |
| 691 | break; |
| 692 | } |
| 693 | |
| 694 | if (!res) |
| 695 | break; /* no need to consider remaining conditions */ |
| 696 | } |
| 697 | |
| 698 | PG_RETURN_BOOL(res); |
| 699 | } |
| 700 | |