1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * spgtextproc.c |
4 | * implementation of radix tree (compressed trie) over text |
5 | * |
6 | * In a text_ops SPGiST index, inner tuples can have a prefix which is the |
7 | * common prefix of all strings indexed under that tuple. The node labels |
8 | * represent the next byte of the string(s) after the prefix. Assuming we |
9 | * always use the longest possible prefix, we will get more than one node |
10 | * label unless the prefix length is restricted by SPGIST_MAX_PREFIX_LENGTH. |
11 | * |
12 | * To reconstruct the indexed string for any index entry, concatenate the |
13 | * inner-tuple prefixes and node labels starting at the root and working |
14 | * down to the leaf entry, then append the datum in the leaf entry. |
15 | * (While descending the tree, "level" is the number of bytes reconstructed |
16 | * so far.) |
17 | * |
18 | * However, there are two special cases for node labels: -1 indicates that |
19 | * there are no more bytes after the prefix-so-far, and -2 indicates that we |
20 | * had to split an existing allTheSame tuple (in such a case we have to create |
21 | * a node label that doesn't correspond to any string byte). In either case, |
22 | * the node label does not contribute anything to the reconstructed string. |
23 | * |
24 | * Previously, we used a node label of zero for both special cases, but |
25 | * this was problematic because one can't tell whether a string ending at |
26 | * the current level can be pushed down into such a child node. For |
27 | * backwards compatibility, we still support such node labels for reading; |
28 | * but no new entries will ever be pushed down into a zero-labeled child. |
29 | * No new entries ever get pushed into a -2-labeled child, either. |
30 | * |
31 | * |
32 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
33 | * Portions Copyright (c) 1994, Regents of the University of California |
34 | * |
35 | * IDENTIFICATION |
36 | * src/backend/access/spgist/spgtextproc.c |
37 | * |
38 | *------------------------------------------------------------------------- |
39 | */ |
40 | #include "postgres.h" |
41 | |
42 | #include "access/spgist.h" |
43 | #include "catalog/pg_type.h" |
44 | #include "mb/pg_wchar.h" |
45 | #include "utils/builtins.h" |
46 | #include "utils/datum.h" |
47 | #include "utils/pg_locale.h" |
48 | #include "utils/varlena.h" |
49 | |
50 | |
51 | /* |
52 | * In the worst case, an inner tuple in a text radix tree could have as many |
53 | * as 258 nodes (one for each possible byte value, plus the two special |
54 | * cases). Each node can take 16 bytes on MAXALIGN=8 machines. The inner |
55 | * tuple must fit on an index page of size BLCKSZ. Rather than assuming we |
56 | * know the exact amount of overhead imposed by page headers, tuple headers, |
57 | * etc, we leave 100 bytes for that (the actual overhead should be no more |
58 | * than 56 bytes at this writing, so there is slop in this number). |
59 | * So we can safely create prefixes up to BLCKSZ - 258 * 16 - 100 bytes long. |
60 | * Unfortunately, because 258 * 16 is over 4K, there is no safe prefix length |
61 | * when BLCKSZ is less than 8K; it is always possible to get "SPGiST inner |
62 | * tuple size exceeds maximum" if there are too many distinct next-byte values |
63 | * at a given place in the tree. Since use of nonstandard block sizes appears |
64 | * to be negligible in the field, we just live with that fact for now, |
65 | * choosing a max prefix size of 32 bytes when BLCKSZ is configured smaller |
66 | * than default. |
67 | */ |
68 | #define SPGIST_MAX_PREFIX_LENGTH Max((int) (BLCKSZ - 258 * 16 - 100), 32) |
69 | |
70 | /* |
71 | * Strategy for collation aware operator on text is equal to btree strategy |
72 | * plus value of 10. |
73 | * |
74 | * Current collation aware strategies and their corresponding btree strategies: |
75 | * 11 BTLessStrategyNumber |
76 | * 12 BTLessEqualStrategyNumber |
77 | * 14 BTGreaterEqualStrategyNumber |
78 | * 15 BTGreaterStrategyNumber |
79 | */ |
80 | #define SPG_STRATEGY_ADDITION (10) |
81 | #define SPG_IS_COLLATION_AWARE_STRATEGY(s) ((s) > SPG_STRATEGY_ADDITION \ |
82 | && (s) != RTPrefixStrategyNumber) |
83 | |
84 | /* Struct for sorting values in picksplit */ |
85 | typedef struct spgNodePtr |
86 | { |
87 | Datum d; |
88 | int i; |
89 | int16 c; |
90 | } spgNodePtr; |
91 | |
92 | |
93 | Datum |
94 | spg_text_config(PG_FUNCTION_ARGS) |
95 | { |
96 | /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */ |
97 | spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1); |
98 | |
99 | cfg->prefixType = TEXTOID; |
100 | cfg->labelType = INT2OID; |
101 | cfg->canReturnData = true; |
102 | cfg->longValuesOK = true; /* suffixing will shorten long values */ |
103 | PG_RETURN_VOID(); |
104 | } |
105 | |
106 | /* |
107 | * Form a text datum from the given not-necessarily-null-terminated string, |
108 | * using short varlena header format if possible |
109 | */ |
110 | static Datum |
111 | formTextDatum(const char *data, int datalen) |
112 | { |
113 | char *p; |
114 | |
115 | p = (char *) palloc(datalen + VARHDRSZ); |
116 | |
117 | if (datalen + VARHDRSZ_SHORT <= VARATT_SHORT_MAX) |
118 | { |
119 | SET_VARSIZE_SHORT(p, datalen + VARHDRSZ_SHORT); |
120 | if (datalen) |
121 | memcpy(p + VARHDRSZ_SHORT, data, datalen); |
122 | } |
123 | else |
124 | { |
125 | SET_VARSIZE(p, datalen + VARHDRSZ); |
126 | memcpy(p + VARHDRSZ, data, datalen); |
127 | } |
128 | |
129 | return PointerGetDatum(p); |
130 | } |
131 | |
132 | /* |
133 | * Find the length of the common prefix of a and b |
134 | */ |
135 | static int |
136 | commonPrefix(const char *a, const char *b, int lena, int lenb) |
137 | { |
138 | int i = 0; |
139 | |
140 | while (i < lena && i < lenb && *a == *b) |
141 | { |
142 | a++; |
143 | b++; |
144 | i++; |
145 | } |
146 | |
147 | return i; |
148 | } |
149 | |
150 | /* |
151 | * Binary search an array of int16 datums for a match to c |
152 | * |
153 | * On success, *i gets the match location; on failure, it gets where to insert |
154 | */ |
155 | static bool |
156 | searchChar(Datum *nodeLabels, int nNodes, int16 c, int *i) |
157 | { |
158 | int StopLow = 0, |
159 | StopHigh = nNodes; |
160 | |
161 | while (StopLow < StopHigh) |
162 | { |
163 | int StopMiddle = (StopLow + StopHigh) >> 1; |
164 | int16 middle = DatumGetInt16(nodeLabels[StopMiddle]); |
165 | |
166 | if (c < middle) |
167 | StopHigh = StopMiddle; |
168 | else if (c > middle) |
169 | StopLow = StopMiddle + 1; |
170 | else |
171 | { |
172 | *i = StopMiddle; |
173 | return true; |
174 | } |
175 | } |
176 | |
177 | *i = StopHigh; |
178 | return false; |
179 | } |
180 | |
181 | Datum |
182 | spg_text_choose(PG_FUNCTION_ARGS) |
183 | { |
184 | spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0); |
185 | spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1); |
186 | text *inText = DatumGetTextPP(in->datum); |
187 | char *inStr = VARDATA_ANY(inText); |
188 | int inSize = VARSIZE_ANY_EXHDR(inText); |
189 | char *prefixStr = NULL; |
190 | int prefixSize = 0; |
191 | int commonLen = 0; |
192 | int16 nodeChar = 0; |
193 | int i = 0; |
194 | |
195 | /* Check for prefix match, set nodeChar to first byte after prefix */ |
196 | if (in->hasPrefix) |
197 | { |
198 | text *prefixText = DatumGetTextPP(in->prefixDatum); |
199 | |
200 | prefixStr = VARDATA_ANY(prefixText); |
201 | prefixSize = VARSIZE_ANY_EXHDR(prefixText); |
202 | |
203 | commonLen = commonPrefix(inStr + in->level, |
204 | prefixStr, |
205 | inSize - in->level, |
206 | prefixSize); |
207 | |
208 | if (commonLen == prefixSize) |
209 | { |
210 | if (inSize - in->level > commonLen) |
211 | nodeChar = *(unsigned char *) (inStr + in->level + commonLen); |
212 | else |
213 | nodeChar = -1; |
214 | } |
215 | else |
216 | { |
217 | /* Must split tuple because incoming value doesn't match prefix */ |
218 | out->resultType = spgSplitTuple; |
219 | |
220 | if (commonLen == 0) |
221 | { |
222 | out->result.splitTuple.prefixHasPrefix = false; |
223 | } |
224 | else |
225 | { |
226 | out->result.splitTuple.prefixHasPrefix = true; |
227 | out->result.splitTuple.prefixPrefixDatum = |
228 | formTextDatum(prefixStr, commonLen); |
229 | } |
230 | out->result.splitTuple.prefixNNodes = 1; |
231 | out->result.splitTuple.prefixNodeLabels = |
232 | (Datum *) palloc(sizeof(Datum)); |
233 | out->result.splitTuple.prefixNodeLabels[0] = |
234 | Int16GetDatum(*(unsigned char *) (prefixStr + commonLen)); |
235 | |
236 | out->result.splitTuple.childNodeN = 0; |
237 | |
238 | if (prefixSize - commonLen == 1) |
239 | { |
240 | out->result.splitTuple.postfixHasPrefix = false; |
241 | } |
242 | else |
243 | { |
244 | out->result.splitTuple.postfixHasPrefix = true; |
245 | out->result.splitTuple.postfixPrefixDatum = |
246 | formTextDatum(prefixStr + commonLen + 1, |
247 | prefixSize - commonLen - 1); |
248 | } |
249 | |
250 | PG_RETURN_VOID(); |
251 | } |
252 | } |
253 | else if (inSize > in->level) |
254 | { |
255 | nodeChar = *(unsigned char *) (inStr + in->level); |
256 | } |
257 | else |
258 | { |
259 | nodeChar = -1; |
260 | } |
261 | |
262 | /* Look up nodeChar in the node label array */ |
263 | if (searchChar(in->nodeLabels, in->nNodes, nodeChar, &i)) |
264 | { |
265 | /* |
266 | * Descend to existing node. (If in->allTheSame, the core code will |
267 | * ignore our nodeN specification here, but that's OK. We still have |
268 | * to provide the correct levelAdd and restDatum values, and those are |
269 | * the same regardless of which node gets chosen by core.) |
270 | */ |
271 | int levelAdd; |
272 | |
273 | out->resultType = spgMatchNode; |
274 | out->result.matchNode.nodeN = i; |
275 | levelAdd = commonLen; |
276 | if (nodeChar >= 0) |
277 | levelAdd++; |
278 | out->result.matchNode.levelAdd = levelAdd; |
279 | if (inSize - in->level - levelAdd > 0) |
280 | out->result.matchNode.restDatum = |
281 | formTextDatum(inStr + in->level + levelAdd, |
282 | inSize - in->level - levelAdd); |
283 | else |
284 | out->result.matchNode.restDatum = |
285 | formTextDatum(NULL, 0); |
286 | } |
287 | else if (in->allTheSame) |
288 | { |
289 | /* |
290 | * Can't use AddNode action, so split the tuple. The upper tuple has |
291 | * the same prefix as before and uses a dummy node label -2 for the |
292 | * lower tuple. The lower tuple has no prefix and the same node |
293 | * labels as the original tuple. |
294 | * |
295 | * Note: it might seem tempting to shorten the upper tuple's prefix, |
296 | * if it has one, then use its last byte as label for the lower tuple. |
297 | * But that doesn't win since we know the incoming value matches the |
298 | * whole prefix: we'd just end up splitting the lower tuple again. |
299 | */ |
300 | out->resultType = spgSplitTuple; |
301 | out->result.splitTuple.prefixHasPrefix = in->hasPrefix; |
302 | out->result.splitTuple.prefixPrefixDatum = in->prefixDatum; |
303 | out->result.splitTuple.prefixNNodes = 1; |
304 | out->result.splitTuple.prefixNodeLabels = (Datum *) palloc(sizeof(Datum)); |
305 | out->result.splitTuple.prefixNodeLabels[0] = Int16GetDatum(-2); |
306 | out->result.splitTuple.childNodeN = 0; |
307 | out->result.splitTuple.postfixHasPrefix = false; |
308 | } |
309 | else |
310 | { |
311 | /* Add a node for the not-previously-seen nodeChar value */ |
312 | out->resultType = spgAddNode; |
313 | out->result.addNode.nodeLabel = Int16GetDatum(nodeChar); |
314 | out->result.addNode.nodeN = i; |
315 | } |
316 | |
317 | PG_RETURN_VOID(); |
318 | } |
319 | |
320 | /* qsort comparator to sort spgNodePtr structs by "c" */ |
321 | static int |
322 | cmpNodePtr(const void *a, const void *b) |
323 | { |
324 | const spgNodePtr *aa = (const spgNodePtr *) a; |
325 | const spgNodePtr *bb = (const spgNodePtr *) b; |
326 | |
327 | return aa->c - bb->c; |
328 | } |
329 | |
330 | Datum |
331 | spg_text_picksplit(PG_FUNCTION_ARGS) |
332 | { |
333 | spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0); |
334 | spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1); |
335 | text *text0 = DatumGetTextPP(in->datums[0]); |
336 | int i, |
337 | commonLen; |
338 | spgNodePtr *nodes; |
339 | |
340 | /* Identify longest common prefix, if any */ |
341 | commonLen = VARSIZE_ANY_EXHDR(text0); |
342 | for (i = 1; i < in->nTuples && commonLen > 0; i++) |
343 | { |
344 | text *texti = DatumGetTextPP(in->datums[i]); |
345 | int tmp = commonPrefix(VARDATA_ANY(text0), |
346 | VARDATA_ANY(texti), |
347 | VARSIZE_ANY_EXHDR(text0), |
348 | VARSIZE_ANY_EXHDR(texti)); |
349 | |
350 | if (tmp < commonLen) |
351 | commonLen = tmp; |
352 | } |
353 | |
354 | /* |
355 | * Limit the prefix length, if necessary, to ensure that the resulting |
356 | * inner tuple will fit on a page. |
357 | */ |
358 | commonLen = Min(commonLen, SPGIST_MAX_PREFIX_LENGTH); |
359 | |
360 | /* Set node prefix to be that string, if it's not empty */ |
361 | if (commonLen == 0) |
362 | { |
363 | out->hasPrefix = false; |
364 | } |
365 | else |
366 | { |
367 | out->hasPrefix = true; |
368 | out->prefixDatum = formTextDatum(VARDATA_ANY(text0), commonLen); |
369 | } |
370 | |
371 | /* Extract the node label (first non-common byte) from each value */ |
372 | nodes = (spgNodePtr *) palloc(sizeof(spgNodePtr) * in->nTuples); |
373 | |
374 | for (i = 0; i < in->nTuples; i++) |
375 | { |
376 | text *texti = DatumGetTextPP(in->datums[i]); |
377 | |
378 | if (commonLen < VARSIZE_ANY_EXHDR(texti)) |
379 | nodes[i].c = *(unsigned char *) (VARDATA_ANY(texti) + commonLen); |
380 | else |
381 | nodes[i].c = -1; /* use -1 if string is all common */ |
382 | nodes[i].i = i; |
383 | nodes[i].d = in->datums[i]; |
384 | } |
385 | |
386 | /* |
387 | * Sort by label values so that we can group the values into nodes. This |
388 | * also ensures that the nodes are ordered by label value, allowing the |
389 | * use of binary search in searchChar. |
390 | */ |
391 | qsort(nodes, in->nTuples, sizeof(*nodes), cmpNodePtr); |
392 | |
393 | /* And emit results */ |
394 | out->nNodes = 0; |
395 | out->nodeLabels = (Datum *) palloc(sizeof(Datum) * in->nTuples); |
396 | out->mapTuplesToNodes = (int *) palloc(sizeof(int) * in->nTuples); |
397 | out->leafTupleDatums = (Datum *) palloc(sizeof(Datum) * in->nTuples); |
398 | |
399 | for (i = 0; i < in->nTuples; i++) |
400 | { |
401 | text *texti = DatumGetTextPP(nodes[i].d); |
402 | Datum leafD; |
403 | |
404 | if (i == 0 || nodes[i].c != nodes[i - 1].c) |
405 | { |
406 | out->nodeLabels[out->nNodes] = Int16GetDatum(nodes[i].c); |
407 | out->nNodes++; |
408 | } |
409 | |
410 | if (commonLen < VARSIZE_ANY_EXHDR(texti)) |
411 | leafD = formTextDatum(VARDATA_ANY(texti) + commonLen + 1, |
412 | VARSIZE_ANY_EXHDR(texti) - commonLen - 1); |
413 | else |
414 | leafD = formTextDatum(NULL, 0); |
415 | |
416 | out->leafTupleDatums[nodes[i].i] = leafD; |
417 | out->mapTuplesToNodes[nodes[i].i] = out->nNodes - 1; |
418 | } |
419 | |
420 | PG_RETURN_VOID(); |
421 | } |
422 | |
423 | Datum |
424 | spg_text_inner_consistent(PG_FUNCTION_ARGS) |
425 | { |
426 | spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0); |
427 | spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1); |
428 | bool collate_is_c = lc_collate_is_c(PG_GET_COLLATION()); |
429 | text *reconstructedValue; |
430 | text *reconstrText; |
431 | int maxReconstrLen; |
432 | text *prefixText = NULL; |
433 | int prefixSize = 0; |
434 | int i; |
435 | |
436 | /* |
437 | * Reconstruct values represented at this tuple, including parent data, |
438 | * prefix of this tuple if any, and the node label if it's non-dummy. |
439 | * in->level should be the length of the previously reconstructed value, |
440 | * and the number of bytes added here is prefixSize or prefixSize + 1. |
441 | * |
442 | * Note: we assume that in->reconstructedValue isn't toasted and doesn't |
443 | * have a short varlena header. This is okay because it must have been |
444 | * created by a previous invocation of this routine, and we always emit |
445 | * long-format reconstructed values. |
446 | */ |
447 | reconstructedValue = (text *) DatumGetPointer(in->reconstructedValue); |
448 | Assert(reconstructedValue == NULL ? in->level == 0 : |
449 | VARSIZE_ANY_EXHDR(reconstructedValue) == in->level); |
450 | |
451 | maxReconstrLen = in->level + 1; |
452 | if (in->hasPrefix) |
453 | { |
454 | prefixText = DatumGetTextPP(in->prefixDatum); |
455 | prefixSize = VARSIZE_ANY_EXHDR(prefixText); |
456 | maxReconstrLen += prefixSize; |
457 | } |
458 | |
459 | reconstrText = palloc(VARHDRSZ + maxReconstrLen); |
460 | SET_VARSIZE(reconstrText, VARHDRSZ + maxReconstrLen); |
461 | |
462 | if (in->level) |
463 | memcpy(VARDATA(reconstrText), |
464 | VARDATA(reconstructedValue), |
465 | in->level); |
466 | if (prefixSize) |
467 | memcpy(((char *) VARDATA(reconstrText)) + in->level, |
468 | VARDATA_ANY(prefixText), |
469 | prefixSize); |
470 | /* last byte of reconstrText will be filled in below */ |
471 | |
472 | /* |
473 | * Scan the child nodes. For each one, complete the reconstructed value |
474 | * and see if it's consistent with the query. If so, emit an entry into |
475 | * the output arrays. |
476 | */ |
477 | out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes); |
478 | out->levelAdds = (int *) palloc(sizeof(int) * in->nNodes); |
479 | out->reconstructedValues = (Datum *) palloc(sizeof(Datum) * in->nNodes); |
480 | out->nNodes = 0; |
481 | |
482 | for (i = 0; i < in->nNodes; i++) |
483 | { |
484 | int16 nodeChar = DatumGetInt16(in->nodeLabels[i]); |
485 | int thisLen; |
486 | bool res = true; |
487 | int j; |
488 | |
489 | /* If nodeChar is a dummy value, don't include it in data */ |
490 | if (nodeChar <= 0) |
491 | thisLen = maxReconstrLen - 1; |
492 | else |
493 | { |
494 | ((unsigned char *) VARDATA(reconstrText))[maxReconstrLen - 1] = nodeChar; |
495 | thisLen = maxReconstrLen; |
496 | } |
497 | |
498 | for (j = 0; j < in->nkeys; j++) |
499 | { |
500 | StrategyNumber strategy = in->scankeys[j].sk_strategy; |
501 | text *inText; |
502 | int inSize; |
503 | int r; |
504 | |
505 | /* |
506 | * If it's a collation-aware operator, but the collation is C, we |
507 | * can treat it as non-collation-aware. With non-C collation we |
508 | * need to traverse whole tree :-( so there's no point in making |
509 | * any check here. (Note also that our reconstructed value may |
510 | * well end with a partial multibyte character, so that applying |
511 | * any encoding-sensitive test to it would be risky anyhow.) |
512 | */ |
513 | if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy)) |
514 | { |
515 | if (collate_is_c) |
516 | strategy -= SPG_STRATEGY_ADDITION; |
517 | else |
518 | continue; |
519 | } |
520 | |
521 | inText = DatumGetTextPP(in->scankeys[j].sk_argument); |
522 | inSize = VARSIZE_ANY_EXHDR(inText); |
523 | |
524 | r = memcmp(VARDATA(reconstrText), VARDATA_ANY(inText), |
525 | Min(inSize, thisLen)); |
526 | |
527 | switch (strategy) |
528 | { |
529 | case BTLessStrategyNumber: |
530 | case BTLessEqualStrategyNumber: |
531 | if (r > 0) |
532 | res = false; |
533 | break; |
534 | case BTEqualStrategyNumber: |
535 | if (r != 0 || inSize < thisLen) |
536 | res = false; |
537 | break; |
538 | case BTGreaterEqualStrategyNumber: |
539 | case BTGreaterStrategyNumber: |
540 | if (r < 0) |
541 | res = false; |
542 | break; |
543 | case RTPrefixStrategyNumber: |
544 | if (r != 0) |
545 | res = false; |
546 | break; |
547 | default: |
548 | elog(ERROR, "unrecognized strategy number: %d" , |
549 | in->scankeys[j].sk_strategy); |
550 | break; |
551 | } |
552 | |
553 | if (!res) |
554 | break; /* no need to consider remaining conditions */ |
555 | } |
556 | |
557 | if (res) |
558 | { |
559 | out->nodeNumbers[out->nNodes] = i; |
560 | out->levelAdds[out->nNodes] = thisLen - in->level; |
561 | SET_VARSIZE(reconstrText, VARHDRSZ + thisLen); |
562 | out->reconstructedValues[out->nNodes] = |
563 | datumCopy(PointerGetDatum(reconstrText), false, -1); |
564 | out->nNodes++; |
565 | } |
566 | } |
567 | |
568 | PG_RETURN_VOID(); |
569 | } |
570 | |
571 | Datum |
572 | spg_text_leaf_consistent(PG_FUNCTION_ARGS) |
573 | { |
574 | spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0); |
575 | spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1); |
576 | int level = in->level; |
577 | text *leafValue, |
578 | *reconstrValue = NULL; |
579 | char *fullValue; |
580 | int fullLen; |
581 | bool res; |
582 | int j; |
583 | |
584 | /* all tests are exact */ |
585 | out->recheck = false; |
586 | |
587 | leafValue = DatumGetTextPP(in->leafDatum); |
588 | |
589 | /* As above, in->reconstructedValue isn't toasted or short. */ |
590 | if (DatumGetPointer(in->reconstructedValue)) |
591 | reconstrValue = (text *) DatumGetPointer(in->reconstructedValue); |
592 | |
593 | Assert(reconstrValue == NULL ? level == 0 : |
594 | VARSIZE_ANY_EXHDR(reconstrValue) == level); |
595 | |
596 | /* Reconstruct the full string represented by this leaf tuple */ |
597 | fullLen = level + VARSIZE_ANY_EXHDR(leafValue); |
598 | if (VARSIZE_ANY_EXHDR(leafValue) == 0 && level > 0) |
599 | { |
600 | fullValue = VARDATA(reconstrValue); |
601 | out->leafValue = PointerGetDatum(reconstrValue); |
602 | } |
603 | else |
604 | { |
605 | text *fullText = palloc(VARHDRSZ + fullLen); |
606 | |
607 | SET_VARSIZE(fullText, VARHDRSZ + fullLen); |
608 | fullValue = VARDATA(fullText); |
609 | if (level) |
610 | memcpy(fullValue, VARDATA(reconstrValue), level); |
611 | if (VARSIZE_ANY_EXHDR(leafValue) > 0) |
612 | memcpy(fullValue + level, VARDATA_ANY(leafValue), |
613 | VARSIZE_ANY_EXHDR(leafValue)); |
614 | out->leafValue = PointerGetDatum(fullText); |
615 | } |
616 | |
617 | /* Perform the required comparison(s) */ |
618 | res = true; |
619 | for (j = 0; j < in->nkeys; j++) |
620 | { |
621 | StrategyNumber strategy = in->scankeys[j].sk_strategy; |
622 | text *query = DatumGetTextPP(in->scankeys[j].sk_argument); |
623 | int queryLen = VARSIZE_ANY_EXHDR(query); |
624 | int r; |
625 | |
626 | if (strategy == RTPrefixStrategyNumber) |
627 | { |
628 | /* |
629 | * if level >= length of query then reconstrValue must begin with |
630 | * query (prefix) string, so we don't need to check it again. |
631 | */ |
632 | res = (level >= queryLen) || |
633 | DatumGetBool(DirectFunctionCall2Coll(text_starts_with, |
634 | PG_GET_COLLATION(), |
635 | out->leafValue, |
636 | PointerGetDatum(query))); |
637 | |
638 | if (!res) /* no need to consider remaining conditions */ |
639 | break; |
640 | |
641 | continue; |
642 | } |
643 | |
644 | if (SPG_IS_COLLATION_AWARE_STRATEGY(strategy)) |
645 | { |
646 | /* Collation-aware comparison */ |
647 | strategy -= SPG_STRATEGY_ADDITION; |
648 | |
649 | /* If asserts enabled, verify encoding of reconstructed string */ |
650 | Assert(pg_verifymbstr(fullValue, fullLen, false)); |
651 | |
652 | r = varstr_cmp(fullValue, fullLen, |
653 | VARDATA_ANY(query), queryLen, |
654 | PG_GET_COLLATION()); |
655 | } |
656 | else |
657 | { |
658 | /* Non-collation-aware comparison */ |
659 | r = memcmp(fullValue, VARDATA_ANY(query), Min(queryLen, fullLen)); |
660 | |
661 | if (r == 0) |
662 | { |
663 | if (queryLen > fullLen) |
664 | r = -1; |
665 | else if (queryLen < fullLen) |
666 | r = 1; |
667 | } |
668 | } |
669 | |
670 | switch (strategy) |
671 | { |
672 | case BTLessStrategyNumber: |
673 | res = (r < 0); |
674 | break; |
675 | case BTLessEqualStrategyNumber: |
676 | res = (r <= 0); |
677 | break; |
678 | case BTEqualStrategyNumber: |
679 | res = (r == 0); |
680 | break; |
681 | case BTGreaterEqualStrategyNumber: |
682 | res = (r >= 0); |
683 | break; |
684 | case BTGreaterStrategyNumber: |
685 | res = (r > 0); |
686 | break; |
687 | default: |
688 | elog(ERROR, "unrecognized strategy number: %d" , |
689 | in->scankeys[j].sk_strategy); |
690 | res = false; |
691 | break; |
692 | } |
693 | |
694 | if (!res) |
695 | break; /* no need to consider remaining conditions */ |
696 | } |
697 | |
698 | PG_RETURN_BOOL(res); |
699 | } |
700 | |