1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * rangetypes_spgist.c |
4 | * implementation of quad tree over ranges mapped to 2d-points for SP-GiST. |
5 | * |
6 | * Quad tree is a data structure similar to a binary tree, but is adapted to |
7 | * 2d data. Each inner node of a quad tree contains a point (centroid) which |
8 | * divides the 2d-space into 4 quadrants. Each quadrant is associated with a |
9 | * child node. |
10 | * |
11 | * Ranges are mapped to 2d-points so that the lower bound is one dimension, |
12 | * and the upper bound is another. By convention, we visualize the lower bound |
13 | * to be the horizontal axis, and upper bound the vertical axis. |
14 | * |
15 | * One quirk with this mapping is the handling of empty ranges. An empty range |
16 | * doesn't have lower and upper bounds, so it cannot be mapped to 2d space in |
17 | * a straightforward way. To cope with that, the root node can have a 5th |
18 | * quadrant, which is reserved for empty ranges. Furthermore, there can be |
19 | * inner nodes in the tree with no centroid. They contain only two child nodes, |
20 | * one for empty ranges and another for non-empty ones. Such a node can appear |
21 | * as the root node, or in the tree under the 5th child of the root node (in |
22 | * which case it will only contain empty nodes). |
23 | * |
24 | * The SP-GiST picksplit function uses medians along both axes as the centroid. |
25 | * This implementation only uses the comparison function of the range element |
26 | * datatype, therefore it works for any range type. |
27 | * |
28 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
29 | * Portions Copyright (c) 1994, Regents of the University of California |
30 | * |
31 | * IDENTIFICATION |
32 | * src/backend/utils/adt/rangetypes_spgist.c |
33 | * |
34 | *------------------------------------------------------------------------- |
35 | */ |
36 | |
37 | #include "postgres.h" |
38 | |
39 | #include "access/spgist.h" |
40 | #include "access/stratnum.h" |
41 | #include "catalog/pg_type.h" |
42 | #include "utils/builtins.h" |
43 | #include "utils/datum.h" |
44 | #include "utils/rangetypes.h" |
45 | |
46 | static int16 getQuadrant(TypeCacheEntry *typcache, RangeType *centroid, |
47 | RangeType *tst); |
48 | static int bound_cmp(const void *a, const void *b, void *arg); |
49 | |
50 | static int adjacent_inner_consistent(TypeCacheEntry *typcache, |
51 | RangeBound *arg, RangeBound *centroid, |
52 | RangeBound *prev); |
53 | static int adjacent_cmp_bounds(TypeCacheEntry *typcache, RangeBound *arg, |
54 | RangeBound *centroid); |
55 | |
56 | /* |
57 | * SP-GiST 'config' interface function. |
58 | */ |
59 | Datum |
60 | spg_range_quad_config(PG_FUNCTION_ARGS) |
61 | { |
62 | /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */ |
63 | spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1); |
64 | |
65 | cfg->prefixType = ANYRANGEOID; |
66 | cfg->labelType = VOIDOID; /* we don't need node labels */ |
67 | cfg->canReturnData = true; |
68 | cfg->longValuesOK = false; |
69 | PG_RETURN_VOID(); |
70 | } |
71 | |
72 | /*---------- |
73 | * Determine which quadrant a 2d-mapped range falls into, relative to the |
74 | * centroid. |
75 | * |
76 | * Quadrants are numbered like this: |
77 | * |
78 | * 4 | 1 |
79 | * ----+---- |
80 | * 3 | 2 |
81 | * |
82 | * Where the lower bound of range is the horizontal axis and upper bound the |
83 | * vertical axis. |
84 | * |
85 | * Ranges on one of the axes are taken to lie in the quadrant with higher value |
86 | * along perpendicular axis. That is, a value on the horizontal axis is taken |
87 | * to belong to quadrant 1 or 4, and a value on the vertical axis is taken to |
88 | * belong to quadrant 1 or 2. A range equal to centroid is taken to lie in |
89 | * quadrant 1. |
90 | * |
91 | * Empty ranges are taken to lie in the special quadrant 5. |
92 | *---------- |
93 | */ |
94 | static int16 |
95 | getQuadrant(TypeCacheEntry *typcache, RangeType *centroid, RangeType *tst) |
96 | { |
97 | RangeBound centroidLower, |
98 | centroidUpper; |
99 | bool centroidEmpty; |
100 | RangeBound lower, |
101 | upper; |
102 | bool empty; |
103 | |
104 | range_deserialize(typcache, centroid, ¢roidLower, ¢roidUpper, |
105 | ¢roidEmpty); |
106 | range_deserialize(typcache, tst, &lower, &upper, &empty); |
107 | |
108 | if (empty) |
109 | return 5; |
110 | |
111 | if (range_cmp_bounds(typcache, &lower, ¢roidLower) >= 0) |
112 | { |
113 | if (range_cmp_bounds(typcache, &upper, ¢roidUpper) >= 0) |
114 | return 1; |
115 | else |
116 | return 2; |
117 | } |
118 | else |
119 | { |
120 | if (range_cmp_bounds(typcache, &upper, ¢roidUpper) >= 0) |
121 | return 4; |
122 | else |
123 | return 3; |
124 | } |
125 | } |
126 | |
127 | /* |
128 | * Choose SP-GiST function: choose path for addition of new range. |
129 | */ |
130 | Datum |
131 | spg_range_quad_choose(PG_FUNCTION_ARGS) |
132 | { |
133 | spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0); |
134 | spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1); |
135 | RangeType *inRange = DatumGetRangeTypeP(in->datum), |
136 | *centroid; |
137 | int16 quadrant; |
138 | TypeCacheEntry *typcache; |
139 | |
140 | if (in->allTheSame) |
141 | { |
142 | out->resultType = spgMatchNode; |
143 | /* nodeN will be set by core */ |
144 | out->result.matchNode.levelAdd = 0; |
145 | out->result.matchNode.restDatum = RangeTypePGetDatum(inRange); |
146 | PG_RETURN_VOID(); |
147 | } |
148 | |
149 | typcache = range_get_typcache(fcinfo, RangeTypeGetOid(inRange)); |
150 | |
151 | /* |
152 | * A node with no centroid divides ranges purely on whether they're empty |
153 | * or not. All empty ranges go to child node 0, all non-empty ranges go to |
154 | * node 1. |
155 | */ |
156 | if (!in->hasPrefix) |
157 | { |
158 | out->resultType = spgMatchNode; |
159 | if (RangeIsEmpty(inRange)) |
160 | out->result.matchNode.nodeN = 0; |
161 | else |
162 | out->result.matchNode.nodeN = 1; |
163 | out->result.matchNode.levelAdd = 1; |
164 | out->result.matchNode.restDatum = RangeTypePGetDatum(inRange); |
165 | PG_RETURN_VOID(); |
166 | } |
167 | |
168 | centroid = DatumGetRangeTypeP(in->prefixDatum); |
169 | quadrant = getQuadrant(typcache, centroid, inRange); |
170 | |
171 | Assert(quadrant <= in->nNodes); |
172 | |
173 | /* Select node matching to quadrant number */ |
174 | out->resultType = spgMatchNode; |
175 | out->result.matchNode.nodeN = quadrant - 1; |
176 | out->result.matchNode.levelAdd = 1; |
177 | out->result.matchNode.restDatum = RangeTypePGetDatum(inRange); |
178 | |
179 | PG_RETURN_VOID(); |
180 | } |
181 | |
182 | /* |
183 | * Bound comparison for sorting. |
184 | */ |
185 | static int |
186 | bound_cmp(const void *a, const void *b, void *arg) |
187 | { |
188 | RangeBound *ba = (RangeBound *) a; |
189 | RangeBound *bb = (RangeBound *) b; |
190 | TypeCacheEntry *typcache = (TypeCacheEntry *) arg; |
191 | |
192 | return range_cmp_bounds(typcache, ba, bb); |
193 | } |
194 | |
195 | /* |
196 | * Picksplit SP-GiST function: split ranges into nodes. Select "centroid" |
197 | * range and distribute ranges according to quadrants. |
198 | */ |
199 | Datum |
200 | spg_range_quad_picksplit(PG_FUNCTION_ARGS) |
201 | { |
202 | spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0); |
203 | spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1); |
204 | int i; |
205 | int j; |
206 | int nonEmptyCount; |
207 | RangeType *centroid; |
208 | bool empty; |
209 | TypeCacheEntry *typcache; |
210 | |
211 | /* Use the median values of lower and upper bounds as the centroid range */ |
212 | RangeBound *lowerBounds, |
213 | *upperBounds; |
214 | |
215 | typcache = range_get_typcache(fcinfo, |
216 | RangeTypeGetOid(DatumGetRangeTypeP(in->datums[0]))); |
217 | |
218 | /* Allocate memory for bounds */ |
219 | lowerBounds = palloc(sizeof(RangeBound) * in->nTuples); |
220 | upperBounds = palloc(sizeof(RangeBound) * in->nTuples); |
221 | j = 0; |
222 | |
223 | /* Deserialize bounds of ranges, count non-empty ranges */ |
224 | for (i = 0; i < in->nTuples; i++) |
225 | { |
226 | range_deserialize(typcache, DatumGetRangeTypeP(in->datums[i]), |
227 | &lowerBounds[j], &upperBounds[j], &empty); |
228 | if (!empty) |
229 | j++; |
230 | } |
231 | nonEmptyCount = j; |
232 | |
233 | /* |
234 | * All the ranges are empty. The best we can do is to construct an inner |
235 | * node with no centroid, and put all ranges into node 0. If non-empty |
236 | * ranges are added later, they will be routed to node 1. |
237 | */ |
238 | if (nonEmptyCount == 0) |
239 | { |
240 | out->nNodes = 2; |
241 | out->hasPrefix = false; |
242 | /* Prefix is empty */ |
243 | out->prefixDatum = PointerGetDatum(NULL); |
244 | out->nodeLabels = NULL; |
245 | |
246 | out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples); |
247 | out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples); |
248 | |
249 | /* Place all ranges into node 0 */ |
250 | for (i = 0; i < in->nTuples; i++) |
251 | { |
252 | RangeType *range = DatumGetRangeTypeP(in->datums[i]); |
253 | |
254 | out->leafTupleDatums[i] = RangeTypePGetDatum(range); |
255 | out->mapTuplesToNodes[i] = 0; |
256 | } |
257 | PG_RETURN_VOID(); |
258 | } |
259 | |
260 | /* Sort range bounds in order to find medians */ |
261 | qsort_arg(lowerBounds, nonEmptyCount, sizeof(RangeBound), |
262 | bound_cmp, typcache); |
263 | qsort_arg(upperBounds, nonEmptyCount, sizeof(RangeBound), |
264 | bound_cmp, typcache); |
265 | |
266 | /* Construct "centroid" range from medians of lower and upper bounds */ |
267 | centroid = range_serialize(typcache, &lowerBounds[nonEmptyCount / 2], |
268 | &upperBounds[nonEmptyCount / 2], false); |
269 | out->hasPrefix = true; |
270 | out->prefixDatum = RangeTypePGetDatum(centroid); |
271 | |
272 | /* Create node for empty ranges only if it is a root node */ |
273 | out->nNodes = (in->level == 0) ? 5 : 4; |
274 | out->nodeLabels = NULL; /* we don't need node labels */ |
275 | |
276 | out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples); |
277 | out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples); |
278 | |
279 | /* |
280 | * Assign ranges to corresponding nodes according to quadrants relative to |
281 | * "centroid" range. |
282 | */ |
283 | for (i = 0; i < in->nTuples; i++) |
284 | { |
285 | RangeType *range = DatumGetRangeTypeP(in->datums[i]); |
286 | int16 quadrant = getQuadrant(typcache, centroid, range); |
287 | |
288 | out->leafTupleDatums[i] = RangeTypePGetDatum(range); |
289 | out->mapTuplesToNodes[i] = quadrant - 1; |
290 | } |
291 | |
292 | PG_RETURN_VOID(); |
293 | } |
294 | |
295 | /* |
296 | * SP-GiST consistent function for inner nodes: check which nodes are |
297 | * consistent with given set of queries. |
298 | */ |
299 | Datum |
300 | spg_range_quad_inner_consistent(PG_FUNCTION_ARGS) |
301 | { |
302 | spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0); |
303 | spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1); |
304 | int which; |
305 | int i; |
306 | MemoryContext oldCtx; |
307 | |
308 | /* |
309 | * For adjacent search we need also previous centroid (if any) to improve |
310 | * the precision of the consistent check. In this case needPrevious flag |
311 | * is set and centroid is passed into traversalValue. |
312 | */ |
313 | bool needPrevious = false; |
314 | |
315 | if (in->allTheSame) |
316 | { |
317 | /* Report that all nodes should be visited */ |
318 | out->nNodes = in->nNodes; |
319 | out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes); |
320 | for (i = 0; i < in->nNodes; i++) |
321 | out->nodeNumbers[i] = i; |
322 | PG_RETURN_VOID(); |
323 | } |
324 | |
325 | if (!in->hasPrefix) |
326 | { |
327 | /* |
328 | * No centroid on this inner node. Such a node has two child nodes, |
329 | * the first for empty ranges, and the second for non-empty ones. |
330 | */ |
331 | Assert(in->nNodes == 2); |
332 | |
333 | /* |
334 | * Nth bit of which variable means that (N - 1)th node should be |
335 | * visited. Initially all bits are set. Bits of nodes which should be |
336 | * skipped will be unset. |
337 | */ |
338 | which = (1 << 1) | (1 << 2); |
339 | for (i = 0; i < in->nkeys; i++) |
340 | { |
341 | StrategyNumber strategy = in->scankeys[i].sk_strategy; |
342 | bool empty; |
343 | |
344 | /* |
345 | * The only strategy when second argument of operator is not range |
346 | * is RANGESTRAT_CONTAINS_ELEM. |
347 | */ |
348 | if (strategy != RANGESTRAT_CONTAINS_ELEM) |
349 | empty = RangeIsEmpty( |
350 | DatumGetRangeTypeP(in->scankeys[i].sk_argument)); |
351 | else |
352 | empty = false; |
353 | |
354 | switch (strategy) |
355 | { |
356 | case RANGESTRAT_BEFORE: |
357 | case RANGESTRAT_OVERLEFT: |
358 | case RANGESTRAT_OVERLAPS: |
359 | case RANGESTRAT_OVERRIGHT: |
360 | case RANGESTRAT_AFTER: |
361 | case RANGESTRAT_ADJACENT: |
362 | /* These strategies return false if any argument is empty */ |
363 | if (empty) |
364 | which = 0; |
365 | else |
366 | which &= (1 << 2); |
367 | break; |
368 | |
369 | case RANGESTRAT_CONTAINS: |
370 | |
371 | /* |
372 | * All ranges contain an empty range. Only non-empty |
373 | * ranges can contain a non-empty range. |
374 | */ |
375 | if (!empty) |
376 | which &= (1 << 2); |
377 | break; |
378 | |
379 | case RANGESTRAT_CONTAINED_BY: |
380 | |
381 | /* |
382 | * Only an empty range is contained by an empty range. |
383 | * Both empty and non-empty ranges can be contained by a |
384 | * non-empty range. |
385 | */ |
386 | if (empty) |
387 | which &= (1 << 1); |
388 | break; |
389 | |
390 | case RANGESTRAT_CONTAINS_ELEM: |
391 | which &= (1 << 2); |
392 | break; |
393 | |
394 | case RANGESTRAT_EQ: |
395 | if (empty) |
396 | which &= (1 << 1); |
397 | else |
398 | which &= (1 << 2); |
399 | break; |
400 | |
401 | default: |
402 | elog(ERROR, "unrecognized range strategy: %d" , strategy); |
403 | break; |
404 | } |
405 | if (which == 0) |
406 | break; /* no need to consider remaining conditions */ |
407 | } |
408 | } |
409 | else |
410 | { |
411 | RangeBound centroidLower, |
412 | centroidUpper; |
413 | bool centroidEmpty; |
414 | TypeCacheEntry *typcache; |
415 | RangeType *centroid; |
416 | |
417 | /* This node has a centroid. Fetch it. */ |
418 | centroid = DatumGetRangeTypeP(in->prefixDatum); |
419 | typcache = range_get_typcache(fcinfo, |
420 | RangeTypeGetOid(DatumGetRangeTypeP(centroid))); |
421 | range_deserialize(typcache, centroid, ¢roidLower, ¢roidUpper, |
422 | ¢roidEmpty); |
423 | |
424 | Assert(in->nNodes == 4 || in->nNodes == 5); |
425 | |
426 | /* |
427 | * Nth bit of which variable means that (N - 1)th node (Nth quadrant) |
428 | * should be visited. Initially all bits are set. Bits of nodes which |
429 | * can be skipped will be unset. |
430 | */ |
431 | which = (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4) | (1 << 5); |
432 | |
433 | for (i = 0; i < in->nkeys; i++) |
434 | { |
435 | StrategyNumber strategy; |
436 | RangeBound lower, |
437 | upper; |
438 | bool empty; |
439 | RangeType *range = NULL; |
440 | |
441 | RangeType *prevCentroid = NULL; |
442 | RangeBound prevLower, |
443 | prevUpper; |
444 | bool prevEmpty; |
445 | |
446 | /* Restrictions on range bounds according to scan strategy */ |
447 | RangeBound *minLower = NULL, |
448 | *maxLower = NULL, |
449 | *minUpper = NULL, |
450 | *maxUpper = NULL; |
451 | |
452 | /* Are the restrictions on range bounds inclusive? */ |
453 | bool inclusive = true; |
454 | bool strictEmpty = true; |
455 | int cmp, |
456 | which1, |
457 | which2; |
458 | |
459 | strategy = in->scankeys[i].sk_strategy; |
460 | |
461 | /* |
462 | * RANGESTRAT_CONTAINS_ELEM is just like RANGESTRAT_CONTAINS, but |
463 | * the argument is a single element. Expand the single element to |
464 | * a range containing only the element, and treat it like |
465 | * RANGESTRAT_CONTAINS. |
466 | */ |
467 | if (strategy == RANGESTRAT_CONTAINS_ELEM) |
468 | { |
469 | lower.inclusive = true; |
470 | lower.infinite = false; |
471 | lower.lower = true; |
472 | lower.val = in->scankeys[i].sk_argument; |
473 | |
474 | upper.inclusive = true; |
475 | upper.infinite = false; |
476 | upper.lower = false; |
477 | upper.val = in->scankeys[i].sk_argument; |
478 | |
479 | empty = false; |
480 | |
481 | strategy = RANGESTRAT_CONTAINS; |
482 | } |
483 | else |
484 | { |
485 | range = DatumGetRangeTypeP(in->scankeys[i].sk_argument); |
486 | range_deserialize(typcache, range, &lower, &upper, &empty); |
487 | } |
488 | |
489 | /* |
490 | * Most strategies are handled by forming a bounding box from the |
491 | * search key, defined by a minLower, maxLower, minUpper, |
492 | * maxUpper. Some modify 'which' directly, to specify exactly |
493 | * which quadrants need to be visited. |
494 | * |
495 | * For most strategies, nothing matches an empty search key, and |
496 | * an empty range never matches a non-empty key. If a strategy |
497 | * does not behave like that wrt. empty ranges, set strictEmpty to |
498 | * false. |
499 | */ |
500 | switch (strategy) |
501 | { |
502 | case RANGESTRAT_BEFORE: |
503 | |
504 | /* |
505 | * Range A is before range B if upper bound of A is lower |
506 | * than lower bound of B. |
507 | */ |
508 | maxUpper = &lower; |
509 | inclusive = false; |
510 | break; |
511 | |
512 | case RANGESTRAT_OVERLEFT: |
513 | |
514 | /* |
515 | * Range A is overleft to range B if upper bound of A is |
516 | * less or equal to upper bound of B. |
517 | */ |
518 | maxUpper = &upper; |
519 | break; |
520 | |
521 | case RANGESTRAT_OVERLAPS: |
522 | |
523 | /* |
524 | * Non-empty ranges overlap, if lower bound of each range |
525 | * is lower or equal to upper bound of the other range. |
526 | */ |
527 | maxLower = &upper; |
528 | minUpper = &lower; |
529 | break; |
530 | |
531 | case RANGESTRAT_OVERRIGHT: |
532 | |
533 | /* |
534 | * Range A is overright to range B if lower bound of A is |
535 | * greater or equal to lower bound of B. |
536 | */ |
537 | minLower = &lower; |
538 | break; |
539 | |
540 | case RANGESTRAT_AFTER: |
541 | |
542 | /* |
543 | * Range A is after range B if lower bound of A is greater |
544 | * than upper bound of B. |
545 | */ |
546 | minLower = &upper; |
547 | inclusive = false; |
548 | break; |
549 | |
550 | case RANGESTRAT_ADJACENT: |
551 | if (empty) |
552 | break; /* Skip to strictEmpty check. */ |
553 | |
554 | /* |
555 | * Previously selected quadrant could exclude possibility |
556 | * for lower or upper bounds to be adjacent. Deserialize |
557 | * previous centroid range if present for checking this. |
558 | */ |
559 | if (in->traversalValue) |
560 | { |
561 | prevCentroid = DatumGetRangeTypeP(in->traversalValue); |
562 | range_deserialize(typcache, prevCentroid, |
563 | &prevLower, &prevUpper, &prevEmpty); |
564 | } |
565 | |
566 | /* |
567 | * For a range's upper bound to be adjacent to the |
568 | * argument's lower bound, it will be found along the line |
569 | * adjacent to (and just below) Y=lower. Therefore, if the |
570 | * argument's lower bound is less than the centroid's |
571 | * upper bound, the line falls in quadrants 2 and 3; if |
572 | * greater, the line falls in quadrants 1 and 4. (see |
573 | * adjacent_cmp_bounds for description of edge cases). |
574 | */ |
575 | cmp = adjacent_inner_consistent(typcache, &lower, |
576 | ¢roidUpper, |
577 | prevCentroid ? &prevUpper : NULL); |
578 | if (cmp > 0) |
579 | which1 = (1 << 1) | (1 << 4); |
580 | else if (cmp < 0) |
581 | which1 = (1 << 2) | (1 << 3); |
582 | else |
583 | which1 = 0; |
584 | |
585 | /* |
586 | * Also search for ranges's adjacent to argument's upper |
587 | * bound. They will be found along the line adjacent to |
588 | * (and just right of) X=upper, which falls in quadrants 3 |
589 | * and 4, or 1 and 2. |
590 | */ |
591 | cmp = adjacent_inner_consistent(typcache, &upper, |
592 | ¢roidLower, |
593 | prevCentroid ? &prevLower : NULL); |
594 | if (cmp > 0) |
595 | which2 = (1 << 1) | (1 << 2); |
596 | else if (cmp < 0) |
597 | which2 = (1 << 3) | (1 << 4); |
598 | else |
599 | which2 = 0; |
600 | |
601 | /* We must chase down ranges adjacent to either bound. */ |
602 | which &= which1 | which2; |
603 | |
604 | needPrevious = true; |
605 | break; |
606 | |
607 | case RANGESTRAT_CONTAINS: |
608 | |
609 | /* |
610 | * Non-empty range A contains non-empty range B if lower |
611 | * bound of A is lower or equal to lower bound of range B |
612 | * and upper bound of range A is greater or equal to upper |
613 | * bound of range A. |
614 | * |
615 | * All non-empty ranges contain an empty range. |
616 | */ |
617 | strictEmpty = false; |
618 | if (!empty) |
619 | { |
620 | which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4); |
621 | maxLower = &lower; |
622 | minUpper = &upper; |
623 | } |
624 | break; |
625 | |
626 | case RANGESTRAT_CONTAINED_BY: |
627 | /* The opposite of contains. */ |
628 | strictEmpty = false; |
629 | if (empty) |
630 | { |
631 | /* An empty range is only contained by an empty range */ |
632 | which &= (1 << 5); |
633 | } |
634 | else |
635 | { |
636 | minLower = &lower; |
637 | maxUpper = &upper; |
638 | } |
639 | break; |
640 | |
641 | case RANGESTRAT_EQ: |
642 | |
643 | /* |
644 | * Equal range can be only in the same quadrant where |
645 | * argument would be placed to. |
646 | */ |
647 | strictEmpty = false; |
648 | which &= (1 << getQuadrant(typcache, centroid, range)); |
649 | break; |
650 | |
651 | default: |
652 | elog(ERROR, "unrecognized range strategy: %d" , strategy); |
653 | break; |
654 | } |
655 | |
656 | if (strictEmpty) |
657 | { |
658 | if (empty) |
659 | { |
660 | /* Scan key is empty, no branches are satisfying */ |
661 | which = 0; |
662 | break; |
663 | } |
664 | else |
665 | { |
666 | /* Shouldn't visit tree branch with empty ranges */ |
667 | which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4); |
668 | } |
669 | } |
670 | |
671 | /* |
672 | * Using the bounding box, see which quadrants we have to descend |
673 | * into. |
674 | */ |
675 | if (minLower) |
676 | { |
677 | /* |
678 | * If the centroid's lower bound is less than or equal to the |
679 | * minimum lower bound, anything in the 3rd and 4th quadrants |
680 | * will have an even smaller lower bound, and thus can't |
681 | * match. |
682 | */ |
683 | if (range_cmp_bounds(typcache, ¢roidLower, minLower) <= 0) |
684 | which &= (1 << 1) | (1 << 2) | (1 << 5); |
685 | } |
686 | if (maxLower) |
687 | { |
688 | /* |
689 | * If the centroid's lower bound is greater than the maximum |
690 | * lower bound, anything in the 1st and 2nd quadrants will |
691 | * also have a greater than or equal lower bound, and thus |
692 | * can't match. If the centroid's lower bound is equal to the |
693 | * maximum lower bound, we can still exclude the 1st and 2nd |
694 | * quadrants if we're looking for a value strictly greater |
695 | * than the maximum. |
696 | */ |
697 | int cmp; |
698 | |
699 | cmp = range_cmp_bounds(typcache, ¢roidLower, maxLower); |
700 | if (cmp > 0 || (!inclusive && cmp == 0)) |
701 | which &= (1 << 3) | (1 << 4) | (1 << 5); |
702 | } |
703 | if (minUpper) |
704 | { |
705 | /* |
706 | * If the centroid's upper bound is less than or equal to the |
707 | * minimum upper bound, anything in the 2nd and 3rd quadrants |
708 | * will have an even smaller upper bound, and thus can't |
709 | * match. |
710 | */ |
711 | if (range_cmp_bounds(typcache, ¢roidUpper, minUpper) <= 0) |
712 | which &= (1 << 1) | (1 << 4) | (1 << 5); |
713 | } |
714 | if (maxUpper) |
715 | { |
716 | /* |
717 | * If the centroid's upper bound is greater than the maximum |
718 | * upper bound, anything in the 1st and 4th quadrants will |
719 | * also have a greater than or equal upper bound, and thus |
720 | * can't match. If the centroid's upper bound is equal to the |
721 | * maximum upper bound, we can still exclude the 1st and 4th |
722 | * quadrants if we're looking for a value strictly greater |
723 | * than the maximum. |
724 | */ |
725 | int cmp; |
726 | |
727 | cmp = range_cmp_bounds(typcache, ¢roidUpper, maxUpper); |
728 | if (cmp > 0 || (!inclusive && cmp == 0)) |
729 | which &= (1 << 2) | (1 << 3) | (1 << 5); |
730 | } |
731 | |
732 | if (which == 0) |
733 | break; /* no need to consider remaining conditions */ |
734 | } |
735 | } |
736 | |
737 | /* We must descend into the quadrant(s) identified by 'which' */ |
738 | out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes); |
739 | if (needPrevious) |
740 | out->traversalValues = (void **) palloc(sizeof(void *) * in->nNodes); |
741 | out->nNodes = 0; |
742 | |
743 | /* |
744 | * Elements of traversalValues should be allocated in |
745 | * traversalMemoryContext |
746 | */ |
747 | oldCtx = MemoryContextSwitchTo(in->traversalMemoryContext); |
748 | |
749 | for (i = 1; i <= in->nNodes; i++) |
750 | { |
751 | if (which & (1 << i)) |
752 | { |
753 | /* Save previous prefix if needed */ |
754 | if (needPrevious) |
755 | { |
756 | Datum previousCentroid; |
757 | |
758 | /* |
759 | * We know, that in->prefixDatum in this place is varlena, |
760 | * because it's range |
761 | */ |
762 | previousCentroid = datumCopy(in->prefixDatum, false, -1); |
763 | out->traversalValues[out->nNodes] = (void *) previousCentroid; |
764 | } |
765 | out->nodeNumbers[out->nNodes] = i - 1; |
766 | out->nNodes++; |
767 | } |
768 | } |
769 | |
770 | MemoryContextSwitchTo(oldCtx); |
771 | |
772 | PG_RETURN_VOID(); |
773 | } |
774 | |
775 | /* |
776 | * adjacent_cmp_bounds |
777 | * |
778 | * Given an argument and centroid bound, this function determines if any |
779 | * bounds that are adjacent to the argument are smaller than, or greater than |
780 | * or equal to centroid. For brevity, we call the arg < centroid "left", and |
781 | * arg >= centroid case "right". This corresponds to how the quadrants are |
782 | * arranged, if you imagine that "left" is equivalent to "down" and "right" |
783 | * is equivalent to "up". |
784 | * |
785 | * For the "left" case, returns -1, and for the "right" case, returns 1. |
786 | */ |
787 | static int |
788 | adjacent_cmp_bounds(TypeCacheEntry *typcache, RangeBound *arg, |
789 | RangeBound *centroid) |
790 | { |
791 | int cmp; |
792 | |
793 | Assert(arg->lower != centroid->lower); |
794 | |
795 | cmp = range_cmp_bounds(typcache, arg, centroid); |
796 | |
797 | if (centroid->lower) |
798 | { |
799 | /*------ |
800 | * The argument is an upper bound, we are searching for adjacent lower |
801 | * bounds. A matching adjacent lower bound must be *larger* than the |
802 | * argument, but only just. |
803 | * |
804 | * The following table illustrates the desired result with a fixed |
805 | * argument bound, and different centroids. The CMP column shows |
806 | * the value of 'cmp' variable, and ADJ shows whether the argument |
807 | * and centroid are adjacent, per bounds_adjacent(). (N) means we |
808 | * don't need to check for that case, because it's implied by CMP. |
809 | * With the argument range [..., 500), the adjacent range we're |
810 | * searching for is [500, ...): |
811 | * |
812 | * ARGUMENT CENTROID CMP ADJ |
813 | * [..., 500) [498, ...) > (N) [500, ...) is to the right |
814 | * [..., 500) [499, ...) = (N) [500, ...) is to the right |
815 | * [..., 500) [500, ...) < Y [500, ...) is to the right |
816 | * [..., 500) [501, ...) < N [500, ...) is to the left |
817 | * |
818 | * So, we must search left when the argument is smaller than, and not |
819 | * adjacent, to the centroid. Otherwise search right. |
820 | *------ |
821 | */ |
822 | if (cmp < 0 && !bounds_adjacent(typcache, *arg, *centroid)) |
823 | return -1; |
824 | else |
825 | return 1; |
826 | } |
827 | else |
828 | { |
829 | /*------ |
830 | * The argument is a lower bound, we are searching for adjacent upper |
831 | * bounds. A matching adjacent upper bound must be *smaller* than the |
832 | * argument, but only just. |
833 | * |
834 | * ARGUMENT CENTROID CMP ADJ |
835 | * [500, ...) [..., 499) > (N) [..., 500) is to the right |
836 | * [500, ...) [..., 500) > (Y) [..., 500) is to the right |
837 | * [500, ...) [..., 501) = (N) [..., 500) is to the left |
838 | * [500, ...) [..., 502) < (N) [..., 500) is to the left |
839 | * |
840 | * We must search left when the argument is smaller than or equal to |
841 | * the centroid. Otherwise search right. We don't need to check |
842 | * whether the argument is adjacent with the centroid, because it |
843 | * doesn't matter. |
844 | *------ |
845 | */ |
846 | if (cmp <= 0) |
847 | return -1; |
848 | else |
849 | return 1; |
850 | } |
851 | } |
852 | |
853 | /*---------- |
854 | * adjacent_inner_consistent |
855 | * |
856 | * Like adjacent_cmp_bounds, but also takes into account the previous |
857 | * level's centroid. We might've traversed left (or right) at the previous |
858 | * node, in search for ranges adjacent to the other bound, even though we |
859 | * already ruled out the possibility for any matches in that direction for |
860 | * this bound. By comparing the argument with the previous centroid, and |
861 | * the previous centroid with the current centroid, we can determine which |
862 | * direction we should've moved in at previous level, and which direction we |
863 | * actually moved. |
864 | * |
865 | * If there can be any matches to the left, returns -1. If to the right, |
866 | * returns 1. If there can be no matches below this centroid, because we |
867 | * already ruled them out at the previous level, returns 0. |
868 | * |
869 | * XXX: Comparing just the previous and current level isn't foolproof; we |
870 | * might still search some branches unnecessarily. For example, imagine that |
871 | * we are searching for value 15, and we traverse the following centroids |
872 | * (only considering one bound for the moment): |
873 | * |
874 | * Level 1: 20 |
875 | * Level 2: 50 |
876 | * Level 3: 25 |
877 | * |
878 | * At this point, previous centroid is 50, current centroid is 25, and the |
879 | * target value is to the left. But because we already moved right from |
880 | * centroid 20 to 50 in the first level, there cannot be any values < 20 in |
881 | * the current branch. But we don't know that just by looking at the previous |
882 | * and current centroid, so we traverse left, unnecessarily. The reason we are |
883 | * down this branch is that we're searching for matches with the *other* |
884 | * bound. If we kept track of which bound we are searching for explicitly, |
885 | * instead of deducing that from the previous and current centroid, we could |
886 | * avoid some unnecessary work. |
887 | *---------- |
888 | */ |
889 | static int |
890 | adjacent_inner_consistent(TypeCacheEntry *typcache, RangeBound *arg, |
891 | RangeBound *centroid, RangeBound *prev) |
892 | { |
893 | if (prev) |
894 | { |
895 | int prevcmp; |
896 | int cmp; |
897 | |
898 | /* |
899 | * Which direction were we supposed to traverse at previous level, |
900 | * left or right? |
901 | */ |
902 | prevcmp = adjacent_cmp_bounds(typcache, arg, prev); |
903 | |
904 | /* and which direction did we actually go? */ |
905 | cmp = range_cmp_bounds(typcache, centroid, prev); |
906 | |
907 | /* if the two don't agree, there's nothing to see here */ |
908 | if ((prevcmp < 0 && cmp >= 0) || (prevcmp > 0 && cmp < 0)) |
909 | return 0; |
910 | } |
911 | |
912 | return adjacent_cmp_bounds(typcache, arg, centroid); |
913 | } |
914 | |
915 | /* |
916 | * SP-GiST consistent function for leaf nodes: check leaf value against query |
917 | * using corresponding function. |
918 | */ |
919 | Datum |
920 | spg_range_quad_leaf_consistent(PG_FUNCTION_ARGS) |
921 | { |
922 | spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0); |
923 | spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1); |
924 | RangeType *leafRange = DatumGetRangeTypeP(in->leafDatum); |
925 | TypeCacheEntry *typcache; |
926 | bool res; |
927 | int i; |
928 | |
929 | /* all tests are exact */ |
930 | out->recheck = false; |
931 | |
932 | /* leafDatum is what it is... */ |
933 | out->leafValue = in->leafDatum; |
934 | |
935 | typcache = range_get_typcache(fcinfo, RangeTypeGetOid(leafRange)); |
936 | |
937 | /* Perform the required comparison(s) */ |
938 | res = true; |
939 | for (i = 0; i < in->nkeys; i++) |
940 | { |
941 | Datum keyDatum = in->scankeys[i].sk_argument; |
942 | |
943 | /* Call the function corresponding to the scan strategy */ |
944 | switch (in->scankeys[i].sk_strategy) |
945 | { |
946 | case RANGESTRAT_BEFORE: |
947 | res = range_before_internal(typcache, leafRange, |
948 | DatumGetRangeTypeP(keyDatum)); |
949 | break; |
950 | case RANGESTRAT_OVERLEFT: |
951 | res = range_overleft_internal(typcache, leafRange, |
952 | DatumGetRangeTypeP(keyDatum)); |
953 | break; |
954 | case RANGESTRAT_OVERLAPS: |
955 | res = range_overlaps_internal(typcache, leafRange, |
956 | DatumGetRangeTypeP(keyDatum)); |
957 | break; |
958 | case RANGESTRAT_OVERRIGHT: |
959 | res = range_overright_internal(typcache, leafRange, |
960 | DatumGetRangeTypeP(keyDatum)); |
961 | break; |
962 | case RANGESTRAT_AFTER: |
963 | res = range_after_internal(typcache, leafRange, |
964 | DatumGetRangeTypeP(keyDatum)); |
965 | break; |
966 | case RANGESTRAT_ADJACENT: |
967 | res = range_adjacent_internal(typcache, leafRange, |
968 | DatumGetRangeTypeP(keyDatum)); |
969 | break; |
970 | case RANGESTRAT_CONTAINS: |
971 | res = range_contains_internal(typcache, leafRange, |
972 | DatumGetRangeTypeP(keyDatum)); |
973 | break; |
974 | case RANGESTRAT_CONTAINED_BY: |
975 | res = range_contained_by_internal(typcache, leafRange, |
976 | DatumGetRangeTypeP(keyDatum)); |
977 | break; |
978 | case RANGESTRAT_CONTAINS_ELEM: |
979 | res = range_contains_elem_internal(typcache, leafRange, |
980 | keyDatum); |
981 | break; |
982 | case RANGESTRAT_EQ: |
983 | res = range_eq_internal(typcache, leafRange, |
984 | DatumGetRangeTypeP(keyDatum)); |
985 | break; |
986 | default: |
987 | elog(ERROR, "unrecognized range strategy: %d" , |
988 | in->scankeys[i].sk_strategy); |
989 | break; |
990 | } |
991 | |
992 | /* |
993 | * If leaf datum doesn't match to a query key, no need to check |
994 | * subsequent keys. |
995 | */ |
996 | if (!res) |
997 | break; |
998 | } |
999 | |
1000 | PG_RETURN_BOOL(res); |
1001 | } |
1002 | |