1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * partbounds.c |
4 | * Support routines for manipulating partition bounds |
5 | * |
6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
7 | * Portions Copyright (c) 1994, Regents of the University of California |
8 | * |
9 | * IDENTIFICATION |
10 | * src/backend/partitioning/partbounds.c |
11 | * |
12 | *------------------------------------------------------------------------- |
13 | */ |
14 | |
15 | #include "postgres.h" |
16 | |
17 | #include "access/relation.h" |
18 | #include "access/table.h" |
19 | #include "access/tableam.h" |
20 | #include "catalog/partition.h" |
21 | #include "catalog/pg_inherits.h" |
22 | #include "catalog/pg_type.h" |
23 | #include "commands/tablecmds.h" |
24 | #include "executor/executor.h" |
25 | #include "miscadmin.h" |
26 | #include "nodes/makefuncs.h" |
27 | #include "nodes/nodeFuncs.h" |
28 | #include "parser/parse_coerce.h" |
29 | #include "partitioning/partbounds.h" |
30 | #include "partitioning/partdesc.h" |
31 | #include "partitioning/partprune.h" |
32 | #include "utils/builtins.h" |
33 | #include "utils/datum.h" |
34 | #include "utils/fmgroids.h" |
35 | #include "utils/hashutils.h" |
36 | #include "utils/lsyscache.h" |
37 | #include "utils/partcache.h" |
38 | #include "utils/rel.h" |
39 | #include "utils/snapmgr.h" |
40 | #include "utils/ruleutils.h" |
41 | #include "utils/syscache.h" |
42 | |
43 | /* |
44 | * When qsort'ing partition bounds after reading from the catalog, each bound |
45 | * is represented with one of the following structs. |
46 | */ |
47 | |
48 | /* One bound of a hash partition */ |
49 | typedef struct PartitionHashBound |
50 | { |
51 | int modulus; |
52 | int remainder; |
53 | int index; |
54 | } PartitionHashBound; |
55 | |
56 | /* One value coming from some (index'th) list partition */ |
57 | typedef struct PartitionListValue |
58 | { |
59 | int index; |
60 | Datum value; |
61 | } PartitionListValue; |
62 | |
63 | /* One bound of a range partition */ |
64 | typedef struct PartitionRangeBound |
65 | { |
66 | int index; |
67 | Datum *datums; /* range bound datums */ |
68 | PartitionRangeDatumKind *kind; /* the kind of each datum */ |
69 | bool lower; /* this is the lower (vs upper) bound */ |
70 | } PartitionRangeBound; |
71 | |
72 | static int32 qsort_partition_hbound_cmp(const void *a, const void *b); |
73 | static int32 qsort_partition_list_value_cmp(const void *a, const void *b, |
74 | void *arg); |
75 | static int32 qsort_partition_rbound_cmp(const void *a, const void *b, |
76 | void *arg); |
77 | static PartitionBoundInfo create_hash_bounds(PartitionBoundSpec **boundspecs, |
78 | int nparts, PartitionKey key, int **mapping); |
79 | static PartitionBoundInfo create_list_bounds(PartitionBoundSpec **boundspecs, |
80 | int nparts, PartitionKey key, int **mapping); |
81 | static PartitionBoundInfo create_range_bounds(PartitionBoundSpec **boundspecs, |
82 | int nparts, PartitionKey key, int **mapping); |
83 | static PartitionRangeBound *make_one_partition_rbound(PartitionKey key, int index, |
84 | List *datums, bool lower); |
85 | static int32 partition_hbound_cmp(int modulus1, int remainder1, int modulus2, |
86 | int remainder2); |
87 | static int32 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc, |
88 | Oid *partcollation, Datum *datums1, |
89 | PartitionRangeDatumKind *kind1, bool lower1, |
90 | PartitionRangeBound *b2); |
91 | static int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc, |
92 | Oid *partcollation, |
93 | PartitionBoundInfo boundinfo, |
94 | PartitionRangeBound *probe, bool *is_equal); |
95 | static int get_partition_bound_num_indexes(PartitionBoundInfo b); |
96 | static Expr *make_partition_op_expr(PartitionKey key, int keynum, |
97 | uint16 strategy, Expr *arg1, Expr *arg2); |
98 | static Oid get_partition_operator(PartitionKey key, int col, |
99 | StrategyNumber strategy, bool *need_relabel); |
100 | static List *get_qual_for_hash(Relation parent, PartitionBoundSpec *spec); |
101 | static List *get_qual_for_list(Relation parent, PartitionBoundSpec *spec); |
102 | static List *get_qual_for_range(Relation parent, PartitionBoundSpec *spec, |
103 | bool for_default); |
104 | static void get_range_key_properties(PartitionKey key, int keynum, |
105 | PartitionRangeDatum *ldatum, |
106 | PartitionRangeDatum *udatum, |
107 | ListCell **partexprs_item, |
108 | Expr **keyCol, |
109 | Const **lower_val, Const **upper_val); |
110 | static List *get_range_nulltest(PartitionKey key); |
111 | |
112 | /* |
113 | * get_qual_from_partbound |
114 | * Given a parser node for partition bound, return the list of executable |
115 | * expressions as partition constraint |
116 | */ |
117 | List * |
118 | get_qual_from_partbound(Relation rel, Relation parent, |
119 | PartitionBoundSpec *spec) |
120 | { |
121 | PartitionKey key = RelationGetPartitionKey(parent); |
122 | List *my_qual = NIL; |
123 | |
124 | Assert(key != NULL); |
125 | |
126 | switch (key->strategy) |
127 | { |
128 | case PARTITION_STRATEGY_HASH: |
129 | Assert(spec->strategy == PARTITION_STRATEGY_HASH); |
130 | my_qual = get_qual_for_hash(parent, spec); |
131 | break; |
132 | |
133 | case PARTITION_STRATEGY_LIST: |
134 | Assert(spec->strategy == PARTITION_STRATEGY_LIST); |
135 | my_qual = get_qual_for_list(parent, spec); |
136 | break; |
137 | |
138 | case PARTITION_STRATEGY_RANGE: |
139 | Assert(spec->strategy == PARTITION_STRATEGY_RANGE); |
140 | my_qual = get_qual_for_range(parent, spec, false); |
141 | break; |
142 | |
143 | default: |
144 | elog(ERROR, "unexpected partition strategy: %d" , |
145 | (int) key->strategy); |
146 | } |
147 | |
148 | return my_qual; |
149 | } |
150 | |
151 | /* |
152 | * partition_bounds_create |
153 | * Build a PartitionBoundInfo struct from a list of PartitionBoundSpec |
154 | * nodes |
155 | * |
156 | * This function creates a PartitionBoundInfo and fills the values of its |
157 | * various members based on the input list. Importantly, 'datums' array will |
158 | * contain Datum representation of individual bounds (possibly after |
159 | * de-duplication as in case of range bounds), sorted in a canonical order |
160 | * defined by qsort_partition_* functions of respective partitioning methods. |
161 | * 'indexes' array will contain as many elements as there are bounds (specific |
162 | * exceptions to this rule are listed in the function body), which represent |
163 | * the 0-based canonical positions of partitions. |
164 | * |
165 | * Upon return from this function, *mapping is set to an array of |
166 | * list_length(boundspecs) elements, each of which maps the original index of |
167 | * a partition to its canonical index. |
168 | * |
169 | * Note: The objects returned by this function are wholly allocated in the |
170 | * current memory context. |
171 | */ |
172 | PartitionBoundInfo |
173 | partition_bounds_create(PartitionBoundSpec **boundspecs, int nparts, |
174 | PartitionKey key, int **mapping) |
175 | { |
176 | int i; |
177 | |
178 | Assert(nparts > 0); |
179 | |
180 | /* |
181 | * For each partitioning method, we first convert the partition bounds |
182 | * from their parser node representation to the internal representation, |
183 | * along with any additional preprocessing (such as de-duplicating range |
184 | * bounds). Resulting bound datums are then added to the 'datums' array |
185 | * in PartitionBoundInfo. For each datum added, an integer indicating the |
186 | * canonical partition index is added to the 'indexes' array. |
187 | * |
188 | * For each bound, we remember its partition's position (0-based) in the |
189 | * original list to later map it to the canonical index. |
190 | */ |
191 | |
192 | /* |
193 | * Initialize mapping array with invalid values, this is filled within |
194 | * each sub-routine below depending on the bound type. |
195 | */ |
196 | *mapping = (int *) palloc(sizeof(int) * nparts); |
197 | for (i = 0; i < nparts; i++) |
198 | (*mapping)[i] = -1; |
199 | |
200 | switch (key->strategy) |
201 | { |
202 | case PARTITION_STRATEGY_HASH: |
203 | return create_hash_bounds(boundspecs, nparts, key, mapping); |
204 | |
205 | case PARTITION_STRATEGY_LIST: |
206 | return create_list_bounds(boundspecs, nparts, key, mapping); |
207 | |
208 | case PARTITION_STRATEGY_RANGE: |
209 | return create_range_bounds(boundspecs, nparts, key, mapping); |
210 | |
211 | default: |
212 | elog(ERROR, "unexpected partition strategy: %d" , |
213 | (int) key->strategy); |
214 | break; |
215 | } |
216 | |
217 | Assert(false); |
218 | return NULL; /* keep compiler quiet */ |
219 | } |
220 | |
221 | /* |
222 | * create_hash_bounds |
223 | * Create a PartitionBoundInfo for a hash partitioned table |
224 | */ |
225 | static PartitionBoundInfo |
226 | create_hash_bounds(PartitionBoundSpec **boundspecs, int nparts, |
227 | PartitionKey key, int **mapping) |
228 | { |
229 | PartitionBoundInfo boundinfo; |
230 | PartitionHashBound **hbounds = NULL; |
231 | int i; |
232 | int ndatums = 0; |
233 | int greatest_modulus; |
234 | |
235 | boundinfo = (PartitionBoundInfoData *) |
236 | palloc0(sizeof(PartitionBoundInfoData)); |
237 | boundinfo->strategy = key->strategy; |
238 | /* No special hash partitions. */ |
239 | boundinfo->null_index = -1; |
240 | boundinfo->default_index = -1; |
241 | |
242 | ndatums = nparts; |
243 | hbounds = (PartitionHashBound **) |
244 | palloc(nparts * sizeof(PartitionHashBound *)); |
245 | |
246 | /* Convert from node to the internal representation */ |
247 | for (i = 0; i < nparts; i++) |
248 | { |
249 | PartitionBoundSpec *spec = boundspecs[i]; |
250 | |
251 | if (spec->strategy != PARTITION_STRATEGY_HASH) |
252 | elog(ERROR, "invalid strategy in partition bound spec" ); |
253 | |
254 | hbounds[i] = (PartitionHashBound *) palloc(sizeof(PartitionHashBound)); |
255 | hbounds[i]->modulus = spec->modulus; |
256 | hbounds[i]->remainder = spec->remainder; |
257 | hbounds[i]->index = i; |
258 | } |
259 | |
260 | /* Sort all the bounds in ascending order */ |
261 | qsort(hbounds, nparts, sizeof(PartitionHashBound *), |
262 | qsort_partition_hbound_cmp); |
263 | |
264 | /* After sorting, moduli are now stored in ascending order. */ |
265 | greatest_modulus = hbounds[ndatums - 1]->modulus; |
266 | |
267 | boundinfo->ndatums = ndatums; |
268 | boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *)); |
269 | boundinfo->indexes = (int *) palloc(greatest_modulus * sizeof(int)); |
270 | for (i = 0; i < greatest_modulus; i++) |
271 | boundinfo->indexes[i] = -1; |
272 | |
273 | /* |
274 | * For hash partitioning, there are as many datums (modulus and remainder |
275 | * pairs) as there are partitions. Indexes are simply values ranging from |
276 | * 0 to (nparts - 1). |
277 | */ |
278 | for (i = 0; i < nparts; i++) |
279 | { |
280 | int modulus = hbounds[i]->modulus; |
281 | int remainder = hbounds[i]->remainder; |
282 | |
283 | boundinfo->datums[i] = (Datum *) palloc(2 * sizeof(Datum)); |
284 | boundinfo->datums[i][0] = Int32GetDatum(modulus); |
285 | boundinfo->datums[i][1] = Int32GetDatum(remainder); |
286 | |
287 | while (remainder < greatest_modulus) |
288 | { |
289 | /* overlap? */ |
290 | Assert(boundinfo->indexes[remainder] == -1); |
291 | boundinfo->indexes[remainder] = i; |
292 | remainder += modulus; |
293 | } |
294 | |
295 | (*mapping)[hbounds[i]->index] = i; |
296 | pfree(hbounds[i]); |
297 | } |
298 | pfree(hbounds); |
299 | |
300 | return boundinfo; |
301 | } |
302 | |
303 | /* |
304 | * create_list_bounds |
305 | * Create a PartitionBoundInfo for a list partitioned table |
306 | */ |
307 | static PartitionBoundInfo |
308 | create_list_bounds(PartitionBoundSpec **boundspecs, int nparts, |
309 | PartitionKey key, int **mapping) |
310 | { |
311 | PartitionBoundInfo boundinfo; |
312 | PartitionListValue **all_values = NULL; |
313 | ListCell *cell; |
314 | int i = 0; |
315 | int ndatums = 0; |
316 | int next_index = 0; |
317 | int default_index = -1; |
318 | int null_index = -1; |
319 | List *non_null_values = NIL; |
320 | |
321 | boundinfo = (PartitionBoundInfoData *) |
322 | palloc0(sizeof(PartitionBoundInfoData)); |
323 | boundinfo->strategy = key->strategy; |
324 | /* Will be set correctly below. */ |
325 | boundinfo->null_index = -1; |
326 | boundinfo->default_index = -1; |
327 | |
328 | /* Create a unified list of non-null values across all partitions. */ |
329 | for (i = 0; i < nparts; i++) |
330 | { |
331 | PartitionBoundSpec *spec = boundspecs[i]; |
332 | ListCell *c; |
333 | |
334 | if (spec->strategy != PARTITION_STRATEGY_LIST) |
335 | elog(ERROR, "invalid strategy in partition bound spec" ); |
336 | |
337 | /* |
338 | * Note the index of the partition bound spec for the default |
339 | * partition. There's no datum to add to the list on non-null datums |
340 | * for this partition. |
341 | */ |
342 | if (spec->is_default) |
343 | { |
344 | default_index = i; |
345 | continue; |
346 | } |
347 | |
348 | foreach(c, spec->listdatums) |
349 | { |
350 | Const *val = castNode(Const, lfirst(c)); |
351 | PartitionListValue *list_value = NULL; |
352 | |
353 | if (!val->constisnull) |
354 | { |
355 | list_value = (PartitionListValue *) |
356 | palloc0(sizeof(PartitionListValue)); |
357 | list_value->index = i; |
358 | list_value->value = val->constvalue; |
359 | } |
360 | else |
361 | { |
362 | /* |
363 | * Never put a null into the values array, flag instead for |
364 | * the code further down below where we construct the actual |
365 | * relcache struct. |
366 | */ |
367 | if (null_index != -1) |
368 | elog(ERROR, "found null more than once" ); |
369 | null_index = i; |
370 | } |
371 | |
372 | if (list_value) |
373 | non_null_values = lappend(non_null_values, list_value); |
374 | } |
375 | } |
376 | |
377 | ndatums = list_length(non_null_values); |
378 | |
379 | /* |
380 | * Collect all list values in one array. Alongside the value, we also save |
381 | * the index of partition the value comes from. |
382 | */ |
383 | all_values = (PartitionListValue **) |
384 | palloc(ndatums * sizeof(PartitionListValue *)); |
385 | i = 0; |
386 | foreach(cell, non_null_values) |
387 | { |
388 | PartitionListValue *src = lfirst(cell); |
389 | |
390 | all_values[i] = (PartitionListValue *) |
391 | palloc(sizeof(PartitionListValue)); |
392 | all_values[i]->value = src->value; |
393 | all_values[i]->index = src->index; |
394 | i++; |
395 | } |
396 | |
397 | qsort_arg(all_values, ndatums, sizeof(PartitionListValue *), |
398 | qsort_partition_list_value_cmp, (void *) key); |
399 | |
400 | boundinfo->ndatums = ndatums; |
401 | boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *)); |
402 | boundinfo->indexes = (int *) palloc(ndatums * sizeof(int)); |
403 | |
404 | /* |
405 | * Copy values. Canonical indexes are values ranging from 0 to (nparts - |
406 | * 1) assigned to each partition such that all datums of a given partition |
407 | * receive the same value. The value for a given partition is the index of |
408 | * that partition's smallest datum in the all_values[] array. |
409 | */ |
410 | for (i = 0; i < ndatums; i++) |
411 | { |
412 | int orig_index = all_values[i]->index; |
413 | |
414 | boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum)); |
415 | boundinfo->datums[i][0] = datumCopy(all_values[i]->value, |
416 | key->parttypbyval[0], |
417 | key->parttyplen[0]); |
418 | |
419 | /* If the old index has no mapping, assign one */ |
420 | if ((*mapping)[orig_index] == -1) |
421 | (*mapping)[orig_index] = next_index++; |
422 | |
423 | boundinfo->indexes[i] = (*mapping)[orig_index]; |
424 | } |
425 | |
426 | /* |
427 | * Set the canonical value for null_index, if any. |
428 | * |
429 | * It is possible that the null-accepting partition has not been assigned |
430 | * an index yet, which could happen if such partition accepts only null |
431 | * and hence not handled in the above loop which only looked at non-null |
432 | * values. |
433 | */ |
434 | if (null_index != -1) |
435 | { |
436 | Assert(null_index >= 0); |
437 | if ((*mapping)[null_index] == -1) |
438 | (*mapping)[null_index] = next_index++; |
439 | boundinfo->null_index = (*mapping)[null_index]; |
440 | } |
441 | |
442 | /* Set the canonical value for default_index, if any. */ |
443 | if (default_index != -1) |
444 | { |
445 | /* |
446 | * The default partition accepts any value not specified in the lists |
447 | * of other partitions, hence it should not get mapped index while |
448 | * assigning those for non-null datums. |
449 | */ |
450 | Assert(default_index >= 0); |
451 | Assert((*mapping)[default_index] == -1); |
452 | (*mapping)[default_index] = next_index++; |
453 | boundinfo->default_index = (*mapping)[default_index]; |
454 | } |
455 | |
456 | /* All partitions must now have been assigned canonical indexes. */ |
457 | Assert(next_index == nparts); |
458 | return boundinfo; |
459 | } |
460 | |
461 | /* |
462 | * create_range_bounds |
463 | * Create a PartitionBoundInfo for a range partitioned table |
464 | */ |
465 | static PartitionBoundInfo |
466 | create_range_bounds(PartitionBoundSpec **boundspecs, int nparts, |
467 | PartitionKey key, int **mapping) |
468 | { |
469 | PartitionBoundInfo boundinfo; |
470 | PartitionRangeBound **rbounds = NULL; |
471 | PartitionRangeBound **all_bounds, |
472 | *prev; |
473 | int i, |
474 | k; |
475 | int ndatums = 0; |
476 | int default_index = -1; |
477 | int next_index = 0; |
478 | |
479 | boundinfo = (PartitionBoundInfoData *) |
480 | palloc0(sizeof(PartitionBoundInfoData)); |
481 | boundinfo->strategy = key->strategy; |
482 | /* There is no special null-accepting range partition. */ |
483 | boundinfo->null_index = -1; |
484 | /* Will be set correctly below. */ |
485 | boundinfo->default_index = -1; |
486 | |
487 | all_bounds = (PartitionRangeBound **) |
488 | palloc0(2 * nparts * sizeof(PartitionRangeBound *)); |
489 | |
490 | /* Create a unified list of range bounds across all the partitions. */ |
491 | ndatums = 0; |
492 | for (i = 0; i < nparts; i++) |
493 | { |
494 | PartitionBoundSpec *spec = boundspecs[i]; |
495 | PartitionRangeBound *lower, |
496 | *upper; |
497 | |
498 | if (spec->strategy != PARTITION_STRATEGY_RANGE) |
499 | elog(ERROR, "invalid strategy in partition bound spec" ); |
500 | |
501 | /* |
502 | * Note the index of the partition bound spec for the default |
503 | * partition. There's no datum to add to the all_bounds array for |
504 | * this partition. |
505 | */ |
506 | if (spec->is_default) |
507 | { |
508 | default_index = i; |
509 | continue; |
510 | } |
511 | |
512 | lower = make_one_partition_rbound(key, i, spec->lowerdatums, true); |
513 | upper = make_one_partition_rbound(key, i, spec->upperdatums, false); |
514 | all_bounds[ndatums++] = lower; |
515 | all_bounds[ndatums++] = upper; |
516 | } |
517 | |
518 | Assert(ndatums == nparts * 2 || |
519 | (default_index != -1 && ndatums == (nparts - 1) * 2)); |
520 | |
521 | /* Sort all the bounds in ascending order */ |
522 | qsort_arg(all_bounds, ndatums, |
523 | sizeof(PartitionRangeBound *), |
524 | qsort_partition_rbound_cmp, |
525 | (void *) key); |
526 | |
527 | /* Save distinct bounds from all_bounds into rbounds. */ |
528 | rbounds = (PartitionRangeBound **) |
529 | palloc(ndatums * sizeof(PartitionRangeBound *)); |
530 | k = 0; |
531 | prev = NULL; |
532 | for (i = 0; i < ndatums; i++) |
533 | { |
534 | PartitionRangeBound *cur = all_bounds[i]; |
535 | bool is_distinct = false; |
536 | int j; |
537 | |
538 | /* Is the current bound distinct from the previous one? */ |
539 | for (j = 0; j < key->partnatts; j++) |
540 | { |
541 | Datum cmpval; |
542 | |
543 | if (prev == NULL || cur->kind[j] != prev->kind[j]) |
544 | { |
545 | is_distinct = true; |
546 | break; |
547 | } |
548 | |
549 | /* |
550 | * If the bounds are both MINVALUE or MAXVALUE, stop now and treat |
551 | * them as equal, since any values after this point must be |
552 | * ignored. |
553 | */ |
554 | if (cur->kind[j] != PARTITION_RANGE_DATUM_VALUE) |
555 | break; |
556 | |
557 | cmpval = FunctionCall2Coll(&key->partsupfunc[j], |
558 | key->partcollation[j], |
559 | cur->datums[j], |
560 | prev->datums[j]); |
561 | if (DatumGetInt32(cmpval) != 0) |
562 | { |
563 | is_distinct = true; |
564 | break; |
565 | } |
566 | } |
567 | |
568 | /* |
569 | * Only if the bound is distinct save it into a temporary array, i.e, |
570 | * rbounds which is later copied into boundinfo datums array. |
571 | */ |
572 | if (is_distinct) |
573 | rbounds[k++] = all_bounds[i]; |
574 | |
575 | prev = cur; |
576 | } |
577 | |
578 | /* Update ndatums to hold the count of distinct datums. */ |
579 | ndatums = k; |
580 | |
581 | /* |
582 | * Add datums to boundinfo. Canonical indexes are values ranging from 0 |
583 | * to nparts - 1, assigned in that order to each partition's upper bound. |
584 | * For 'datums' elements that are lower bounds, there is -1 in the |
585 | * 'indexes' array to signify that no partition exists for the values less |
586 | * than such a bound and greater than or equal to the previous upper |
587 | * bound. |
588 | */ |
589 | boundinfo->ndatums = ndatums; |
590 | boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *)); |
591 | boundinfo->kind = (PartitionRangeDatumKind **) |
592 | palloc(ndatums * |
593 | sizeof(PartitionRangeDatumKind *)); |
594 | |
595 | /* |
596 | * For range partitioning, an additional value of -1 is stored as the last |
597 | * element. |
598 | */ |
599 | boundinfo->indexes = (int *) palloc((ndatums + 1) * sizeof(int)); |
600 | |
601 | for (i = 0; i < ndatums; i++) |
602 | { |
603 | int j; |
604 | |
605 | boundinfo->datums[i] = (Datum *) palloc(key->partnatts * |
606 | sizeof(Datum)); |
607 | boundinfo->kind[i] = (PartitionRangeDatumKind *) |
608 | palloc(key->partnatts * |
609 | sizeof(PartitionRangeDatumKind)); |
610 | for (j = 0; j < key->partnatts; j++) |
611 | { |
612 | if (rbounds[i]->kind[j] == PARTITION_RANGE_DATUM_VALUE) |
613 | boundinfo->datums[i][j] = |
614 | datumCopy(rbounds[i]->datums[j], |
615 | key->parttypbyval[j], |
616 | key->parttyplen[j]); |
617 | boundinfo->kind[i][j] = rbounds[i]->kind[j]; |
618 | } |
619 | |
620 | /* |
621 | * There is no mapping for invalid indexes. |
622 | * |
623 | * Any lower bounds in the rbounds array have invalid indexes |
624 | * assigned, because the values between the previous bound (if there |
625 | * is one) and this (lower) bound are not part of the range of any |
626 | * existing partition. |
627 | */ |
628 | if (rbounds[i]->lower) |
629 | boundinfo->indexes[i] = -1; |
630 | else |
631 | { |
632 | int orig_index = rbounds[i]->index; |
633 | |
634 | /* If the old index has no mapping, assign one */ |
635 | if ((*mapping)[orig_index] == -1) |
636 | (*mapping)[orig_index] = next_index++; |
637 | |
638 | boundinfo->indexes[i] = (*mapping)[orig_index]; |
639 | } |
640 | } |
641 | |
642 | /* Set the canonical value for default_index, if any. */ |
643 | if (default_index != -1) |
644 | { |
645 | Assert(default_index >= 0 && (*mapping)[default_index] == -1); |
646 | (*mapping)[default_index] = next_index++; |
647 | boundinfo->default_index = (*mapping)[default_index]; |
648 | } |
649 | |
650 | /* The extra -1 element. */ |
651 | Assert(i == ndatums); |
652 | boundinfo->indexes[i] = -1; |
653 | |
654 | /* All partitions must now have been assigned canonical indexes. */ |
655 | Assert(next_index == nparts); |
656 | return boundinfo; |
657 | } |
658 | |
659 | /* |
660 | * Are two partition bound collections logically equal? |
661 | * |
662 | * Used in the keep logic of relcache.c (ie, in RelationClearRelation()). |
663 | * This is also useful when b1 and b2 are bound collections of two separate |
664 | * relations, respectively, because PartitionBoundInfo is a canonical |
665 | * representation of partition bounds. |
666 | */ |
667 | bool |
668 | partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval, |
669 | PartitionBoundInfo b1, PartitionBoundInfo b2) |
670 | { |
671 | int i; |
672 | |
673 | if (b1->strategy != b2->strategy) |
674 | return false; |
675 | |
676 | if (b1->ndatums != b2->ndatums) |
677 | return false; |
678 | |
679 | if (b1->null_index != b2->null_index) |
680 | return false; |
681 | |
682 | if (b1->default_index != b2->default_index) |
683 | return false; |
684 | |
685 | if (b1->strategy == PARTITION_STRATEGY_HASH) |
686 | { |
687 | int greatest_modulus = get_hash_partition_greatest_modulus(b1); |
688 | |
689 | /* |
690 | * If two hash partitioned tables have different greatest moduli, |
691 | * their partition schemes don't match. |
692 | */ |
693 | if (greatest_modulus != get_hash_partition_greatest_modulus(b2)) |
694 | return false; |
695 | |
696 | /* |
697 | * We arrange the partitions in the ascending order of their moduli |
698 | * and remainders. Also every modulus is factor of next larger |
699 | * modulus. Therefore we can safely store index of a given partition |
700 | * in indexes array at remainder of that partition. Also entries at |
701 | * (remainder + N * modulus) positions in indexes array are all same |
702 | * for (modulus, remainder) specification for any partition. Thus |
703 | * datums array from both the given bounds are same, if and only if |
704 | * their indexes array will be same. So, it suffices to compare |
705 | * indexes array. |
706 | */ |
707 | for (i = 0; i < greatest_modulus; i++) |
708 | if (b1->indexes[i] != b2->indexes[i]) |
709 | return false; |
710 | |
711 | #ifdef USE_ASSERT_CHECKING |
712 | |
713 | /* |
714 | * Nonetheless make sure that the bounds are indeed same when the |
715 | * indexes match. Hash partition bound stores modulus and remainder |
716 | * at b1->datums[i][0] and b1->datums[i][1] position respectively. |
717 | */ |
718 | for (i = 0; i < b1->ndatums; i++) |
719 | Assert((b1->datums[i][0] == b2->datums[i][0] && |
720 | b1->datums[i][1] == b2->datums[i][1])); |
721 | #endif |
722 | } |
723 | else |
724 | { |
725 | for (i = 0; i < b1->ndatums; i++) |
726 | { |
727 | int j; |
728 | |
729 | for (j = 0; j < partnatts; j++) |
730 | { |
731 | /* For range partitions, the bounds might not be finite. */ |
732 | if (b1->kind != NULL) |
733 | { |
734 | /* The different kinds of bound all differ from each other */ |
735 | if (b1->kind[i][j] != b2->kind[i][j]) |
736 | return false; |
737 | |
738 | /* |
739 | * Non-finite bounds are equal without further |
740 | * examination. |
741 | */ |
742 | if (b1->kind[i][j] != PARTITION_RANGE_DATUM_VALUE) |
743 | continue; |
744 | } |
745 | |
746 | /* |
747 | * Compare the actual values. Note that it would be both |
748 | * incorrect and unsafe to invoke the comparison operator |
749 | * derived from the partitioning specification here. It would |
750 | * be incorrect because we want the relcache entry to be |
751 | * updated for ANY change to the partition bounds, not just |
752 | * those that the partitioning operator thinks are |
753 | * significant. It would be unsafe because we might reach |
754 | * this code in the context of an aborted transaction, and an |
755 | * arbitrary partitioning operator might not be safe in that |
756 | * context. datumIsEqual() should be simple enough to be |
757 | * safe. |
758 | */ |
759 | if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j], |
760 | parttypbyval[j], parttyplen[j])) |
761 | return false; |
762 | } |
763 | |
764 | if (b1->indexes[i] != b2->indexes[i]) |
765 | return false; |
766 | } |
767 | |
768 | /* There are ndatums+1 indexes in case of range partitions */ |
769 | if (b1->strategy == PARTITION_STRATEGY_RANGE && |
770 | b1->indexes[i] != b2->indexes[i]) |
771 | return false; |
772 | } |
773 | return true; |
774 | } |
775 | |
776 | /* |
777 | * Return a copy of given PartitionBoundInfo structure. The data types of bounds |
778 | * are described by given partition key specification. |
779 | */ |
780 | PartitionBoundInfo |
781 | partition_bounds_copy(PartitionBoundInfo src, |
782 | PartitionKey key) |
783 | { |
784 | PartitionBoundInfo dest; |
785 | int i; |
786 | int ndatums; |
787 | int partnatts; |
788 | int num_indexes; |
789 | |
790 | dest = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData)); |
791 | |
792 | dest->strategy = src->strategy; |
793 | ndatums = dest->ndatums = src->ndatums; |
794 | partnatts = key->partnatts; |
795 | |
796 | num_indexes = get_partition_bound_num_indexes(src); |
797 | |
798 | /* List partitioned tables have only a single partition key. */ |
799 | Assert(key->strategy != PARTITION_STRATEGY_LIST || partnatts == 1); |
800 | |
801 | dest->datums = (Datum **) palloc(sizeof(Datum *) * ndatums); |
802 | |
803 | if (src->kind != NULL) |
804 | { |
805 | dest->kind = (PartitionRangeDatumKind **) palloc(ndatums * |
806 | sizeof(PartitionRangeDatumKind *)); |
807 | for (i = 0; i < ndatums; i++) |
808 | { |
809 | dest->kind[i] = (PartitionRangeDatumKind *) palloc(partnatts * |
810 | sizeof(PartitionRangeDatumKind)); |
811 | |
812 | memcpy(dest->kind[i], src->kind[i], |
813 | sizeof(PartitionRangeDatumKind) * key->partnatts); |
814 | } |
815 | } |
816 | else |
817 | dest->kind = NULL; |
818 | |
819 | for (i = 0; i < ndatums; i++) |
820 | { |
821 | int j; |
822 | |
823 | /* |
824 | * For a corresponding to hash partition, datums array will have two |
825 | * elements - modulus and remainder. |
826 | */ |
827 | bool hash_part = (key->strategy == PARTITION_STRATEGY_HASH); |
828 | int natts = hash_part ? 2 : partnatts; |
829 | |
830 | dest->datums[i] = (Datum *) palloc(sizeof(Datum) * natts); |
831 | |
832 | for (j = 0; j < natts; j++) |
833 | { |
834 | bool byval; |
835 | int typlen; |
836 | |
837 | if (hash_part) |
838 | { |
839 | typlen = sizeof(int32); /* Always int4 */ |
840 | byval = true; /* int4 is pass-by-value */ |
841 | } |
842 | else |
843 | { |
844 | byval = key->parttypbyval[j]; |
845 | typlen = key->parttyplen[j]; |
846 | } |
847 | |
848 | if (dest->kind == NULL || |
849 | dest->kind[i][j] == PARTITION_RANGE_DATUM_VALUE) |
850 | dest->datums[i][j] = datumCopy(src->datums[i][j], |
851 | byval, typlen); |
852 | } |
853 | } |
854 | |
855 | dest->indexes = (int *) palloc(sizeof(int) * num_indexes); |
856 | memcpy(dest->indexes, src->indexes, sizeof(int) * num_indexes); |
857 | |
858 | dest->null_index = src->null_index; |
859 | dest->default_index = src->default_index; |
860 | |
861 | return dest; |
862 | } |
863 | |
864 | /* |
865 | * partitions_are_ordered |
866 | * Determine whether the partitions described by 'boundinfo' are ordered, |
867 | * that is partitions appearing earlier in the PartitionDesc sequence |
868 | * contain partition keys strictly less than those appearing later. |
869 | * Also, if NULL values are possible, they must come in the last |
870 | * partition defined in the PartitionDesc. |
871 | * |
872 | * If out of order, or there is insufficient info to know the order, |
873 | * then we return false. |
874 | */ |
875 | bool |
876 | partitions_are_ordered(PartitionBoundInfo boundinfo, int nparts) |
877 | { |
878 | Assert(boundinfo != NULL); |
879 | |
880 | switch (boundinfo->strategy) |
881 | { |
882 | case PARTITION_STRATEGY_RANGE: |
883 | |
884 | /* |
885 | * RANGE-type partitioning guarantees that the partitions can be |
886 | * scanned in the order that they're defined in the PartitionDesc |
887 | * to provide sequential, non-overlapping ranges of tuples. |
888 | * However, if a DEFAULT partition exists then it doesn't work, as |
889 | * that could contain tuples from either below or above the |
890 | * defined range, or tuples belonging to gaps between partitions. |
891 | */ |
892 | if (!partition_bound_has_default(boundinfo)) |
893 | return true; |
894 | break; |
895 | |
896 | case PARTITION_STRATEGY_LIST: |
897 | |
898 | /* |
899 | * LIST partitioning can also guarantee ordering, but only if the |
900 | * partitions don't accept interleaved values. We could likely |
901 | * check for this by looping over the PartitionBound's indexes |
902 | * array to check that the indexes are in order. For now, let's |
903 | * just keep it simple and just accept LIST partitioning when |
904 | * there's no DEFAULT partition, exactly one value per partition, |
905 | * and optionally a NULL partition that does not accept any other |
906 | * values. Such a NULL partition will come last in the |
907 | * PartitionDesc, and the other partitions will be properly |
908 | * ordered. This is a cheap test to make as it does not require |
909 | * any per-partition processing. Maybe we'd like to handle more |
910 | * complex cases in the future. |
911 | */ |
912 | if (partition_bound_has_default(boundinfo)) |
913 | return false; |
914 | |
915 | if (boundinfo->ndatums + partition_bound_accepts_nulls(boundinfo) |
916 | == nparts) |
917 | return true; |
918 | break; |
919 | |
920 | default: |
921 | /* HASH, or some other strategy */ |
922 | break; |
923 | } |
924 | |
925 | return false; |
926 | } |
927 | |
928 | /* |
929 | * check_new_partition_bound |
930 | * |
931 | * Checks if the new partition's bound overlaps any of the existing partitions |
932 | * of parent. Also performs additional checks as necessary per strategy. |
933 | */ |
934 | void |
935 | check_new_partition_bound(char *relname, Relation parent, |
936 | PartitionBoundSpec *spec) |
937 | { |
938 | PartitionKey key = RelationGetPartitionKey(parent); |
939 | PartitionDesc partdesc = RelationGetPartitionDesc(parent); |
940 | PartitionBoundInfo boundinfo = partdesc->boundinfo; |
941 | ParseState *pstate = make_parsestate(NULL); |
942 | int with = -1; |
943 | bool overlap = false; |
944 | |
945 | if (spec->is_default) |
946 | { |
947 | /* |
948 | * The default partition bound never conflicts with any other |
949 | * partition's; if that's what we're attaching, the only possible |
950 | * problem is that one already exists, so check for that and we're |
951 | * done. |
952 | */ |
953 | if (boundinfo == NULL || !partition_bound_has_default(boundinfo)) |
954 | return; |
955 | |
956 | /* Default partition already exists, error out. */ |
957 | ereport(ERROR, |
958 | (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), |
959 | errmsg("partition \"%s\" conflicts with existing default partition \"%s\"" , |
960 | relname, get_rel_name(partdesc->oids[boundinfo->default_index])), |
961 | parser_errposition(pstate, spec->location))); |
962 | } |
963 | |
964 | switch (key->strategy) |
965 | { |
966 | case PARTITION_STRATEGY_HASH: |
967 | { |
968 | Assert(spec->strategy == PARTITION_STRATEGY_HASH); |
969 | Assert(spec->remainder >= 0 && spec->remainder < spec->modulus); |
970 | |
971 | if (partdesc->nparts > 0) |
972 | { |
973 | Datum **datums = boundinfo->datums; |
974 | int ndatums = boundinfo->ndatums; |
975 | int greatest_modulus; |
976 | int remainder; |
977 | int offset; |
978 | bool valid_modulus = true; |
979 | int prev_modulus, /* Previous largest modulus */ |
980 | next_modulus; /* Next largest modulus */ |
981 | |
982 | /* |
983 | * Check rule that every modulus must be a factor of the |
984 | * next larger modulus. For example, if you have a bunch |
985 | * of partitions that all have modulus 5, you can add a |
986 | * new partition with modulus 10 or a new partition with |
987 | * modulus 15, but you cannot add both a partition with |
988 | * modulus 10 and a partition with modulus 15, because 10 |
989 | * is not a factor of 15. |
990 | * |
991 | * Get the greatest (modulus, remainder) pair contained in |
992 | * boundinfo->datums that is less than or equal to the |
993 | * (spec->modulus, spec->remainder) pair. |
994 | */ |
995 | offset = partition_hash_bsearch(boundinfo, |
996 | spec->modulus, |
997 | spec->remainder); |
998 | if (offset < 0) |
999 | { |
1000 | next_modulus = DatumGetInt32(datums[0][0]); |
1001 | valid_modulus = (next_modulus % spec->modulus) == 0; |
1002 | } |
1003 | else |
1004 | { |
1005 | prev_modulus = DatumGetInt32(datums[offset][0]); |
1006 | valid_modulus = (spec->modulus % prev_modulus) == 0; |
1007 | |
1008 | if (valid_modulus && (offset + 1) < ndatums) |
1009 | { |
1010 | next_modulus = DatumGetInt32(datums[offset + 1][0]); |
1011 | valid_modulus = (next_modulus % spec->modulus) == 0; |
1012 | } |
1013 | } |
1014 | |
1015 | if (!valid_modulus) |
1016 | ereport(ERROR, |
1017 | (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), |
1018 | errmsg("every hash partition modulus must be a factor of the next larger modulus" ))); |
1019 | |
1020 | greatest_modulus = get_hash_partition_greatest_modulus(boundinfo); |
1021 | remainder = spec->remainder; |
1022 | |
1023 | /* |
1024 | * Normally, the lowest remainder that could conflict with |
1025 | * the new partition is equal to the remainder specified |
1026 | * for the new partition, but when the new partition has a |
1027 | * modulus higher than any used so far, we need to adjust. |
1028 | */ |
1029 | if (remainder >= greatest_modulus) |
1030 | remainder = remainder % greatest_modulus; |
1031 | |
1032 | /* Check every potentially-conflicting remainder. */ |
1033 | do |
1034 | { |
1035 | if (boundinfo->indexes[remainder] != -1) |
1036 | { |
1037 | overlap = true; |
1038 | with = boundinfo->indexes[remainder]; |
1039 | break; |
1040 | } |
1041 | remainder += spec->modulus; |
1042 | } while (remainder < greatest_modulus); |
1043 | } |
1044 | |
1045 | break; |
1046 | } |
1047 | |
1048 | case PARTITION_STRATEGY_LIST: |
1049 | { |
1050 | Assert(spec->strategy == PARTITION_STRATEGY_LIST); |
1051 | |
1052 | if (partdesc->nparts > 0) |
1053 | { |
1054 | ListCell *cell; |
1055 | |
1056 | Assert(boundinfo && |
1057 | boundinfo->strategy == PARTITION_STRATEGY_LIST && |
1058 | (boundinfo->ndatums > 0 || |
1059 | partition_bound_accepts_nulls(boundinfo) || |
1060 | partition_bound_has_default(boundinfo))); |
1061 | |
1062 | foreach(cell, spec->listdatums) |
1063 | { |
1064 | Const *val = castNode(Const, lfirst(cell)); |
1065 | |
1066 | if (!val->constisnull) |
1067 | { |
1068 | int offset; |
1069 | bool equal; |
1070 | |
1071 | offset = partition_list_bsearch(&key->partsupfunc[0], |
1072 | key->partcollation, |
1073 | boundinfo, |
1074 | val->constvalue, |
1075 | &equal); |
1076 | if (offset >= 0 && equal) |
1077 | { |
1078 | overlap = true; |
1079 | with = boundinfo->indexes[offset]; |
1080 | break; |
1081 | } |
1082 | } |
1083 | else if (partition_bound_accepts_nulls(boundinfo)) |
1084 | { |
1085 | overlap = true; |
1086 | with = boundinfo->null_index; |
1087 | break; |
1088 | } |
1089 | } |
1090 | } |
1091 | |
1092 | break; |
1093 | } |
1094 | |
1095 | case PARTITION_STRATEGY_RANGE: |
1096 | { |
1097 | PartitionRangeBound *lower, |
1098 | *upper; |
1099 | |
1100 | Assert(spec->strategy == PARTITION_STRATEGY_RANGE); |
1101 | lower = make_one_partition_rbound(key, -1, spec->lowerdatums, true); |
1102 | upper = make_one_partition_rbound(key, -1, spec->upperdatums, false); |
1103 | |
1104 | /* |
1105 | * First check if the resulting range would be empty with |
1106 | * specified lower and upper bounds |
1107 | */ |
1108 | if (partition_rbound_cmp(key->partnatts, key->partsupfunc, |
1109 | key->partcollation, lower->datums, |
1110 | lower->kind, true, upper) >= 0) |
1111 | { |
1112 | ereport(ERROR, |
1113 | (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), |
1114 | errmsg("empty range bound specified for partition \"%s\"" , |
1115 | relname), |
1116 | errdetail("Specified lower bound %s is greater than or equal to upper bound %s." , |
1117 | get_range_partbound_string(spec->lowerdatums), |
1118 | get_range_partbound_string(spec->upperdatums)), |
1119 | parser_errposition(pstate, spec->location))); |
1120 | } |
1121 | |
1122 | if (partdesc->nparts > 0) |
1123 | { |
1124 | int offset; |
1125 | bool equal; |
1126 | |
1127 | Assert(boundinfo && |
1128 | boundinfo->strategy == PARTITION_STRATEGY_RANGE && |
1129 | (boundinfo->ndatums > 0 || |
1130 | partition_bound_has_default(boundinfo))); |
1131 | |
1132 | /* |
1133 | * Test whether the new lower bound (which is treated |
1134 | * inclusively as part of the new partition) lies inside |
1135 | * an existing partition, or in a gap. |
1136 | * |
1137 | * If it's inside an existing partition, the bound at |
1138 | * offset + 1 will be the upper bound of that partition, |
1139 | * and its index will be >= 0. |
1140 | * |
1141 | * If it's in a gap, the bound at offset + 1 will be the |
1142 | * lower bound of the next partition, and its index will |
1143 | * be -1. This is also true if there is no next partition, |
1144 | * since the index array is initialised with an extra -1 |
1145 | * at the end. |
1146 | */ |
1147 | offset = partition_range_bsearch(key->partnatts, |
1148 | key->partsupfunc, |
1149 | key->partcollation, |
1150 | boundinfo, lower, |
1151 | &equal); |
1152 | |
1153 | if (boundinfo->indexes[offset + 1] < 0) |
1154 | { |
1155 | /* |
1156 | * Check that the new partition will fit in the gap. |
1157 | * For it to fit, the new upper bound must be less |
1158 | * than or equal to the lower bound of the next |
1159 | * partition, if there is one. |
1160 | */ |
1161 | if (offset + 1 < boundinfo->ndatums) |
1162 | { |
1163 | int32 cmpval; |
1164 | Datum *datums; |
1165 | PartitionRangeDatumKind *kind; |
1166 | bool is_lower; |
1167 | |
1168 | datums = boundinfo->datums[offset + 1]; |
1169 | kind = boundinfo->kind[offset + 1]; |
1170 | is_lower = (boundinfo->indexes[offset + 1] == -1); |
1171 | |
1172 | cmpval = partition_rbound_cmp(key->partnatts, |
1173 | key->partsupfunc, |
1174 | key->partcollation, |
1175 | datums, kind, |
1176 | is_lower, upper); |
1177 | if (cmpval < 0) |
1178 | { |
1179 | /* |
1180 | * The new partition overlaps with the |
1181 | * existing partition between offset + 1 and |
1182 | * offset + 2. |
1183 | */ |
1184 | overlap = true; |
1185 | with = boundinfo->indexes[offset + 2]; |
1186 | } |
1187 | } |
1188 | } |
1189 | else |
1190 | { |
1191 | /* |
1192 | * The new partition overlaps with the existing |
1193 | * partition between offset and offset + 1. |
1194 | */ |
1195 | overlap = true; |
1196 | with = boundinfo->indexes[offset + 1]; |
1197 | } |
1198 | } |
1199 | |
1200 | break; |
1201 | } |
1202 | |
1203 | default: |
1204 | elog(ERROR, "unexpected partition strategy: %d" , |
1205 | (int) key->strategy); |
1206 | } |
1207 | |
1208 | if (overlap) |
1209 | { |
1210 | Assert(with >= 0); |
1211 | ereport(ERROR, |
1212 | (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), |
1213 | errmsg("partition \"%s\" would overlap partition \"%s\"" , |
1214 | relname, get_rel_name(partdesc->oids[with])), |
1215 | parser_errposition(pstate, spec->location))); |
1216 | } |
1217 | } |
1218 | |
1219 | /* |
1220 | * check_default_partition_contents |
1221 | * |
1222 | * This function checks if there exists a row in the default partition that |
1223 | * would properly belong to the new partition being added. If it finds one, |
1224 | * it throws an error. |
1225 | */ |
1226 | void |
1227 | check_default_partition_contents(Relation parent, Relation default_rel, |
1228 | PartitionBoundSpec *new_spec) |
1229 | { |
1230 | List *new_part_constraints; |
1231 | List *def_part_constraints; |
1232 | List *all_parts; |
1233 | ListCell *lc; |
1234 | |
1235 | new_part_constraints = (new_spec->strategy == PARTITION_STRATEGY_LIST) |
1236 | ? get_qual_for_list(parent, new_spec) |
1237 | : get_qual_for_range(parent, new_spec, false); |
1238 | def_part_constraints = |
1239 | get_proposed_default_constraint(new_part_constraints); |
1240 | |
1241 | /* |
1242 | * Map the Vars in the constraint expression from parent's attnos to |
1243 | * default_rel's. |
1244 | */ |
1245 | def_part_constraints = |
1246 | map_partition_varattnos(def_part_constraints, 1, default_rel, |
1247 | parent, NULL); |
1248 | |
1249 | /* |
1250 | * If the existing constraints on the default partition imply that it will |
1251 | * not contain any row that would belong to the new partition, we can |
1252 | * avoid scanning the default partition. |
1253 | */ |
1254 | if (PartConstraintImpliedByRelConstraint(default_rel, def_part_constraints)) |
1255 | { |
1256 | ereport(DEBUG1, |
1257 | (errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints" , |
1258 | RelationGetRelationName(default_rel)))); |
1259 | return; |
1260 | } |
1261 | |
1262 | /* |
1263 | * Scan the default partition and its subpartitions, and check for rows |
1264 | * that do not satisfy the revised partition constraints. |
1265 | */ |
1266 | if (default_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) |
1267 | all_parts = find_all_inheritors(RelationGetRelid(default_rel), |
1268 | AccessExclusiveLock, NULL); |
1269 | else |
1270 | all_parts = list_make1_oid(RelationGetRelid(default_rel)); |
1271 | |
1272 | foreach(lc, all_parts) |
1273 | { |
1274 | Oid part_relid = lfirst_oid(lc); |
1275 | Relation part_rel; |
1276 | Expr *partition_constraint; |
1277 | EState *estate; |
1278 | ExprState *partqualstate = NULL; |
1279 | Snapshot snapshot; |
1280 | ExprContext *econtext; |
1281 | TableScanDesc scan; |
1282 | MemoryContext oldCxt; |
1283 | TupleTableSlot *tupslot; |
1284 | |
1285 | /* Lock already taken above. */ |
1286 | if (part_relid != RelationGetRelid(default_rel)) |
1287 | { |
1288 | part_rel = table_open(part_relid, NoLock); |
1289 | |
1290 | /* |
1291 | * Map the Vars in the constraint expression from default_rel's |
1292 | * the sub-partition's. |
1293 | */ |
1294 | partition_constraint = make_ands_explicit(def_part_constraints); |
1295 | partition_constraint = (Expr *) |
1296 | map_partition_varattnos((List *) partition_constraint, 1, |
1297 | part_rel, default_rel, NULL); |
1298 | |
1299 | /* |
1300 | * If the partition constraints on default partition child imply |
1301 | * that it will not contain any row that would belong to the new |
1302 | * partition, we can avoid scanning the child table. |
1303 | */ |
1304 | if (PartConstraintImpliedByRelConstraint(part_rel, |
1305 | def_part_constraints)) |
1306 | { |
1307 | ereport(DEBUG1, |
1308 | (errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints" , |
1309 | RelationGetRelationName(part_rel)))); |
1310 | |
1311 | table_close(part_rel, NoLock); |
1312 | continue; |
1313 | } |
1314 | } |
1315 | else |
1316 | { |
1317 | part_rel = default_rel; |
1318 | partition_constraint = make_ands_explicit(def_part_constraints); |
1319 | } |
1320 | |
1321 | /* |
1322 | * Only RELKIND_RELATION relations (i.e. leaf partitions) need to be |
1323 | * scanned. |
1324 | */ |
1325 | if (part_rel->rd_rel->relkind != RELKIND_RELATION) |
1326 | { |
1327 | if (part_rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) |
1328 | ereport(WARNING, |
1329 | (errcode(ERRCODE_CHECK_VIOLATION), |
1330 | errmsg("skipped scanning foreign table \"%s\" which is a partition of default partition \"%s\"" , |
1331 | RelationGetRelationName(part_rel), |
1332 | RelationGetRelationName(default_rel)))); |
1333 | |
1334 | if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel)) |
1335 | table_close(part_rel, NoLock); |
1336 | |
1337 | continue; |
1338 | } |
1339 | |
1340 | estate = CreateExecutorState(); |
1341 | |
1342 | /* Build expression execution states for partition check quals */ |
1343 | partqualstate = ExecPrepareExpr(partition_constraint, estate); |
1344 | |
1345 | econtext = GetPerTupleExprContext(estate); |
1346 | snapshot = RegisterSnapshot(GetLatestSnapshot()); |
1347 | tupslot = table_slot_create(part_rel, &estate->es_tupleTable); |
1348 | scan = table_beginscan(part_rel, snapshot, 0, NULL); |
1349 | |
1350 | /* |
1351 | * Switch to per-tuple memory context and reset it for each tuple |
1352 | * produced, so we don't leak memory. |
1353 | */ |
1354 | oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); |
1355 | |
1356 | while (table_scan_getnextslot(scan, ForwardScanDirection, tupslot)) |
1357 | { |
1358 | econtext->ecxt_scantuple = tupslot; |
1359 | |
1360 | if (!ExecCheck(partqualstate, econtext)) |
1361 | ereport(ERROR, |
1362 | (errcode(ERRCODE_CHECK_VIOLATION), |
1363 | errmsg("updated partition constraint for default partition \"%s\" would be violated by some row" , |
1364 | RelationGetRelationName(default_rel)))); |
1365 | |
1366 | ResetExprContext(econtext); |
1367 | CHECK_FOR_INTERRUPTS(); |
1368 | } |
1369 | |
1370 | MemoryContextSwitchTo(oldCxt); |
1371 | table_endscan(scan); |
1372 | UnregisterSnapshot(snapshot); |
1373 | ExecDropSingleTupleTableSlot(tupslot); |
1374 | FreeExecutorState(estate); |
1375 | |
1376 | if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel)) |
1377 | table_close(part_rel, NoLock); /* keep the lock until commit */ |
1378 | } |
1379 | } |
1380 | |
1381 | /* |
1382 | * get_hash_partition_greatest_modulus |
1383 | * |
1384 | * Returns the greatest modulus of the hash partition bound. The greatest |
1385 | * modulus will be at the end of the datums array because hash partitions are |
1386 | * arranged in the ascending order of their moduli and remainders. |
1387 | */ |
1388 | int |
1389 | get_hash_partition_greatest_modulus(PartitionBoundInfo bound) |
1390 | { |
1391 | Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH); |
1392 | Assert(bound->datums && bound->ndatums > 0); |
1393 | Assert(DatumGetInt32(bound->datums[bound->ndatums - 1][0]) > 0); |
1394 | |
1395 | return DatumGetInt32(bound->datums[bound->ndatums - 1][0]); |
1396 | } |
1397 | |
1398 | /* |
1399 | * make_one_partition_rbound |
1400 | * |
1401 | * Return a PartitionRangeBound given a list of PartitionRangeDatum elements |
1402 | * and a flag telling whether the bound is lower or not. Made into a function |
1403 | * because there are multiple sites that want to use this facility. |
1404 | */ |
1405 | static PartitionRangeBound * |
1406 | make_one_partition_rbound(PartitionKey key, int index, List *datums, bool lower) |
1407 | { |
1408 | PartitionRangeBound *bound; |
1409 | ListCell *lc; |
1410 | int i; |
1411 | |
1412 | Assert(datums != NIL); |
1413 | |
1414 | bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound)); |
1415 | bound->index = index; |
1416 | bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum)); |
1417 | bound->kind = (PartitionRangeDatumKind *) palloc0(key->partnatts * |
1418 | sizeof(PartitionRangeDatumKind)); |
1419 | bound->lower = lower; |
1420 | |
1421 | i = 0; |
1422 | foreach(lc, datums) |
1423 | { |
1424 | PartitionRangeDatum *datum = castNode(PartitionRangeDatum, lfirst(lc)); |
1425 | |
1426 | /* What's contained in this range datum? */ |
1427 | bound->kind[i] = datum->kind; |
1428 | |
1429 | if (datum->kind == PARTITION_RANGE_DATUM_VALUE) |
1430 | { |
1431 | Const *val = castNode(Const, datum->value); |
1432 | |
1433 | if (val->constisnull) |
1434 | elog(ERROR, "invalid range bound datum" ); |
1435 | bound->datums[i] = val->constvalue; |
1436 | } |
1437 | |
1438 | i++; |
1439 | } |
1440 | |
1441 | return bound; |
1442 | } |
1443 | |
1444 | /* |
1445 | * partition_rbound_cmp |
1446 | * |
1447 | * Return for two range bounds whether the 1st one (specified in datums1, |
1448 | * kind1, and lower1) is <, =, or > the bound specified in *b2. |
1449 | * |
1450 | * partnatts, partsupfunc and partcollation give the number of attributes in the |
1451 | * bounds to be compared, comparison function to be used and the collations of |
1452 | * attributes, respectively. |
1453 | * |
1454 | * Note that if the values of the two range bounds compare equal, then we take |
1455 | * into account whether they are upper or lower bounds, and an upper bound is |
1456 | * considered to be smaller than a lower bound. This is important to the way |
1457 | * that RelationBuildPartitionDesc() builds the PartitionBoundInfoData |
1458 | * structure, which only stores the upper bound of a common boundary between |
1459 | * two contiguous partitions. |
1460 | */ |
1461 | static int32 |
1462 | partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc, |
1463 | Oid *partcollation, |
1464 | Datum *datums1, PartitionRangeDatumKind *kind1, |
1465 | bool lower1, PartitionRangeBound *b2) |
1466 | { |
1467 | int32 cmpval = 0; /* placate compiler */ |
1468 | int i; |
1469 | Datum *datums2 = b2->datums; |
1470 | PartitionRangeDatumKind *kind2 = b2->kind; |
1471 | bool lower2 = b2->lower; |
1472 | |
1473 | for (i = 0; i < partnatts; i++) |
1474 | { |
1475 | /* |
1476 | * First, handle cases where the column is unbounded, which should not |
1477 | * invoke the comparison procedure, and should not consider any later |
1478 | * columns. Note that the PartitionRangeDatumKind enum elements |
1479 | * compare the same way as the values they represent. |
1480 | */ |
1481 | if (kind1[i] < kind2[i]) |
1482 | return -1; |
1483 | else if (kind1[i] > kind2[i]) |
1484 | return 1; |
1485 | else if (kind1[i] != PARTITION_RANGE_DATUM_VALUE) |
1486 | |
1487 | /* |
1488 | * The column bounds are both MINVALUE or both MAXVALUE. No later |
1489 | * columns should be considered, but we still need to compare |
1490 | * whether they are upper or lower bounds. |
1491 | */ |
1492 | break; |
1493 | |
1494 | cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i], |
1495 | partcollation[i], |
1496 | datums1[i], |
1497 | datums2[i])); |
1498 | if (cmpval != 0) |
1499 | break; |
1500 | } |
1501 | |
1502 | /* |
1503 | * If the comparison is anything other than equal, we're done. If they |
1504 | * compare equal though, we still have to consider whether the boundaries |
1505 | * are inclusive or exclusive. Exclusive one is considered smaller of the |
1506 | * two. |
1507 | */ |
1508 | if (cmpval == 0 && lower1 != lower2) |
1509 | cmpval = lower1 ? 1 : -1; |
1510 | |
1511 | return cmpval; |
1512 | } |
1513 | |
1514 | /* |
1515 | * partition_rbound_datum_cmp |
1516 | * |
1517 | * Return whether range bound (specified in rb_datums, rb_kind, and rb_lower) |
1518 | * is <, =, or > partition key of tuple (tuple_datums) |
1519 | * |
1520 | * n_tuple_datums, partsupfunc and partcollation give number of attributes in |
1521 | * the bounds to be compared, comparison function to be used and the collations |
1522 | * of attributes resp. |
1523 | * |
1524 | */ |
1525 | int32 |
1526 | partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation, |
1527 | Datum *rb_datums, PartitionRangeDatumKind *rb_kind, |
1528 | Datum *tuple_datums, int n_tuple_datums) |
1529 | { |
1530 | int i; |
1531 | int32 cmpval = -1; |
1532 | |
1533 | for (i = 0; i < n_tuple_datums; i++) |
1534 | { |
1535 | if (rb_kind[i] == PARTITION_RANGE_DATUM_MINVALUE) |
1536 | return -1; |
1537 | else if (rb_kind[i] == PARTITION_RANGE_DATUM_MAXVALUE) |
1538 | return 1; |
1539 | |
1540 | cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i], |
1541 | partcollation[i], |
1542 | rb_datums[i], |
1543 | tuple_datums[i])); |
1544 | if (cmpval != 0) |
1545 | break; |
1546 | } |
1547 | |
1548 | return cmpval; |
1549 | } |
1550 | |
1551 | /* |
1552 | * partition_hbound_cmp |
1553 | * |
1554 | * Compares modulus first, then remainder if modulus is equal. |
1555 | */ |
1556 | static int32 |
1557 | partition_hbound_cmp(int modulus1, int remainder1, int modulus2, int remainder2) |
1558 | { |
1559 | if (modulus1 < modulus2) |
1560 | return -1; |
1561 | if (modulus1 > modulus2) |
1562 | return 1; |
1563 | if (modulus1 == modulus2 && remainder1 != remainder2) |
1564 | return (remainder1 > remainder2) ? 1 : -1; |
1565 | return 0; |
1566 | } |
1567 | |
1568 | /* |
1569 | * partition_list_bsearch |
1570 | * Returns the index of the greatest bound datum that is less than equal |
1571 | * to the given value or -1 if all of the bound datums are greater |
1572 | * |
1573 | * *is_equal is set to true if the bound datum at the returned index is equal |
1574 | * to the input value. |
1575 | */ |
1576 | int |
1577 | partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, |
1578 | PartitionBoundInfo boundinfo, |
1579 | Datum value, bool *is_equal) |
1580 | { |
1581 | int lo, |
1582 | hi, |
1583 | mid; |
1584 | |
1585 | lo = -1; |
1586 | hi = boundinfo->ndatums - 1; |
1587 | while (lo < hi) |
1588 | { |
1589 | int32 cmpval; |
1590 | |
1591 | mid = (lo + hi + 1) / 2; |
1592 | cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[0], |
1593 | partcollation[0], |
1594 | boundinfo->datums[mid][0], |
1595 | value)); |
1596 | if (cmpval <= 0) |
1597 | { |
1598 | lo = mid; |
1599 | *is_equal = (cmpval == 0); |
1600 | if (*is_equal) |
1601 | break; |
1602 | } |
1603 | else |
1604 | hi = mid - 1; |
1605 | } |
1606 | |
1607 | return lo; |
1608 | } |
1609 | |
1610 | /* |
1611 | * partition_range_bsearch |
1612 | * Returns the index of the greatest range bound that is less than or |
1613 | * equal to the given range bound or -1 if all of the range bounds are |
1614 | * greater |
1615 | * |
1616 | * *is_equal is set to true if the range bound at the returned index is equal |
1617 | * to the input range bound |
1618 | */ |
1619 | static int |
1620 | partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc, |
1621 | Oid *partcollation, |
1622 | PartitionBoundInfo boundinfo, |
1623 | PartitionRangeBound *probe, bool *is_equal) |
1624 | { |
1625 | int lo, |
1626 | hi, |
1627 | mid; |
1628 | |
1629 | lo = -1; |
1630 | hi = boundinfo->ndatums - 1; |
1631 | while (lo < hi) |
1632 | { |
1633 | int32 cmpval; |
1634 | |
1635 | mid = (lo + hi + 1) / 2; |
1636 | cmpval = partition_rbound_cmp(partnatts, partsupfunc, |
1637 | partcollation, |
1638 | boundinfo->datums[mid], |
1639 | boundinfo->kind[mid], |
1640 | (boundinfo->indexes[mid] == -1), |
1641 | probe); |
1642 | if (cmpval <= 0) |
1643 | { |
1644 | lo = mid; |
1645 | *is_equal = (cmpval == 0); |
1646 | |
1647 | if (*is_equal) |
1648 | break; |
1649 | } |
1650 | else |
1651 | hi = mid - 1; |
1652 | } |
1653 | |
1654 | return lo; |
1655 | } |
1656 | |
1657 | /* |
1658 | * partition_range_bsearch |
1659 | * Returns the index of the greatest range bound that is less than or |
1660 | * equal to the given tuple or -1 if all of the range bounds are greater |
1661 | * |
1662 | * *is_equal is set to true if the range bound at the returned index is equal |
1663 | * to the input tuple. |
1664 | */ |
1665 | int |
1666 | partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, |
1667 | PartitionBoundInfo boundinfo, |
1668 | int nvalues, Datum *values, bool *is_equal) |
1669 | { |
1670 | int lo, |
1671 | hi, |
1672 | mid; |
1673 | |
1674 | lo = -1; |
1675 | hi = boundinfo->ndatums - 1; |
1676 | while (lo < hi) |
1677 | { |
1678 | int32 cmpval; |
1679 | |
1680 | mid = (lo + hi + 1) / 2; |
1681 | cmpval = partition_rbound_datum_cmp(partsupfunc, |
1682 | partcollation, |
1683 | boundinfo->datums[mid], |
1684 | boundinfo->kind[mid], |
1685 | values, |
1686 | nvalues); |
1687 | if (cmpval <= 0) |
1688 | { |
1689 | lo = mid; |
1690 | *is_equal = (cmpval == 0); |
1691 | |
1692 | if (*is_equal) |
1693 | break; |
1694 | } |
1695 | else |
1696 | hi = mid - 1; |
1697 | } |
1698 | |
1699 | return lo; |
1700 | } |
1701 | |
1702 | /* |
1703 | * partition_hash_bsearch |
1704 | * Returns the index of the greatest (modulus, remainder) pair that is |
1705 | * less than or equal to the given (modulus, remainder) pair or -1 if |
1706 | * all of them are greater |
1707 | */ |
1708 | int |
1709 | partition_hash_bsearch(PartitionBoundInfo boundinfo, |
1710 | int modulus, int remainder) |
1711 | { |
1712 | int lo, |
1713 | hi, |
1714 | mid; |
1715 | |
1716 | lo = -1; |
1717 | hi = boundinfo->ndatums - 1; |
1718 | while (lo < hi) |
1719 | { |
1720 | int32 cmpval, |
1721 | bound_modulus, |
1722 | bound_remainder; |
1723 | |
1724 | mid = (lo + hi + 1) / 2; |
1725 | bound_modulus = DatumGetInt32(boundinfo->datums[mid][0]); |
1726 | bound_remainder = DatumGetInt32(boundinfo->datums[mid][1]); |
1727 | cmpval = partition_hbound_cmp(bound_modulus, bound_remainder, |
1728 | modulus, remainder); |
1729 | if (cmpval <= 0) |
1730 | { |
1731 | lo = mid; |
1732 | |
1733 | if (cmpval == 0) |
1734 | break; |
1735 | } |
1736 | else |
1737 | hi = mid - 1; |
1738 | } |
1739 | |
1740 | return lo; |
1741 | } |
1742 | |
1743 | /* |
1744 | * qsort_partition_hbound_cmp |
1745 | * |
1746 | * Hash bounds are sorted by modulus, then by remainder. |
1747 | */ |
1748 | static int32 |
1749 | qsort_partition_hbound_cmp(const void *a, const void *b) |
1750 | { |
1751 | PartitionHashBound *h1 = (*(PartitionHashBound *const *) a); |
1752 | PartitionHashBound *h2 = (*(PartitionHashBound *const *) b); |
1753 | |
1754 | return partition_hbound_cmp(h1->modulus, h1->remainder, |
1755 | h2->modulus, h2->remainder); |
1756 | } |
1757 | |
1758 | /* |
1759 | * qsort_partition_list_value_cmp |
1760 | * |
1761 | * Compare two list partition bound datums. |
1762 | */ |
1763 | static int32 |
1764 | qsort_partition_list_value_cmp(const void *a, const void *b, void *arg) |
1765 | { |
1766 | Datum val1 = (*(PartitionListValue *const *) a)->value, |
1767 | val2 = (*(PartitionListValue *const *) b)->value; |
1768 | PartitionKey key = (PartitionKey) arg; |
1769 | |
1770 | return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0], |
1771 | key->partcollation[0], |
1772 | val1, val2)); |
1773 | } |
1774 | |
1775 | /* |
1776 | * qsort_partition_rbound_cmp |
1777 | * |
1778 | * Used when sorting range bounds across all range partitions. |
1779 | */ |
1780 | static int32 |
1781 | qsort_partition_rbound_cmp(const void *a, const void *b, void *arg) |
1782 | { |
1783 | PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a); |
1784 | PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b); |
1785 | PartitionKey key = (PartitionKey) arg; |
1786 | |
1787 | return partition_rbound_cmp(key->partnatts, key->partsupfunc, |
1788 | key->partcollation, b1->datums, b1->kind, |
1789 | b1->lower, b2); |
1790 | } |
1791 | |
1792 | /* |
1793 | * get_partition_bound_num_indexes |
1794 | * |
1795 | * Returns the number of the entries in the partition bound indexes array. |
1796 | */ |
1797 | static int |
1798 | get_partition_bound_num_indexes(PartitionBoundInfo bound) |
1799 | { |
1800 | int num_indexes; |
1801 | |
1802 | Assert(bound); |
1803 | |
1804 | switch (bound->strategy) |
1805 | { |
1806 | case PARTITION_STRATEGY_HASH: |
1807 | |
1808 | /* |
1809 | * The number of the entries in the indexes array is same as the |
1810 | * greatest modulus. |
1811 | */ |
1812 | num_indexes = get_hash_partition_greatest_modulus(bound); |
1813 | break; |
1814 | |
1815 | case PARTITION_STRATEGY_LIST: |
1816 | num_indexes = bound->ndatums; |
1817 | break; |
1818 | |
1819 | case PARTITION_STRATEGY_RANGE: |
1820 | /* Range partitioned table has an extra index. */ |
1821 | num_indexes = bound->ndatums + 1; |
1822 | break; |
1823 | |
1824 | default: |
1825 | elog(ERROR, "unexpected partition strategy: %d" , |
1826 | (int) bound->strategy); |
1827 | } |
1828 | |
1829 | return num_indexes; |
1830 | } |
1831 | |
1832 | /* |
1833 | * get_partition_operator |
1834 | * |
1835 | * Return oid of the operator of the given strategy for the given partition |
1836 | * key column. It is assumed that the partitioning key is of the same type as |
1837 | * the chosen partitioning opclass, or at least binary-compatible. In the |
1838 | * latter case, *need_relabel is set to true if the opclass is not of a |
1839 | * polymorphic type (indicating a RelabelType node needed on top), otherwise |
1840 | * false. |
1841 | */ |
1842 | static Oid |
1843 | get_partition_operator(PartitionKey key, int col, StrategyNumber strategy, |
1844 | bool *need_relabel) |
1845 | { |
1846 | Oid operoid; |
1847 | |
1848 | /* |
1849 | * Get the operator in the partitioning opfamily using the opclass' |
1850 | * declared input type as both left- and righttype. |
1851 | */ |
1852 | operoid = get_opfamily_member(key->partopfamily[col], |
1853 | key->partopcintype[col], |
1854 | key->partopcintype[col], |
1855 | strategy); |
1856 | if (!OidIsValid(operoid)) |
1857 | elog(ERROR, "missing operator %d(%u,%u) in partition opfamily %u" , |
1858 | strategy, key->partopcintype[col], key->partopcintype[col], |
1859 | key->partopfamily[col]); |
1860 | |
1861 | /* |
1862 | * If the partition key column is not of the same type as the operator |
1863 | * class and not polymorphic, tell caller to wrap the non-Const expression |
1864 | * in a RelabelType. This matches what parse_coerce.c does. |
1865 | */ |
1866 | *need_relabel = (key->parttypid[col] != key->partopcintype[col] && |
1867 | key->partopcintype[col] != RECORDOID && |
1868 | !IsPolymorphicType(key->partopcintype[col])); |
1869 | |
1870 | return operoid; |
1871 | } |
1872 | |
1873 | /* |
1874 | * make_partition_op_expr |
1875 | * Returns an Expr for the given partition key column with arg1 and |
1876 | * arg2 as its leftop and rightop, respectively |
1877 | */ |
1878 | static Expr * |
1879 | make_partition_op_expr(PartitionKey key, int keynum, |
1880 | uint16 strategy, Expr *arg1, Expr *arg2) |
1881 | { |
1882 | Oid operoid; |
1883 | bool need_relabel = false; |
1884 | Expr *result = NULL; |
1885 | |
1886 | /* Get the correct btree operator for this partitioning column */ |
1887 | operoid = get_partition_operator(key, keynum, strategy, &need_relabel); |
1888 | |
1889 | /* |
1890 | * Chosen operator may be such that the non-Const operand needs to be |
1891 | * coerced, so apply the same; see the comment in |
1892 | * get_partition_operator(). |
1893 | */ |
1894 | if (!IsA(arg1, Const) && |
1895 | (need_relabel || |
1896 | key->partcollation[keynum] != key->parttypcoll[keynum])) |
1897 | arg1 = (Expr *) makeRelabelType(arg1, |
1898 | key->partopcintype[keynum], |
1899 | -1, |
1900 | key->partcollation[keynum], |
1901 | COERCE_EXPLICIT_CAST); |
1902 | |
1903 | /* Generate the actual expression */ |
1904 | switch (key->strategy) |
1905 | { |
1906 | case PARTITION_STRATEGY_LIST: |
1907 | { |
1908 | List *elems = (List *) arg2; |
1909 | int nelems = list_length(elems); |
1910 | |
1911 | Assert(nelems >= 1); |
1912 | Assert(keynum == 0); |
1913 | |
1914 | if (nelems > 1 && |
1915 | !type_is_array(key->parttypid[keynum])) |
1916 | { |
1917 | ArrayExpr *arrexpr; |
1918 | ScalarArrayOpExpr *saopexpr; |
1919 | |
1920 | /* Construct an ArrayExpr for the right-hand inputs */ |
1921 | arrexpr = makeNode(ArrayExpr); |
1922 | arrexpr->array_typeid = |
1923 | get_array_type(key->parttypid[keynum]); |
1924 | arrexpr->array_collid = key->parttypcoll[keynum]; |
1925 | arrexpr->element_typeid = key->parttypid[keynum]; |
1926 | arrexpr->elements = elems; |
1927 | arrexpr->multidims = false; |
1928 | arrexpr->location = -1; |
1929 | |
1930 | /* Build leftop = ANY (rightop) */ |
1931 | saopexpr = makeNode(ScalarArrayOpExpr); |
1932 | saopexpr->opno = operoid; |
1933 | saopexpr->opfuncid = get_opcode(operoid); |
1934 | saopexpr->useOr = true; |
1935 | saopexpr->inputcollid = key->partcollation[keynum]; |
1936 | saopexpr->args = list_make2(arg1, arrexpr); |
1937 | saopexpr->location = -1; |
1938 | |
1939 | result = (Expr *) saopexpr; |
1940 | } |
1941 | else |
1942 | { |
1943 | List *elemops = NIL; |
1944 | ListCell *lc; |
1945 | |
1946 | foreach(lc, elems) |
1947 | { |
1948 | Expr *elem = lfirst(lc), |
1949 | *elemop; |
1950 | |
1951 | elemop = make_opclause(operoid, |
1952 | BOOLOID, |
1953 | false, |
1954 | arg1, elem, |
1955 | InvalidOid, |
1956 | key->partcollation[keynum]); |
1957 | elemops = lappend(elemops, elemop); |
1958 | } |
1959 | |
1960 | result = nelems > 1 ? makeBoolExpr(OR_EXPR, elemops, -1) : linitial(elemops); |
1961 | } |
1962 | break; |
1963 | } |
1964 | |
1965 | case PARTITION_STRATEGY_RANGE: |
1966 | result = make_opclause(operoid, |
1967 | BOOLOID, |
1968 | false, |
1969 | arg1, arg2, |
1970 | InvalidOid, |
1971 | key->partcollation[keynum]); |
1972 | break; |
1973 | |
1974 | default: |
1975 | elog(ERROR, "invalid partitioning strategy" ); |
1976 | break; |
1977 | } |
1978 | |
1979 | return result; |
1980 | } |
1981 | |
1982 | /* |
1983 | * get_qual_for_hash |
1984 | * |
1985 | * Returns a CHECK constraint expression to use as a hash partition's |
1986 | * constraint, given the parent relation and partition bound structure. |
1987 | * |
1988 | * The partition constraint for a hash partition is always a call to the |
1989 | * built-in function satisfies_hash_partition(). |
1990 | */ |
1991 | static List * |
1992 | get_qual_for_hash(Relation parent, PartitionBoundSpec *spec) |
1993 | { |
1994 | PartitionKey key = RelationGetPartitionKey(parent); |
1995 | FuncExpr *fexpr; |
1996 | Node *relidConst; |
1997 | Node *modulusConst; |
1998 | Node *remainderConst; |
1999 | List *args; |
2000 | ListCell *partexprs_item; |
2001 | int i; |
2002 | |
2003 | /* Fixed arguments. */ |
2004 | relidConst = (Node *) makeConst(OIDOID, |
2005 | -1, |
2006 | InvalidOid, |
2007 | sizeof(Oid), |
2008 | ObjectIdGetDatum(RelationGetRelid(parent)), |
2009 | false, |
2010 | true); |
2011 | |
2012 | modulusConst = (Node *) makeConst(INT4OID, |
2013 | -1, |
2014 | InvalidOid, |
2015 | sizeof(int32), |
2016 | Int32GetDatum(spec->modulus), |
2017 | false, |
2018 | true); |
2019 | |
2020 | remainderConst = (Node *) makeConst(INT4OID, |
2021 | -1, |
2022 | InvalidOid, |
2023 | sizeof(int32), |
2024 | Int32GetDatum(spec->remainder), |
2025 | false, |
2026 | true); |
2027 | |
2028 | args = list_make3(relidConst, modulusConst, remainderConst); |
2029 | partexprs_item = list_head(key->partexprs); |
2030 | |
2031 | /* Add an argument for each key column. */ |
2032 | for (i = 0; i < key->partnatts; i++) |
2033 | { |
2034 | Node *keyCol; |
2035 | |
2036 | /* Left operand */ |
2037 | if (key->partattrs[i] != 0) |
2038 | { |
2039 | keyCol = (Node *) makeVar(1, |
2040 | key->partattrs[i], |
2041 | key->parttypid[i], |
2042 | key->parttypmod[i], |
2043 | key->parttypcoll[i], |
2044 | 0); |
2045 | } |
2046 | else |
2047 | { |
2048 | keyCol = (Node *) copyObject(lfirst(partexprs_item)); |
2049 | partexprs_item = lnext(partexprs_item); |
2050 | } |
2051 | |
2052 | args = lappend(args, keyCol); |
2053 | } |
2054 | |
2055 | fexpr = makeFuncExpr(F_SATISFIES_HASH_PARTITION, |
2056 | BOOLOID, |
2057 | args, |
2058 | InvalidOid, |
2059 | InvalidOid, |
2060 | COERCE_EXPLICIT_CALL); |
2061 | |
2062 | return list_make1(fexpr); |
2063 | } |
2064 | |
2065 | /* |
2066 | * get_qual_for_list |
2067 | * |
2068 | * Returns an implicit-AND list of expressions to use as a list partition's |
2069 | * constraint, given the parent relation and partition bound structure. |
2070 | * |
2071 | * The function returns NIL for a default partition when it's the only |
2072 | * partition since in that case there is no constraint. |
2073 | */ |
2074 | static List * |
2075 | get_qual_for_list(Relation parent, PartitionBoundSpec *spec) |
2076 | { |
2077 | PartitionKey key = RelationGetPartitionKey(parent); |
2078 | List *result; |
2079 | Expr *keyCol; |
2080 | Expr *opexpr; |
2081 | NullTest *nulltest; |
2082 | ListCell *cell; |
2083 | List *elems = NIL; |
2084 | bool list_has_null = false; |
2085 | |
2086 | /* |
2087 | * Only single-column list partitioning is supported, so we are worried |
2088 | * only about the partition key with index 0. |
2089 | */ |
2090 | Assert(key->partnatts == 1); |
2091 | |
2092 | /* Construct Var or expression representing the partition column */ |
2093 | if (key->partattrs[0] != 0) |
2094 | keyCol = (Expr *) makeVar(1, |
2095 | key->partattrs[0], |
2096 | key->parttypid[0], |
2097 | key->parttypmod[0], |
2098 | key->parttypcoll[0], |
2099 | 0); |
2100 | else |
2101 | keyCol = (Expr *) copyObject(linitial(key->partexprs)); |
2102 | |
2103 | /* |
2104 | * For default list partition, collect datums for all the partitions. The |
2105 | * default partition constraint should check that the partition key is |
2106 | * equal to none of those. |
2107 | */ |
2108 | if (spec->is_default) |
2109 | { |
2110 | int i; |
2111 | int ndatums = 0; |
2112 | PartitionDesc pdesc = RelationGetPartitionDesc(parent); |
2113 | PartitionBoundInfo boundinfo = pdesc->boundinfo; |
2114 | |
2115 | if (boundinfo) |
2116 | { |
2117 | ndatums = boundinfo->ndatums; |
2118 | |
2119 | if (partition_bound_accepts_nulls(boundinfo)) |
2120 | list_has_null = true; |
2121 | } |
2122 | |
2123 | /* |
2124 | * If default is the only partition, there need not be any partition |
2125 | * constraint on it. |
2126 | */ |
2127 | if (ndatums == 0 && !list_has_null) |
2128 | return NIL; |
2129 | |
2130 | for (i = 0; i < ndatums; i++) |
2131 | { |
2132 | Const *val; |
2133 | |
2134 | /* |
2135 | * Construct Const from known-not-null datum. We must be careful |
2136 | * to copy the value, because our result has to be able to outlive |
2137 | * the relcache entry we're copying from. |
2138 | */ |
2139 | val = makeConst(key->parttypid[0], |
2140 | key->parttypmod[0], |
2141 | key->parttypcoll[0], |
2142 | key->parttyplen[0], |
2143 | datumCopy(*boundinfo->datums[i], |
2144 | key->parttypbyval[0], |
2145 | key->parttyplen[0]), |
2146 | false, /* isnull */ |
2147 | key->parttypbyval[0]); |
2148 | |
2149 | elems = lappend(elems, val); |
2150 | } |
2151 | } |
2152 | else |
2153 | { |
2154 | /* |
2155 | * Create list of Consts for the allowed values, excluding any nulls. |
2156 | */ |
2157 | foreach(cell, spec->listdatums) |
2158 | { |
2159 | Const *val = castNode(Const, lfirst(cell)); |
2160 | |
2161 | if (val->constisnull) |
2162 | list_has_null = true; |
2163 | else |
2164 | elems = lappend(elems, copyObject(val)); |
2165 | } |
2166 | } |
2167 | |
2168 | if (elems) |
2169 | { |
2170 | /* |
2171 | * Generate the operator expression from the non-null partition |
2172 | * values. |
2173 | */ |
2174 | opexpr = make_partition_op_expr(key, 0, BTEqualStrategyNumber, |
2175 | keyCol, (Expr *) elems); |
2176 | } |
2177 | else |
2178 | { |
2179 | /* |
2180 | * If there are no partition values, we don't need an operator |
2181 | * expression. |
2182 | */ |
2183 | opexpr = NULL; |
2184 | } |
2185 | |
2186 | if (!list_has_null) |
2187 | { |
2188 | /* |
2189 | * Gin up a "col IS NOT NULL" test that will be AND'd with the main |
2190 | * expression. This might seem redundant, but the partition routing |
2191 | * machinery needs it. |
2192 | */ |
2193 | nulltest = makeNode(NullTest); |
2194 | nulltest->arg = keyCol; |
2195 | nulltest->nulltesttype = IS_NOT_NULL; |
2196 | nulltest->argisrow = false; |
2197 | nulltest->location = -1; |
2198 | |
2199 | result = opexpr ? list_make2(nulltest, opexpr) : list_make1(nulltest); |
2200 | } |
2201 | else |
2202 | { |
2203 | /* |
2204 | * Gin up a "col IS NULL" test that will be OR'd with the main |
2205 | * expression. |
2206 | */ |
2207 | nulltest = makeNode(NullTest); |
2208 | nulltest->arg = keyCol; |
2209 | nulltest->nulltesttype = IS_NULL; |
2210 | nulltest->argisrow = false; |
2211 | nulltest->location = -1; |
2212 | |
2213 | if (opexpr) |
2214 | { |
2215 | Expr *or; |
2216 | |
2217 | or = makeBoolExpr(OR_EXPR, list_make2(nulltest, opexpr), -1); |
2218 | result = list_make1(or); |
2219 | } |
2220 | else |
2221 | result = list_make1(nulltest); |
2222 | } |
2223 | |
2224 | /* |
2225 | * Note that, in general, applying NOT to a constraint expression doesn't |
2226 | * necessarily invert the set of rows it accepts, because NOT (NULL) is |
2227 | * NULL. However, the partition constraints we construct here never |
2228 | * evaluate to NULL, so applying NOT works as intended. |
2229 | */ |
2230 | if (spec->is_default) |
2231 | { |
2232 | result = list_make1(make_ands_explicit(result)); |
2233 | result = list_make1(makeBoolExpr(NOT_EXPR, result, -1)); |
2234 | } |
2235 | |
2236 | return result; |
2237 | } |
2238 | |
2239 | /* |
2240 | * get_qual_for_range |
2241 | * |
2242 | * Returns an implicit-AND list of expressions to use as a range partition's |
2243 | * constraint, given the parent relation and partition bound structure. |
2244 | * |
2245 | * For a multi-column range partition key, say (a, b, c), with (al, bl, cl) |
2246 | * as the lower bound tuple and (au, bu, cu) as the upper bound tuple, we |
2247 | * generate an expression tree of the following form: |
2248 | * |
2249 | * (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL) |
2250 | * AND |
2251 | * (a > al OR (a = al AND b > bl) OR (a = al AND b = bl AND c >= cl)) |
2252 | * AND |
2253 | * (a < au OR (a = au AND b < bu) OR (a = au AND b = bu AND c < cu)) |
2254 | * |
2255 | * It is often the case that a prefix of lower and upper bound tuples contains |
2256 | * the same values, for example, (al = au), in which case, we will emit an |
2257 | * expression tree of the following form: |
2258 | * |
2259 | * (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL) |
2260 | * AND |
2261 | * (a = al) |
2262 | * AND |
2263 | * (b > bl OR (b = bl AND c >= cl)) |
2264 | * AND |
2265 | * (b < bu) OR (b = bu AND c < cu)) |
2266 | * |
2267 | * If a bound datum is either MINVALUE or MAXVALUE, these expressions are |
2268 | * simplified using the fact that any value is greater than MINVALUE and less |
2269 | * than MAXVALUE. So, for example, if cu = MAXVALUE, c < cu is automatically |
2270 | * true, and we need not emit any expression for it, and the last line becomes |
2271 | * |
2272 | * (b < bu) OR (b = bu), which is simplified to (b <= bu) |
2273 | * |
2274 | * In most common cases with only one partition column, say a, the following |
2275 | * expression tree will be generated: a IS NOT NULL AND a >= al AND a < au |
2276 | * |
2277 | * For default partition, it returns the negation of the constraints of all |
2278 | * the other partitions. |
2279 | * |
2280 | * External callers should pass for_default as false; we set it to true only |
2281 | * when recursing. |
2282 | */ |
2283 | static List * |
2284 | get_qual_for_range(Relation parent, PartitionBoundSpec *spec, |
2285 | bool for_default) |
2286 | { |
2287 | List *result = NIL; |
2288 | ListCell *cell1, |
2289 | *cell2, |
2290 | *partexprs_item, |
2291 | *partexprs_item_saved; |
2292 | int i, |
2293 | j; |
2294 | PartitionRangeDatum *ldatum, |
2295 | *udatum; |
2296 | PartitionKey key = RelationGetPartitionKey(parent); |
2297 | Expr *keyCol; |
2298 | Const *lower_val, |
2299 | *upper_val; |
2300 | List *lower_or_arms, |
2301 | *upper_or_arms; |
2302 | int num_or_arms, |
2303 | current_or_arm; |
2304 | ListCell *lower_or_start_datum, |
2305 | *upper_or_start_datum; |
2306 | bool need_next_lower_arm, |
2307 | need_next_upper_arm; |
2308 | |
2309 | if (spec->is_default) |
2310 | { |
2311 | List *or_expr_args = NIL; |
2312 | PartitionDesc pdesc = RelationGetPartitionDesc(parent); |
2313 | Oid *inhoids = pdesc->oids; |
2314 | int nparts = pdesc->nparts, |
2315 | i; |
2316 | |
2317 | for (i = 0; i < nparts; i++) |
2318 | { |
2319 | Oid inhrelid = inhoids[i]; |
2320 | HeapTuple tuple; |
2321 | Datum datum; |
2322 | bool isnull; |
2323 | PartitionBoundSpec *bspec; |
2324 | |
2325 | tuple = SearchSysCache1(RELOID, inhrelid); |
2326 | if (!HeapTupleIsValid(tuple)) |
2327 | elog(ERROR, "cache lookup failed for relation %u" , inhrelid); |
2328 | |
2329 | datum = SysCacheGetAttr(RELOID, tuple, |
2330 | Anum_pg_class_relpartbound, |
2331 | &isnull); |
2332 | if (isnull) |
2333 | elog(ERROR, "null relpartbound for relation %u" , inhrelid); |
2334 | |
2335 | bspec = (PartitionBoundSpec *) |
2336 | stringToNode(TextDatumGetCString(datum)); |
2337 | if (!IsA(bspec, PartitionBoundSpec)) |
2338 | elog(ERROR, "expected PartitionBoundSpec" ); |
2339 | |
2340 | if (!bspec->is_default) |
2341 | { |
2342 | List *part_qual; |
2343 | |
2344 | part_qual = get_qual_for_range(parent, bspec, true); |
2345 | |
2346 | /* |
2347 | * AND the constraints of the partition and add to |
2348 | * or_expr_args |
2349 | */ |
2350 | or_expr_args = lappend(or_expr_args, list_length(part_qual) > 1 |
2351 | ? makeBoolExpr(AND_EXPR, part_qual, -1) |
2352 | : linitial(part_qual)); |
2353 | } |
2354 | ReleaseSysCache(tuple); |
2355 | } |
2356 | |
2357 | if (or_expr_args != NIL) |
2358 | { |
2359 | Expr *other_parts_constr; |
2360 | |
2361 | /* |
2362 | * Combine the constraints obtained for non-default partitions |
2363 | * using OR. As requested, each of the OR's args doesn't include |
2364 | * the NOT NULL test for partition keys (which is to avoid its |
2365 | * useless repetition). Add the same now. |
2366 | */ |
2367 | other_parts_constr = |
2368 | makeBoolExpr(AND_EXPR, |
2369 | lappend(get_range_nulltest(key), |
2370 | list_length(or_expr_args) > 1 |
2371 | ? makeBoolExpr(OR_EXPR, or_expr_args, |
2372 | -1) |
2373 | : linitial(or_expr_args)), |
2374 | -1); |
2375 | |
2376 | /* |
2377 | * Finally, the default partition contains everything *NOT* |
2378 | * contained in the non-default partitions. |
2379 | */ |
2380 | result = list_make1(makeBoolExpr(NOT_EXPR, |
2381 | list_make1(other_parts_constr), -1)); |
2382 | } |
2383 | |
2384 | return result; |
2385 | } |
2386 | |
2387 | lower_or_start_datum = list_head(spec->lowerdatums); |
2388 | upper_or_start_datum = list_head(spec->upperdatums); |
2389 | num_or_arms = key->partnatts; |
2390 | |
2391 | /* |
2392 | * If it is the recursive call for default, we skip the get_range_nulltest |
2393 | * to avoid accumulating the NullTest on the same keys for each partition. |
2394 | */ |
2395 | if (!for_default) |
2396 | result = get_range_nulltest(key); |
2397 | |
2398 | /* |
2399 | * Iterate over the key columns and check if the corresponding lower and |
2400 | * upper datums are equal using the btree equality operator for the |
2401 | * column's type. If equal, we emit single keyCol = common_value |
2402 | * expression. Starting from the first column for which the corresponding |
2403 | * lower and upper bound datums are not equal, we generate OR expressions |
2404 | * as shown in the function's header comment. |
2405 | */ |
2406 | i = 0; |
2407 | partexprs_item = list_head(key->partexprs); |
2408 | partexprs_item_saved = partexprs_item; /* placate compiler */ |
2409 | forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums) |
2410 | { |
2411 | EState *estate; |
2412 | MemoryContext oldcxt; |
2413 | Expr *test_expr; |
2414 | ExprState *test_exprstate; |
2415 | Datum test_result; |
2416 | bool isNull; |
2417 | |
2418 | ldatum = castNode(PartitionRangeDatum, lfirst(cell1)); |
2419 | udatum = castNode(PartitionRangeDatum, lfirst(cell2)); |
2420 | |
2421 | /* |
2422 | * Since get_range_key_properties() modifies partexprs_item, and we |
2423 | * might need to start over from the previous expression in the later |
2424 | * part of this function, save away the current value. |
2425 | */ |
2426 | partexprs_item_saved = partexprs_item; |
2427 | |
2428 | get_range_key_properties(key, i, ldatum, udatum, |
2429 | &partexprs_item, |
2430 | &keyCol, |
2431 | &lower_val, &upper_val); |
2432 | |
2433 | /* |
2434 | * If either value is NULL, the corresponding partition bound is |
2435 | * either MINVALUE or MAXVALUE, and we treat them as unequal, because |
2436 | * even if they're the same, there is no common value to equate the |
2437 | * key column with. |
2438 | */ |
2439 | if (!lower_val || !upper_val) |
2440 | break; |
2441 | |
2442 | /* Create the test expression */ |
2443 | estate = CreateExecutorState(); |
2444 | oldcxt = MemoryContextSwitchTo(estate->es_query_cxt); |
2445 | test_expr = make_partition_op_expr(key, i, BTEqualStrategyNumber, |
2446 | (Expr *) lower_val, |
2447 | (Expr *) upper_val); |
2448 | fix_opfuncids((Node *) test_expr); |
2449 | test_exprstate = ExecInitExpr(test_expr, NULL); |
2450 | test_result = ExecEvalExprSwitchContext(test_exprstate, |
2451 | GetPerTupleExprContext(estate), |
2452 | &isNull); |
2453 | MemoryContextSwitchTo(oldcxt); |
2454 | FreeExecutorState(estate); |
2455 | |
2456 | /* If not equal, go generate the OR expressions */ |
2457 | if (!DatumGetBool(test_result)) |
2458 | break; |
2459 | |
2460 | /* |
2461 | * The bounds for the last key column can't be equal, because such a |
2462 | * range partition would never be allowed to be defined (it would have |
2463 | * an empty range otherwise). |
2464 | */ |
2465 | if (i == key->partnatts - 1) |
2466 | elog(ERROR, "invalid range bound specification" ); |
2467 | |
2468 | /* Equal, so generate keyCol = lower_val expression */ |
2469 | result = lappend(result, |
2470 | make_partition_op_expr(key, i, BTEqualStrategyNumber, |
2471 | keyCol, (Expr *) lower_val)); |
2472 | |
2473 | i++; |
2474 | } |
2475 | |
2476 | /* First pair of lower_val and upper_val that are not equal. */ |
2477 | lower_or_start_datum = cell1; |
2478 | upper_or_start_datum = cell2; |
2479 | |
2480 | /* OR will have as many arms as there are key columns left. */ |
2481 | num_or_arms = key->partnatts - i; |
2482 | current_or_arm = 0; |
2483 | lower_or_arms = upper_or_arms = NIL; |
2484 | need_next_lower_arm = need_next_upper_arm = true; |
2485 | while (current_or_arm < num_or_arms) |
2486 | { |
2487 | List *lower_or_arm_args = NIL, |
2488 | *upper_or_arm_args = NIL; |
2489 | |
2490 | /* Restart scan of columns from the i'th one */ |
2491 | j = i; |
2492 | partexprs_item = partexprs_item_saved; |
2493 | |
2494 | for_both_cell(cell1, lower_or_start_datum, cell2, upper_or_start_datum) |
2495 | { |
2496 | PartitionRangeDatum *ldatum_next = NULL, |
2497 | *udatum_next = NULL; |
2498 | |
2499 | ldatum = castNode(PartitionRangeDatum, lfirst(cell1)); |
2500 | if (lnext(cell1)) |
2501 | ldatum_next = castNode(PartitionRangeDatum, |
2502 | lfirst(lnext(cell1))); |
2503 | udatum = castNode(PartitionRangeDatum, lfirst(cell2)); |
2504 | if (lnext(cell2)) |
2505 | udatum_next = castNode(PartitionRangeDatum, |
2506 | lfirst(lnext(cell2))); |
2507 | get_range_key_properties(key, j, ldatum, udatum, |
2508 | &partexprs_item, |
2509 | &keyCol, |
2510 | &lower_val, &upper_val); |
2511 | |
2512 | if (need_next_lower_arm && lower_val) |
2513 | { |
2514 | uint16 strategy; |
2515 | |
2516 | /* |
2517 | * For the non-last columns of this arm, use the EQ operator. |
2518 | * For the last column of this arm, use GT, unless this is the |
2519 | * last column of the whole bound check, or the next bound |
2520 | * datum is MINVALUE, in which case use GE. |
2521 | */ |
2522 | if (j - i < current_or_arm) |
2523 | strategy = BTEqualStrategyNumber; |
2524 | else if (j == key->partnatts - 1 || |
2525 | (ldatum_next && |
2526 | ldatum_next->kind == PARTITION_RANGE_DATUM_MINVALUE)) |
2527 | strategy = BTGreaterEqualStrategyNumber; |
2528 | else |
2529 | strategy = BTGreaterStrategyNumber; |
2530 | |
2531 | lower_or_arm_args = lappend(lower_or_arm_args, |
2532 | make_partition_op_expr(key, j, |
2533 | strategy, |
2534 | keyCol, |
2535 | (Expr *) lower_val)); |
2536 | } |
2537 | |
2538 | if (need_next_upper_arm && upper_val) |
2539 | { |
2540 | uint16 strategy; |
2541 | |
2542 | /* |
2543 | * For the non-last columns of this arm, use the EQ operator. |
2544 | * For the last column of this arm, use LT, unless the next |
2545 | * bound datum is MAXVALUE, in which case use LE. |
2546 | */ |
2547 | if (j - i < current_or_arm) |
2548 | strategy = BTEqualStrategyNumber; |
2549 | else if (udatum_next && |
2550 | udatum_next->kind == PARTITION_RANGE_DATUM_MAXVALUE) |
2551 | strategy = BTLessEqualStrategyNumber; |
2552 | else |
2553 | strategy = BTLessStrategyNumber; |
2554 | |
2555 | upper_or_arm_args = lappend(upper_or_arm_args, |
2556 | make_partition_op_expr(key, j, |
2557 | strategy, |
2558 | keyCol, |
2559 | (Expr *) upper_val)); |
2560 | } |
2561 | |
2562 | /* |
2563 | * Did we generate enough of OR's arguments? First arm considers |
2564 | * the first of the remaining columns, second arm considers first |
2565 | * two of the remaining columns, and so on. |
2566 | */ |
2567 | ++j; |
2568 | if (j - i > current_or_arm) |
2569 | { |
2570 | /* |
2571 | * We must not emit any more arms if the new column that will |
2572 | * be considered is unbounded, or this one was. |
2573 | */ |
2574 | if (!lower_val || !ldatum_next || |
2575 | ldatum_next->kind != PARTITION_RANGE_DATUM_VALUE) |
2576 | need_next_lower_arm = false; |
2577 | if (!upper_val || !udatum_next || |
2578 | udatum_next->kind != PARTITION_RANGE_DATUM_VALUE) |
2579 | need_next_upper_arm = false; |
2580 | break; |
2581 | } |
2582 | } |
2583 | |
2584 | if (lower_or_arm_args != NIL) |
2585 | lower_or_arms = lappend(lower_or_arms, |
2586 | list_length(lower_or_arm_args) > 1 |
2587 | ? makeBoolExpr(AND_EXPR, lower_or_arm_args, -1) |
2588 | : linitial(lower_or_arm_args)); |
2589 | |
2590 | if (upper_or_arm_args != NIL) |
2591 | upper_or_arms = lappend(upper_or_arms, |
2592 | list_length(upper_or_arm_args) > 1 |
2593 | ? makeBoolExpr(AND_EXPR, upper_or_arm_args, -1) |
2594 | : linitial(upper_or_arm_args)); |
2595 | |
2596 | /* If no work to do in the next iteration, break away. */ |
2597 | if (!need_next_lower_arm && !need_next_upper_arm) |
2598 | break; |
2599 | |
2600 | ++current_or_arm; |
2601 | } |
2602 | |
2603 | /* |
2604 | * Generate the OR expressions for each of lower and upper bounds (if |
2605 | * required), and append to the list of implicitly ANDed list of |
2606 | * expressions. |
2607 | */ |
2608 | if (lower_or_arms != NIL) |
2609 | result = lappend(result, |
2610 | list_length(lower_or_arms) > 1 |
2611 | ? makeBoolExpr(OR_EXPR, lower_or_arms, -1) |
2612 | : linitial(lower_or_arms)); |
2613 | if (upper_or_arms != NIL) |
2614 | result = lappend(result, |
2615 | list_length(upper_or_arms) > 1 |
2616 | ? makeBoolExpr(OR_EXPR, upper_or_arms, -1) |
2617 | : linitial(upper_or_arms)); |
2618 | |
2619 | /* |
2620 | * As noted above, for non-default, we return list with constant TRUE. If |
2621 | * the result is NIL during the recursive call for default, it implies |
2622 | * this is the only other partition which can hold every value of the key |
2623 | * except NULL. Hence we return the NullTest result skipped earlier. |
2624 | */ |
2625 | if (result == NIL) |
2626 | result = for_default |
2627 | ? get_range_nulltest(key) |
2628 | : list_make1(makeBoolConst(true, false)); |
2629 | |
2630 | return result; |
2631 | } |
2632 | |
2633 | /* |
2634 | * get_range_key_properties |
2635 | * Returns range partition key information for a given column |
2636 | * |
2637 | * This is a subroutine for get_qual_for_range, and its API is pretty |
2638 | * specialized to that caller. |
2639 | * |
2640 | * Constructs an Expr for the key column (returned in *keyCol) and Consts |
2641 | * for the lower and upper range limits (returned in *lower_val and |
2642 | * *upper_val). For MINVALUE/MAXVALUE limits, NULL is returned instead of |
2643 | * a Const. All of these structures are freshly palloc'd. |
2644 | * |
2645 | * *partexprs_item points to the cell containing the next expression in |
2646 | * the key->partexprs list, or NULL. It may be advanced upon return. |
2647 | */ |
2648 | static void |
2649 | get_range_key_properties(PartitionKey key, int keynum, |
2650 | PartitionRangeDatum *ldatum, |
2651 | PartitionRangeDatum *udatum, |
2652 | ListCell **partexprs_item, |
2653 | Expr **keyCol, |
2654 | Const **lower_val, Const **upper_val) |
2655 | { |
2656 | /* Get partition key expression for this column */ |
2657 | if (key->partattrs[keynum] != 0) |
2658 | { |
2659 | *keyCol = (Expr *) makeVar(1, |
2660 | key->partattrs[keynum], |
2661 | key->parttypid[keynum], |
2662 | key->parttypmod[keynum], |
2663 | key->parttypcoll[keynum], |
2664 | 0); |
2665 | } |
2666 | else |
2667 | { |
2668 | if (*partexprs_item == NULL) |
2669 | elog(ERROR, "wrong number of partition key expressions" ); |
2670 | *keyCol = copyObject(lfirst(*partexprs_item)); |
2671 | *partexprs_item = lnext(*partexprs_item); |
2672 | } |
2673 | |
2674 | /* Get appropriate Const nodes for the bounds */ |
2675 | if (ldatum->kind == PARTITION_RANGE_DATUM_VALUE) |
2676 | *lower_val = castNode(Const, copyObject(ldatum->value)); |
2677 | else |
2678 | *lower_val = NULL; |
2679 | |
2680 | if (udatum->kind == PARTITION_RANGE_DATUM_VALUE) |
2681 | *upper_val = castNode(Const, copyObject(udatum->value)); |
2682 | else |
2683 | *upper_val = NULL; |
2684 | } |
2685 | |
2686 | /* |
2687 | * get_range_nulltest |
2688 | * |
2689 | * A non-default range partition table does not currently allow partition |
2690 | * keys to be null, so emit an IS NOT NULL expression for each key column. |
2691 | */ |
2692 | static List * |
2693 | get_range_nulltest(PartitionKey key) |
2694 | { |
2695 | List *result = NIL; |
2696 | NullTest *nulltest; |
2697 | ListCell *partexprs_item; |
2698 | int i; |
2699 | |
2700 | partexprs_item = list_head(key->partexprs); |
2701 | for (i = 0; i < key->partnatts; i++) |
2702 | { |
2703 | Expr *keyCol; |
2704 | |
2705 | if (key->partattrs[i] != 0) |
2706 | { |
2707 | keyCol = (Expr *) makeVar(1, |
2708 | key->partattrs[i], |
2709 | key->parttypid[i], |
2710 | key->parttypmod[i], |
2711 | key->parttypcoll[i], |
2712 | 0); |
2713 | } |
2714 | else |
2715 | { |
2716 | if (partexprs_item == NULL) |
2717 | elog(ERROR, "wrong number of partition key expressions" ); |
2718 | keyCol = copyObject(lfirst(partexprs_item)); |
2719 | partexprs_item = lnext(partexprs_item); |
2720 | } |
2721 | |
2722 | nulltest = makeNode(NullTest); |
2723 | nulltest->arg = keyCol; |
2724 | nulltest->nulltesttype = IS_NOT_NULL; |
2725 | nulltest->argisrow = false; |
2726 | nulltest->location = -1; |
2727 | result = lappend(result, nulltest); |
2728 | } |
2729 | |
2730 | return result; |
2731 | } |
2732 | |
2733 | /* |
2734 | * compute_partition_hash_value |
2735 | * |
2736 | * Compute the hash value for given partition key values. |
2737 | */ |
2738 | uint64 |
2739 | compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation, |
2740 | Datum *values, bool *isnull) |
2741 | { |
2742 | int i; |
2743 | uint64 rowHash = 0; |
2744 | Datum seed = UInt64GetDatum(HASH_PARTITION_SEED); |
2745 | |
2746 | for (i = 0; i < partnatts; i++) |
2747 | { |
2748 | /* Nulls are just ignored */ |
2749 | if (!isnull[i]) |
2750 | { |
2751 | Datum hash; |
2752 | |
2753 | Assert(OidIsValid(partsupfunc[i].fn_oid)); |
2754 | |
2755 | /* |
2756 | * Compute hash for each datum value by calling respective |
2757 | * datatype-specific hash functions of each partition key |
2758 | * attribute. |
2759 | */ |
2760 | hash = FunctionCall2Coll(&partsupfunc[i], partcollation[i], |
2761 | values[i], seed); |
2762 | |
2763 | /* Form a single 64-bit hash value */ |
2764 | rowHash = hash_combine64(rowHash, DatumGetUInt64(hash)); |
2765 | } |
2766 | } |
2767 | |
2768 | return rowHash; |
2769 | } |
2770 | |
2771 | /* |
2772 | * satisfies_hash_partition |
2773 | * |
2774 | * This is an SQL-callable function for use in hash partition constraints. |
2775 | * The first three arguments are the parent table OID, modulus, and remainder. |
2776 | * The remaining arguments are the value of the partitioning columns (or |
2777 | * expressions); these are hashed and the results are combined into a single |
2778 | * hash value by calling hash_combine64. |
2779 | * |
2780 | * Returns true if remainder produced when this computed single hash value is |
2781 | * divided by the given modulus is equal to given remainder, otherwise false. |
2782 | * |
2783 | * See get_qual_for_hash() for usage. |
2784 | */ |
2785 | Datum |
2786 | satisfies_hash_partition(PG_FUNCTION_ARGS) |
2787 | { |
2788 | typedef struct ColumnsHashData |
2789 | { |
2790 | Oid relid; |
2791 | int nkeys; |
2792 | Oid variadic_type; |
2793 | int16 variadic_typlen; |
2794 | bool variadic_typbyval; |
2795 | char variadic_typalign; |
2796 | Oid partcollid[PARTITION_MAX_KEYS]; |
2797 | FmgrInfo partsupfunc[FLEXIBLE_ARRAY_MEMBER]; |
2798 | } ColumnsHashData; |
2799 | Oid parentId; |
2800 | int modulus; |
2801 | int remainder; |
2802 | Datum seed = UInt64GetDatum(HASH_PARTITION_SEED); |
2803 | ColumnsHashData *; |
2804 | uint64 rowHash = 0; |
2805 | |
2806 | /* Return null if the parent OID, modulus, or remainder is NULL. */ |
2807 | if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2)) |
2808 | PG_RETURN_NULL(); |
2809 | parentId = PG_GETARG_OID(0); |
2810 | modulus = PG_GETARG_INT32(1); |
2811 | remainder = PG_GETARG_INT32(2); |
2812 | |
2813 | /* Sanity check modulus and remainder. */ |
2814 | if (modulus <= 0) |
2815 | ereport(ERROR, |
2816 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
2817 | errmsg("modulus for hash partition must be a positive integer" ))); |
2818 | if (remainder < 0) |
2819 | ereport(ERROR, |
2820 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
2821 | errmsg("remainder for hash partition must be a non-negative integer" ))); |
2822 | if (remainder >= modulus) |
2823 | ereport(ERROR, |
2824 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
2825 | errmsg("remainder for hash partition must be less than modulus" ))); |
2826 | |
2827 | /* |
2828 | * Cache hash function information. |
2829 | */ |
2830 | my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra; |
2831 | if (my_extra == NULL || my_extra->relid != parentId) |
2832 | { |
2833 | Relation parent; |
2834 | PartitionKey key; |
2835 | int j; |
2836 | |
2837 | /* Open parent relation and fetch partition keyinfo */ |
2838 | parent = try_relation_open(parentId, AccessShareLock); |
2839 | if (parent == NULL) |
2840 | PG_RETURN_NULL(); |
2841 | key = RelationGetPartitionKey(parent); |
2842 | |
2843 | /* Reject parent table that is not hash-partitioned. */ |
2844 | if (parent->rd_rel->relkind != RELKIND_PARTITIONED_TABLE || |
2845 | key->strategy != PARTITION_STRATEGY_HASH) |
2846 | ereport(ERROR, |
2847 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
2848 | errmsg("\"%s\" is not a hash partitioned table" , |
2849 | get_rel_name(parentId)))); |
2850 | |
2851 | if (!get_fn_expr_variadic(fcinfo->flinfo)) |
2852 | { |
2853 | int nargs = PG_NARGS() - 3; |
2854 | |
2855 | /* complain if wrong number of column values */ |
2856 | if (key->partnatts != nargs) |
2857 | ereport(ERROR, |
2858 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
2859 | errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)" , |
2860 | key->partnatts, nargs))); |
2861 | |
2862 | /* allocate space for our cache */ |
2863 | fcinfo->flinfo->fn_extra = |
2864 | MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt, |
2865 | offsetof(ColumnsHashData, partsupfunc) + |
2866 | sizeof(FmgrInfo) * nargs); |
2867 | my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra; |
2868 | my_extra->relid = parentId; |
2869 | my_extra->nkeys = key->partnatts; |
2870 | memcpy(my_extra->partcollid, key->partcollation, |
2871 | key->partnatts * sizeof(Oid)); |
2872 | |
2873 | /* check argument types and save fmgr_infos */ |
2874 | for (j = 0; j < key->partnatts; ++j) |
2875 | { |
2876 | Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, j + 3); |
2877 | |
2878 | if (argtype != key->parttypid[j] && !IsBinaryCoercible(argtype, key->parttypid[j])) |
2879 | ereport(ERROR, |
2880 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
2881 | errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"" , |
2882 | j + 1, format_type_be(key->parttypid[j]), format_type_be(argtype)))); |
2883 | |
2884 | fmgr_info_copy(&my_extra->partsupfunc[j], |
2885 | &key->partsupfunc[j], |
2886 | fcinfo->flinfo->fn_mcxt); |
2887 | } |
2888 | } |
2889 | else |
2890 | { |
2891 | ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3); |
2892 | |
2893 | /* allocate space for our cache -- just one FmgrInfo in this case */ |
2894 | fcinfo->flinfo->fn_extra = |
2895 | MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt, |
2896 | offsetof(ColumnsHashData, partsupfunc) + |
2897 | sizeof(FmgrInfo)); |
2898 | my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra; |
2899 | my_extra->relid = parentId; |
2900 | my_extra->nkeys = key->partnatts; |
2901 | my_extra->variadic_type = ARR_ELEMTYPE(variadic_array); |
2902 | get_typlenbyvalalign(my_extra->variadic_type, |
2903 | &my_extra->variadic_typlen, |
2904 | &my_extra->variadic_typbyval, |
2905 | &my_extra->variadic_typalign); |
2906 | my_extra->partcollid[0] = key->partcollation[0]; |
2907 | |
2908 | /* check argument types */ |
2909 | for (j = 0; j < key->partnatts; ++j) |
2910 | if (key->parttypid[j] != my_extra->variadic_type) |
2911 | ereport(ERROR, |
2912 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
2913 | errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"" , |
2914 | j + 1, |
2915 | format_type_be(key->parttypid[j]), |
2916 | format_type_be(my_extra->variadic_type)))); |
2917 | |
2918 | fmgr_info_copy(&my_extra->partsupfunc[0], |
2919 | &key->partsupfunc[0], |
2920 | fcinfo->flinfo->fn_mcxt); |
2921 | } |
2922 | |
2923 | /* Hold lock until commit */ |
2924 | relation_close(parent, NoLock); |
2925 | } |
2926 | |
2927 | if (!OidIsValid(my_extra->variadic_type)) |
2928 | { |
2929 | int nkeys = my_extra->nkeys; |
2930 | int i; |
2931 | |
2932 | /* |
2933 | * For a non-variadic call, neither the number of arguments nor their |
2934 | * types can change across calls, so avoid the expense of rechecking |
2935 | * here. |
2936 | */ |
2937 | |
2938 | for (i = 0; i < nkeys; i++) |
2939 | { |
2940 | Datum hash; |
2941 | |
2942 | /* keys start from fourth argument of function. */ |
2943 | int argno = i + 3; |
2944 | |
2945 | if (PG_ARGISNULL(argno)) |
2946 | continue; |
2947 | |
2948 | hash = FunctionCall2Coll(&my_extra->partsupfunc[i], |
2949 | my_extra->partcollid[i], |
2950 | PG_GETARG_DATUM(argno), |
2951 | seed); |
2952 | |
2953 | /* Form a single 64-bit hash value */ |
2954 | rowHash = hash_combine64(rowHash, DatumGetUInt64(hash)); |
2955 | } |
2956 | } |
2957 | else |
2958 | { |
2959 | ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3); |
2960 | int i; |
2961 | int nelems; |
2962 | Datum *datum; |
2963 | bool *isnull; |
2964 | |
2965 | deconstruct_array(variadic_array, |
2966 | my_extra->variadic_type, |
2967 | my_extra->variadic_typlen, |
2968 | my_extra->variadic_typbyval, |
2969 | my_extra->variadic_typalign, |
2970 | &datum, &isnull, &nelems); |
2971 | |
2972 | /* complain if wrong number of column values */ |
2973 | if (nelems != my_extra->nkeys) |
2974 | ereport(ERROR, |
2975 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
2976 | errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)" , |
2977 | my_extra->nkeys, nelems))); |
2978 | |
2979 | for (i = 0; i < nelems; i++) |
2980 | { |
2981 | Datum hash; |
2982 | |
2983 | if (isnull[i]) |
2984 | continue; |
2985 | |
2986 | hash = FunctionCall2Coll(&my_extra->partsupfunc[0], |
2987 | my_extra->partcollid[0], |
2988 | datum[i], |
2989 | seed); |
2990 | |
2991 | /* Form a single 64-bit hash value */ |
2992 | rowHash = hash_combine64(rowHash, DatumGetUInt64(hash)); |
2993 | } |
2994 | } |
2995 | |
2996 | PG_RETURN_BOOL(rowHash % modulus == remainder); |
2997 | } |
2998 | |