| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * partbounds.c |
| 4 | * Support routines for manipulating partition bounds |
| 5 | * |
| 6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 7 | * Portions Copyright (c) 1994, Regents of the University of California |
| 8 | * |
| 9 | * IDENTIFICATION |
| 10 | * src/backend/partitioning/partbounds.c |
| 11 | * |
| 12 | *------------------------------------------------------------------------- |
| 13 | */ |
| 14 | |
| 15 | #include "postgres.h" |
| 16 | |
| 17 | #include "access/relation.h" |
| 18 | #include "access/table.h" |
| 19 | #include "access/tableam.h" |
| 20 | #include "catalog/partition.h" |
| 21 | #include "catalog/pg_inherits.h" |
| 22 | #include "catalog/pg_type.h" |
| 23 | #include "commands/tablecmds.h" |
| 24 | #include "executor/executor.h" |
| 25 | #include "miscadmin.h" |
| 26 | #include "nodes/makefuncs.h" |
| 27 | #include "nodes/nodeFuncs.h" |
| 28 | #include "parser/parse_coerce.h" |
| 29 | #include "partitioning/partbounds.h" |
| 30 | #include "partitioning/partdesc.h" |
| 31 | #include "partitioning/partprune.h" |
| 32 | #include "utils/builtins.h" |
| 33 | #include "utils/datum.h" |
| 34 | #include "utils/fmgroids.h" |
| 35 | #include "utils/hashutils.h" |
| 36 | #include "utils/lsyscache.h" |
| 37 | #include "utils/partcache.h" |
| 38 | #include "utils/rel.h" |
| 39 | #include "utils/snapmgr.h" |
| 40 | #include "utils/ruleutils.h" |
| 41 | #include "utils/syscache.h" |
| 42 | |
| 43 | /* |
| 44 | * When qsort'ing partition bounds after reading from the catalog, each bound |
| 45 | * is represented with one of the following structs. |
| 46 | */ |
| 47 | |
| 48 | /* One bound of a hash partition */ |
| 49 | typedef struct PartitionHashBound |
| 50 | { |
| 51 | int modulus; |
| 52 | int remainder; |
| 53 | int index; |
| 54 | } PartitionHashBound; |
| 55 | |
| 56 | /* One value coming from some (index'th) list partition */ |
| 57 | typedef struct PartitionListValue |
| 58 | { |
| 59 | int index; |
| 60 | Datum value; |
| 61 | } PartitionListValue; |
| 62 | |
| 63 | /* One bound of a range partition */ |
| 64 | typedef struct PartitionRangeBound |
| 65 | { |
| 66 | int index; |
| 67 | Datum *datums; /* range bound datums */ |
| 68 | PartitionRangeDatumKind *kind; /* the kind of each datum */ |
| 69 | bool lower; /* this is the lower (vs upper) bound */ |
| 70 | } PartitionRangeBound; |
| 71 | |
| 72 | static int32 qsort_partition_hbound_cmp(const void *a, const void *b); |
| 73 | static int32 qsort_partition_list_value_cmp(const void *a, const void *b, |
| 74 | void *arg); |
| 75 | static int32 qsort_partition_rbound_cmp(const void *a, const void *b, |
| 76 | void *arg); |
| 77 | static PartitionBoundInfo create_hash_bounds(PartitionBoundSpec **boundspecs, |
| 78 | int nparts, PartitionKey key, int **mapping); |
| 79 | static PartitionBoundInfo create_list_bounds(PartitionBoundSpec **boundspecs, |
| 80 | int nparts, PartitionKey key, int **mapping); |
| 81 | static PartitionBoundInfo create_range_bounds(PartitionBoundSpec **boundspecs, |
| 82 | int nparts, PartitionKey key, int **mapping); |
| 83 | static PartitionRangeBound *make_one_partition_rbound(PartitionKey key, int index, |
| 84 | List *datums, bool lower); |
| 85 | static int32 partition_hbound_cmp(int modulus1, int remainder1, int modulus2, |
| 86 | int remainder2); |
| 87 | static int32 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc, |
| 88 | Oid *partcollation, Datum *datums1, |
| 89 | PartitionRangeDatumKind *kind1, bool lower1, |
| 90 | PartitionRangeBound *b2); |
| 91 | static int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc, |
| 92 | Oid *partcollation, |
| 93 | PartitionBoundInfo boundinfo, |
| 94 | PartitionRangeBound *probe, bool *is_equal); |
| 95 | static int get_partition_bound_num_indexes(PartitionBoundInfo b); |
| 96 | static Expr *make_partition_op_expr(PartitionKey key, int keynum, |
| 97 | uint16 strategy, Expr *arg1, Expr *arg2); |
| 98 | static Oid get_partition_operator(PartitionKey key, int col, |
| 99 | StrategyNumber strategy, bool *need_relabel); |
| 100 | static List *get_qual_for_hash(Relation parent, PartitionBoundSpec *spec); |
| 101 | static List *get_qual_for_list(Relation parent, PartitionBoundSpec *spec); |
| 102 | static List *get_qual_for_range(Relation parent, PartitionBoundSpec *spec, |
| 103 | bool for_default); |
| 104 | static void get_range_key_properties(PartitionKey key, int keynum, |
| 105 | PartitionRangeDatum *ldatum, |
| 106 | PartitionRangeDatum *udatum, |
| 107 | ListCell **partexprs_item, |
| 108 | Expr **keyCol, |
| 109 | Const **lower_val, Const **upper_val); |
| 110 | static List *get_range_nulltest(PartitionKey key); |
| 111 | |
| 112 | /* |
| 113 | * get_qual_from_partbound |
| 114 | * Given a parser node for partition bound, return the list of executable |
| 115 | * expressions as partition constraint |
| 116 | */ |
| 117 | List * |
| 118 | get_qual_from_partbound(Relation rel, Relation parent, |
| 119 | PartitionBoundSpec *spec) |
| 120 | { |
| 121 | PartitionKey key = RelationGetPartitionKey(parent); |
| 122 | List *my_qual = NIL; |
| 123 | |
| 124 | Assert(key != NULL); |
| 125 | |
| 126 | switch (key->strategy) |
| 127 | { |
| 128 | case PARTITION_STRATEGY_HASH: |
| 129 | Assert(spec->strategy == PARTITION_STRATEGY_HASH); |
| 130 | my_qual = get_qual_for_hash(parent, spec); |
| 131 | break; |
| 132 | |
| 133 | case PARTITION_STRATEGY_LIST: |
| 134 | Assert(spec->strategy == PARTITION_STRATEGY_LIST); |
| 135 | my_qual = get_qual_for_list(parent, spec); |
| 136 | break; |
| 137 | |
| 138 | case PARTITION_STRATEGY_RANGE: |
| 139 | Assert(spec->strategy == PARTITION_STRATEGY_RANGE); |
| 140 | my_qual = get_qual_for_range(parent, spec, false); |
| 141 | break; |
| 142 | |
| 143 | default: |
| 144 | elog(ERROR, "unexpected partition strategy: %d" , |
| 145 | (int) key->strategy); |
| 146 | } |
| 147 | |
| 148 | return my_qual; |
| 149 | } |
| 150 | |
| 151 | /* |
| 152 | * partition_bounds_create |
| 153 | * Build a PartitionBoundInfo struct from a list of PartitionBoundSpec |
| 154 | * nodes |
| 155 | * |
| 156 | * This function creates a PartitionBoundInfo and fills the values of its |
| 157 | * various members based on the input list. Importantly, 'datums' array will |
| 158 | * contain Datum representation of individual bounds (possibly after |
| 159 | * de-duplication as in case of range bounds), sorted in a canonical order |
| 160 | * defined by qsort_partition_* functions of respective partitioning methods. |
| 161 | * 'indexes' array will contain as many elements as there are bounds (specific |
| 162 | * exceptions to this rule are listed in the function body), which represent |
| 163 | * the 0-based canonical positions of partitions. |
| 164 | * |
| 165 | * Upon return from this function, *mapping is set to an array of |
| 166 | * list_length(boundspecs) elements, each of which maps the original index of |
| 167 | * a partition to its canonical index. |
| 168 | * |
| 169 | * Note: The objects returned by this function are wholly allocated in the |
| 170 | * current memory context. |
| 171 | */ |
| 172 | PartitionBoundInfo |
| 173 | partition_bounds_create(PartitionBoundSpec **boundspecs, int nparts, |
| 174 | PartitionKey key, int **mapping) |
| 175 | { |
| 176 | int i; |
| 177 | |
| 178 | Assert(nparts > 0); |
| 179 | |
| 180 | /* |
| 181 | * For each partitioning method, we first convert the partition bounds |
| 182 | * from their parser node representation to the internal representation, |
| 183 | * along with any additional preprocessing (such as de-duplicating range |
| 184 | * bounds). Resulting bound datums are then added to the 'datums' array |
| 185 | * in PartitionBoundInfo. For each datum added, an integer indicating the |
| 186 | * canonical partition index is added to the 'indexes' array. |
| 187 | * |
| 188 | * For each bound, we remember its partition's position (0-based) in the |
| 189 | * original list to later map it to the canonical index. |
| 190 | */ |
| 191 | |
| 192 | /* |
| 193 | * Initialize mapping array with invalid values, this is filled within |
| 194 | * each sub-routine below depending on the bound type. |
| 195 | */ |
| 196 | *mapping = (int *) palloc(sizeof(int) * nparts); |
| 197 | for (i = 0; i < nparts; i++) |
| 198 | (*mapping)[i] = -1; |
| 199 | |
| 200 | switch (key->strategy) |
| 201 | { |
| 202 | case PARTITION_STRATEGY_HASH: |
| 203 | return create_hash_bounds(boundspecs, nparts, key, mapping); |
| 204 | |
| 205 | case PARTITION_STRATEGY_LIST: |
| 206 | return create_list_bounds(boundspecs, nparts, key, mapping); |
| 207 | |
| 208 | case PARTITION_STRATEGY_RANGE: |
| 209 | return create_range_bounds(boundspecs, nparts, key, mapping); |
| 210 | |
| 211 | default: |
| 212 | elog(ERROR, "unexpected partition strategy: %d" , |
| 213 | (int) key->strategy); |
| 214 | break; |
| 215 | } |
| 216 | |
| 217 | Assert(false); |
| 218 | return NULL; /* keep compiler quiet */ |
| 219 | } |
| 220 | |
| 221 | /* |
| 222 | * create_hash_bounds |
| 223 | * Create a PartitionBoundInfo for a hash partitioned table |
| 224 | */ |
| 225 | static PartitionBoundInfo |
| 226 | create_hash_bounds(PartitionBoundSpec **boundspecs, int nparts, |
| 227 | PartitionKey key, int **mapping) |
| 228 | { |
| 229 | PartitionBoundInfo boundinfo; |
| 230 | PartitionHashBound **hbounds = NULL; |
| 231 | int i; |
| 232 | int ndatums = 0; |
| 233 | int greatest_modulus; |
| 234 | |
| 235 | boundinfo = (PartitionBoundInfoData *) |
| 236 | palloc0(sizeof(PartitionBoundInfoData)); |
| 237 | boundinfo->strategy = key->strategy; |
| 238 | /* No special hash partitions. */ |
| 239 | boundinfo->null_index = -1; |
| 240 | boundinfo->default_index = -1; |
| 241 | |
| 242 | ndatums = nparts; |
| 243 | hbounds = (PartitionHashBound **) |
| 244 | palloc(nparts * sizeof(PartitionHashBound *)); |
| 245 | |
| 246 | /* Convert from node to the internal representation */ |
| 247 | for (i = 0; i < nparts; i++) |
| 248 | { |
| 249 | PartitionBoundSpec *spec = boundspecs[i]; |
| 250 | |
| 251 | if (spec->strategy != PARTITION_STRATEGY_HASH) |
| 252 | elog(ERROR, "invalid strategy in partition bound spec" ); |
| 253 | |
| 254 | hbounds[i] = (PartitionHashBound *) palloc(sizeof(PartitionHashBound)); |
| 255 | hbounds[i]->modulus = spec->modulus; |
| 256 | hbounds[i]->remainder = spec->remainder; |
| 257 | hbounds[i]->index = i; |
| 258 | } |
| 259 | |
| 260 | /* Sort all the bounds in ascending order */ |
| 261 | qsort(hbounds, nparts, sizeof(PartitionHashBound *), |
| 262 | qsort_partition_hbound_cmp); |
| 263 | |
| 264 | /* After sorting, moduli are now stored in ascending order. */ |
| 265 | greatest_modulus = hbounds[ndatums - 1]->modulus; |
| 266 | |
| 267 | boundinfo->ndatums = ndatums; |
| 268 | boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *)); |
| 269 | boundinfo->indexes = (int *) palloc(greatest_modulus * sizeof(int)); |
| 270 | for (i = 0; i < greatest_modulus; i++) |
| 271 | boundinfo->indexes[i] = -1; |
| 272 | |
| 273 | /* |
| 274 | * For hash partitioning, there are as many datums (modulus and remainder |
| 275 | * pairs) as there are partitions. Indexes are simply values ranging from |
| 276 | * 0 to (nparts - 1). |
| 277 | */ |
| 278 | for (i = 0; i < nparts; i++) |
| 279 | { |
| 280 | int modulus = hbounds[i]->modulus; |
| 281 | int remainder = hbounds[i]->remainder; |
| 282 | |
| 283 | boundinfo->datums[i] = (Datum *) palloc(2 * sizeof(Datum)); |
| 284 | boundinfo->datums[i][0] = Int32GetDatum(modulus); |
| 285 | boundinfo->datums[i][1] = Int32GetDatum(remainder); |
| 286 | |
| 287 | while (remainder < greatest_modulus) |
| 288 | { |
| 289 | /* overlap? */ |
| 290 | Assert(boundinfo->indexes[remainder] == -1); |
| 291 | boundinfo->indexes[remainder] = i; |
| 292 | remainder += modulus; |
| 293 | } |
| 294 | |
| 295 | (*mapping)[hbounds[i]->index] = i; |
| 296 | pfree(hbounds[i]); |
| 297 | } |
| 298 | pfree(hbounds); |
| 299 | |
| 300 | return boundinfo; |
| 301 | } |
| 302 | |
| 303 | /* |
| 304 | * create_list_bounds |
| 305 | * Create a PartitionBoundInfo for a list partitioned table |
| 306 | */ |
| 307 | static PartitionBoundInfo |
| 308 | create_list_bounds(PartitionBoundSpec **boundspecs, int nparts, |
| 309 | PartitionKey key, int **mapping) |
| 310 | { |
| 311 | PartitionBoundInfo boundinfo; |
| 312 | PartitionListValue **all_values = NULL; |
| 313 | ListCell *cell; |
| 314 | int i = 0; |
| 315 | int ndatums = 0; |
| 316 | int next_index = 0; |
| 317 | int default_index = -1; |
| 318 | int null_index = -1; |
| 319 | List *non_null_values = NIL; |
| 320 | |
| 321 | boundinfo = (PartitionBoundInfoData *) |
| 322 | palloc0(sizeof(PartitionBoundInfoData)); |
| 323 | boundinfo->strategy = key->strategy; |
| 324 | /* Will be set correctly below. */ |
| 325 | boundinfo->null_index = -1; |
| 326 | boundinfo->default_index = -1; |
| 327 | |
| 328 | /* Create a unified list of non-null values across all partitions. */ |
| 329 | for (i = 0; i < nparts; i++) |
| 330 | { |
| 331 | PartitionBoundSpec *spec = boundspecs[i]; |
| 332 | ListCell *c; |
| 333 | |
| 334 | if (spec->strategy != PARTITION_STRATEGY_LIST) |
| 335 | elog(ERROR, "invalid strategy in partition bound spec" ); |
| 336 | |
| 337 | /* |
| 338 | * Note the index of the partition bound spec for the default |
| 339 | * partition. There's no datum to add to the list on non-null datums |
| 340 | * for this partition. |
| 341 | */ |
| 342 | if (spec->is_default) |
| 343 | { |
| 344 | default_index = i; |
| 345 | continue; |
| 346 | } |
| 347 | |
| 348 | foreach(c, spec->listdatums) |
| 349 | { |
| 350 | Const *val = castNode(Const, lfirst(c)); |
| 351 | PartitionListValue *list_value = NULL; |
| 352 | |
| 353 | if (!val->constisnull) |
| 354 | { |
| 355 | list_value = (PartitionListValue *) |
| 356 | palloc0(sizeof(PartitionListValue)); |
| 357 | list_value->index = i; |
| 358 | list_value->value = val->constvalue; |
| 359 | } |
| 360 | else |
| 361 | { |
| 362 | /* |
| 363 | * Never put a null into the values array, flag instead for |
| 364 | * the code further down below where we construct the actual |
| 365 | * relcache struct. |
| 366 | */ |
| 367 | if (null_index != -1) |
| 368 | elog(ERROR, "found null more than once" ); |
| 369 | null_index = i; |
| 370 | } |
| 371 | |
| 372 | if (list_value) |
| 373 | non_null_values = lappend(non_null_values, list_value); |
| 374 | } |
| 375 | } |
| 376 | |
| 377 | ndatums = list_length(non_null_values); |
| 378 | |
| 379 | /* |
| 380 | * Collect all list values in one array. Alongside the value, we also save |
| 381 | * the index of partition the value comes from. |
| 382 | */ |
| 383 | all_values = (PartitionListValue **) |
| 384 | palloc(ndatums * sizeof(PartitionListValue *)); |
| 385 | i = 0; |
| 386 | foreach(cell, non_null_values) |
| 387 | { |
| 388 | PartitionListValue *src = lfirst(cell); |
| 389 | |
| 390 | all_values[i] = (PartitionListValue *) |
| 391 | palloc(sizeof(PartitionListValue)); |
| 392 | all_values[i]->value = src->value; |
| 393 | all_values[i]->index = src->index; |
| 394 | i++; |
| 395 | } |
| 396 | |
| 397 | qsort_arg(all_values, ndatums, sizeof(PartitionListValue *), |
| 398 | qsort_partition_list_value_cmp, (void *) key); |
| 399 | |
| 400 | boundinfo->ndatums = ndatums; |
| 401 | boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *)); |
| 402 | boundinfo->indexes = (int *) palloc(ndatums * sizeof(int)); |
| 403 | |
| 404 | /* |
| 405 | * Copy values. Canonical indexes are values ranging from 0 to (nparts - |
| 406 | * 1) assigned to each partition such that all datums of a given partition |
| 407 | * receive the same value. The value for a given partition is the index of |
| 408 | * that partition's smallest datum in the all_values[] array. |
| 409 | */ |
| 410 | for (i = 0; i < ndatums; i++) |
| 411 | { |
| 412 | int orig_index = all_values[i]->index; |
| 413 | |
| 414 | boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum)); |
| 415 | boundinfo->datums[i][0] = datumCopy(all_values[i]->value, |
| 416 | key->parttypbyval[0], |
| 417 | key->parttyplen[0]); |
| 418 | |
| 419 | /* If the old index has no mapping, assign one */ |
| 420 | if ((*mapping)[orig_index] == -1) |
| 421 | (*mapping)[orig_index] = next_index++; |
| 422 | |
| 423 | boundinfo->indexes[i] = (*mapping)[orig_index]; |
| 424 | } |
| 425 | |
| 426 | /* |
| 427 | * Set the canonical value for null_index, if any. |
| 428 | * |
| 429 | * It is possible that the null-accepting partition has not been assigned |
| 430 | * an index yet, which could happen if such partition accepts only null |
| 431 | * and hence not handled in the above loop which only looked at non-null |
| 432 | * values. |
| 433 | */ |
| 434 | if (null_index != -1) |
| 435 | { |
| 436 | Assert(null_index >= 0); |
| 437 | if ((*mapping)[null_index] == -1) |
| 438 | (*mapping)[null_index] = next_index++; |
| 439 | boundinfo->null_index = (*mapping)[null_index]; |
| 440 | } |
| 441 | |
| 442 | /* Set the canonical value for default_index, if any. */ |
| 443 | if (default_index != -1) |
| 444 | { |
| 445 | /* |
| 446 | * The default partition accepts any value not specified in the lists |
| 447 | * of other partitions, hence it should not get mapped index while |
| 448 | * assigning those for non-null datums. |
| 449 | */ |
| 450 | Assert(default_index >= 0); |
| 451 | Assert((*mapping)[default_index] == -1); |
| 452 | (*mapping)[default_index] = next_index++; |
| 453 | boundinfo->default_index = (*mapping)[default_index]; |
| 454 | } |
| 455 | |
| 456 | /* All partitions must now have been assigned canonical indexes. */ |
| 457 | Assert(next_index == nparts); |
| 458 | return boundinfo; |
| 459 | } |
| 460 | |
| 461 | /* |
| 462 | * create_range_bounds |
| 463 | * Create a PartitionBoundInfo for a range partitioned table |
| 464 | */ |
| 465 | static PartitionBoundInfo |
| 466 | create_range_bounds(PartitionBoundSpec **boundspecs, int nparts, |
| 467 | PartitionKey key, int **mapping) |
| 468 | { |
| 469 | PartitionBoundInfo boundinfo; |
| 470 | PartitionRangeBound **rbounds = NULL; |
| 471 | PartitionRangeBound **all_bounds, |
| 472 | *prev; |
| 473 | int i, |
| 474 | k; |
| 475 | int ndatums = 0; |
| 476 | int default_index = -1; |
| 477 | int next_index = 0; |
| 478 | |
| 479 | boundinfo = (PartitionBoundInfoData *) |
| 480 | palloc0(sizeof(PartitionBoundInfoData)); |
| 481 | boundinfo->strategy = key->strategy; |
| 482 | /* There is no special null-accepting range partition. */ |
| 483 | boundinfo->null_index = -1; |
| 484 | /* Will be set correctly below. */ |
| 485 | boundinfo->default_index = -1; |
| 486 | |
| 487 | all_bounds = (PartitionRangeBound **) |
| 488 | palloc0(2 * nparts * sizeof(PartitionRangeBound *)); |
| 489 | |
| 490 | /* Create a unified list of range bounds across all the partitions. */ |
| 491 | ndatums = 0; |
| 492 | for (i = 0; i < nparts; i++) |
| 493 | { |
| 494 | PartitionBoundSpec *spec = boundspecs[i]; |
| 495 | PartitionRangeBound *lower, |
| 496 | *upper; |
| 497 | |
| 498 | if (spec->strategy != PARTITION_STRATEGY_RANGE) |
| 499 | elog(ERROR, "invalid strategy in partition bound spec" ); |
| 500 | |
| 501 | /* |
| 502 | * Note the index of the partition bound spec for the default |
| 503 | * partition. There's no datum to add to the all_bounds array for |
| 504 | * this partition. |
| 505 | */ |
| 506 | if (spec->is_default) |
| 507 | { |
| 508 | default_index = i; |
| 509 | continue; |
| 510 | } |
| 511 | |
| 512 | lower = make_one_partition_rbound(key, i, spec->lowerdatums, true); |
| 513 | upper = make_one_partition_rbound(key, i, spec->upperdatums, false); |
| 514 | all_bounds[ndatums++] = lower; |
| 515 | all_bounds[ndatums++] = upper; |
| 516 | } |
| 517 | |
| 518 | Assert(ndatums == nparts * 2 || |
| 519 | (default_index != -1 && ndatums == (nparts - 1) * 2)); |
| 520 | |
| 521 | /* Sort all the bounds in ascending order */ |
| 522 | qsort_arg(all_bounds, ndatums, |
| 523 | sizeof(PartitionRangeBound *), |
| 524 | qsort_partition_rbound_cmp, |
| 525 | (void *) key); |
| 526 | |
| 527 | /* Save distinct bounds from all_bounds into rbounds. */ |
| 528 | rbounds = (PartitionRangeBound **) |
| 529 | palloc(ndatums * sizeof(PartitionRangeBound *)); |
| 530 | k = 0; |
| 531 | prev = NULL; |
| 532 | for (i = 0; i < ndatums; i++) |
| 533 | { |
| 534 | PartitionRangeBound *cur = all_bounds[i]; |
| 535 | bool is_distinct = false; |
| 536 | int j; |
| 537 | |
| 538 | /* Is the current bound distinct from the previous one? */ |
| 539 | for (j = 0; j < key->partnatts; j++) |
| 540 | { |
| 541 | Datum cmpval; |
| 542 | |
| 543 | if (prev == NULL || cur->kind[j] != prev->kind[j]) |
| 544 | { |
| 545 | is_distinct = true; |
| 546 | break; |
| 547 | } |
| 548 | |
| 549 | /* |
| 550 | * If the bounds are both MINVALUE or MAXVALUE, stop now and treat |
| 551 | * them as equal, since any values after this point must be |
| 552 | * ignored. |
| 553 | */ |
| 554 | if (cur->kind[j] != PARTITION_RANGE_DATUM_VALUE) |
| 555 | break; |
| 556 | |
| 557 | cmpval = FunctionCall2Coll(&key->partsupfunc[j], |
| 558 | key->partcollation[j], |
| 559 | cur->datums[j], |
| 560 | prev->datums[j]); |
| 561 | if (DatumGetInt32(cmpval) != 0) |
| 562 | { |
| 563 | is_distinct = true; |
| 564 | break; |
| 565 | } |
| 566 | } |
| 567 | |
| 568 | /* |
| 569 | * Only if the bound is distinct save it into a temporary array, i.e, |
| 570 | * rbounds which is later copied into boundinfo datums array. |
| 571 | */ |
| 572 | if (is_distinct) |
| 573 | rbounds[k++] = all_bounds[i]; |
| 574 | |
| 575 | prev = cur; |
| 576 | } |
| 577 | |
| 578 | /* Update ndatums to hold the count of distinct datums. */ |
| 579 | ndatums = k; |
| 580 | |
| 581 | /* |
| 582 | * Add datums to boundinfo. Canonical indexes are values ranging from 0 |
| 583 | * to nparts - 1, assigned in that order to each partition's upper bound. |
| 584 | * For 'datums' elements that are lower bounds, there is -1 in the |
| 585 | * 'indexes' array to signify that no partition exists for the values less |
| 586 | * than such a bound and greater than or equal to the previous upper |
| 587 | * bound. |
| 588 | */ |
| 589 | boundinfo->ndatums = ndatums; |
| 590 | boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *)); |
| 591 | boundinfo->kind = (PartitionRangeDatumKind **) |
| 592 | palloc(ndatums * |
| 593 | sizeof(PartitionRangeDatumKind *)); |
| 594 | |
| 595 | /* |
| 596 | * For range partitioning, an additional value of -1 is stored as the last |
| 597 | * element. |
| 598 | */ |
| 599 | boundinfo->indexes = (int *) palloc((ndatums + 1) * sizeof(int)); |
| 600 | |
| 601 | for (i = 0; i < ndatums; i++) |
| 602 | { |
| 603 | int j; |
| 604 | |
| 605 | boundinfo->datums[i] = (Datum *) palloc(key->partnatts * |
| 606 | sizeof(Datum)); |
| 607 | boundinfo->kind[i] = (PartitionRangeDatumKind *) |
| 608 | palloc(key->partnatts * |
| 609 | sizeof(PartitionRangeDatumKind)); |
| 610 | for (j = 0; j < key->partnatts; j++) |
| 611 | { |
| 612 | if (rbounds[i]->kind[j] == PARTITION_RANGE_DATUM_VALUE) |
| 613 | boundinfo->datums[i][j] = |
| 614 | datumCopy(rbounds[i]->datums[j], |
| 615 | key->parttypbyval[j], |
| 616 | key->parttyplen[j]); |
| 617 | boundinfo->kind[i][j] = rbounds[i]->kind[j]; |
| 618 | } |
| 619 | |
| 620 | /* |
| 621 | * There is no mapping for invalid indexes. |
| 622 | * |
| 623 | * Any lower bounds in the rbounds array have invalid indexes |
| 624 | * assigned, because the values between the previous bound (if there |
| 625 | * is one) and this (lower) bound are not part of the range of any |
| 626 | * existing partition. |
| 627 | */ |
| 628 | if (rbounds[i]->lower) |
| 629 | boundinfo->indexes[i] = -1; |
| 630 | else |
| 631 | { |
| 632 | int orig_index = rbounds[i]->index; |
| 633 | |
| 634 | /* If the old index has no mapping, assign one */ |
| 635 | if ((*mapping)[orig_index] == -1) |
| 636 | (*mapping)[orig_index] = next_index++; |
| 637 | |
| 638 | boundinfo->indexes[i] = (*mapping)[orig_index]; |
| 639 | } |
| 640 | } |
| 641 | |
| 642 | /* Set the canonical value for default_index, if any. */ |
| 643 | if (default_index != -1) |
| 644 | { |
| 645 | Assert(default_index >= 0 && (*mapping)[default_index] == -1); |
| 646 | (*mapping)[default_index] = next_index++; |
| 647 | boundinfo->default_index = (*mapping)[default_index]; |
| 648 | } |
| 649 | |
| 650 | /* The extra -1 element. */ |
| 651 | Assert(i == ndatums); |
| 652 | boundinfo->indexes[i] = -1; |
| 653 | |
| 654 | /* All partitions must now have been assigned canonical indexes. */ |
| 655 | Assert(next_index == nparts); |
| 656 | return boundinfo; |
| 657 | } |
| 658 | |
| 659 | /* |
| 660 | * Are two partition bound collections logically equal? |
| 661 | * |
| 662 | * Used in the keep logic of relcache.c (ie, in RelationClearRelation()). |
| 663 | * This is also useful when b1 and b2 are bound collections of two separate |
| 664 | * relations, respectively, because PartitionBoundInfo is a canonical |
| 665 | * representation of partition bounds. |
| 666 | */ |
| 667 | bool |
| 668 | partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval, |
| 669 | PartitionBoundInfo b1, PartitionBoundInfo b2) |
| 670 | { |
| 671 | int i; |
| 672 | |
| 673 | if (b1->strategy != b2->strategy) |
| 674 | return false; |
| 675 | |
| 676 | if (b1->ndatums != b2->ndatums) |
| 677 | return false; |
| 678 | |
| 679 | if (b1->null_index != b2->null_index) |
| 680 | return false; |
| 681 | |
| 682 | if (b1->default_index != b2->default_index) |
| 683 | return false; |
| 684 | |
| 685 | if (b1->strategy == PARTITION_STRATEGY_HASH) |
| 686 | { |
| 687 | int greatest_modulus = get_hash_partition_greatest_modulus(b1); |
| 688 | |
| 689 | /* |
| 690 | * If two hash partitioned tables have different greatest moduli, |
| 691 | * their partition schemes don't match. |
| 692 | */ |
| 693 | if (greatest_modulus != get_hash_partition_greatest_modulus(b2)) |
| 694 | return false; |
| 695 | |
| 696 | /* |
| 697 | * We arrange the partitions in the ascending order of their moduli |
| 698 | * and remainders. Also every modulus is factor of next larger |
| 699 | * modulus. Therefore we can safely store index of a given partition |
| 700 | * in indexes array at remainder of that partition. Also entries at |
| 701 | * (remainder + N * modulus) positions in indexes array are all same |
| 702 | * for (modulus, remainder) specification for any partition. Thus |
| 703 | * datums array from both the given bounds are same, if and only if |
| 704 | * their indexes array will be same. So, it suffices to compare |
| 705 | * indexes array. |
| 706 | */ |
| 707 | for (i = 0; i < greatest_modulus; i++) |
| 708 | if (b1->indexes[i] != b2->indexes[i]) |
| 709 | return false; |
| 710 | |
| 711 | #ifdef USE_ASSERT_CHECKING |
| 712 | |
| 713 | /* |
| 714 | * Nonetheless make sure that the bounds are indeed same when the |
| 715 | * indexes match. Hash partition bound stores modulus and remainder |
| 716 | * at b1->datums[i][0] and b1->datums[i][1] position respectively. |
| 717 | */ |
| 718 | for (i = 0; i < b1->ndatums; i++) |
| 719 | Assert((b1->datums[i][0] == b2->datums[i][0] && |
| 720 | b1->datums[i][1] == b2->datums[i][1])); |
| 721 | #endif |
| 722 | } |
| 723 | else |
| 724 | { |
| 725 | for (i = 0; i < b1->ndatums; i++) |
| 726 | { |
| 727 | int j; |
| 728 | |
| 729 | for (j = 0; j < partnatts; j++) |
| 730 | { |
| 731 | /* For range partitions, the bounds might not be finite. */ |
| 732 | if (b1->kind != NULL) |
| 733 | { |
| 734 | /* The different kinds of bound all differ from each other */ |
| 735 | if (b1->kind[i][j] != b2->kind[i][j]) |
| 736 | return false; |
| 737 | |
| 738 | /* |
| 739 | * Non-finite bounds are equal without further |
| 740 | * examination. |
| 741 | */ |
| 742 | if (b1->kind[i][j] != PARTITION_RANGE_DATUM_VALUE) |
| 743 | continue; |
| 744 | } |
| 745 | |
| 746 | /* |
| 747 | * Compare the actual values. Note that it would be both |
| 748 | * incorrect and unsafe to invoke the comparison operator |
| 749 | * derived from the partitioning specification here. It would |
| 750 | * be incorrect because we want the relcache entry to be |
| 751 | * updated for ANY change to the partition bounds, not just |
| 752 | * those that the partitioning operator thinks are |
| 753 | * significant. It would be unsafe because we might reach |
| 754 | * this code in the context of an aborted transaction, and an |
| 755 | * arbitrary partitioning operator might not be safe in that |
| 756 | * context. datumIsEqual() should be simple enough to be |
| 757 | * safe. |
| 758 | */ |
| 759 | if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j], |
| 760 | parttypbyval[j], parttyplen[j])) |
| 761 | return false; |
| 762 | } |
| 763 | |
| 764 | if (b1->indexes[i] != b2->indexes[i]) |
| 765 | return false; |
| 766 | } |
| 767 | |
| 768 | /* There are ndatums+1 indexes in case of range partitions */ |
| 769 | if (b1->strategy == PARTITION_STRATEGY_RANGE && |
| 770 | b1->indexes[i] != b2->indexes[i]) |
| 771 | return false; |
| 772 | } |
| 773 | return true; |
| 774 | } |
| 775 | |
| 776 | /* |
| 777 | * Return a copy of given PartitionBoundInfo structure. The data types of bounds |
| 778 | * are described by given partition key specification. |
| 779 | */ |
| 780 | PartitionBoundInfo |
| 781 | partition_bounds_copy(PartitionBoundInfo src, |
| 782 | PartitionKey key) |
| 783 | { |
| 784 | PartitionBoundInfo dest; |
| 785 | int i; |
| 786 | int ndatums; |
| 787 | int partnatts; |
| 788 | int num_indexes; |
| 789 | |
| 790 | dest = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData)); |
| 791 | |
| 792 | dest->strategy = src->strategy; |
| 793 | ndatums = dest->ndatums = src->ndatums; |
| 794 | partnatts = key->partnatts; |
| 795 | |
| 796 | num_indexes = get_partition_bound_num_indexes(src); |
| 797 | |
| 798 | /* List partitioned tables have only a single partition key. */ |
| 799 | Assert(key->strategy != PARTITION_STRATEGY_LIST || partnatts == 1); |
| 800 | |
| 801 | dest->datums = (Datum **) palloc(sizeof(Datum *) * ndatums); |
| 802 | |
| 803 | if (src->kind != NULL) |
| 804 | { |
| 805 | dest->kind = (PartitionRangeDatumKind **) palloc(ndatums * |
| 806 | sizeof(PartitionRangeDatumKind *)); |
| 807 | for (i = 0; i < ndatums; i++) |
| 808 | { |
| 809 | dest->kind[i] = (PartitionRangeDatumKind *) palloc(partnatts * |
| 810 | sizeof(PartitionRangeDatumKind)); |
| 811 | |
| 812 | memcpy(dest->kind[i], src->kind[i], |
| 813 | sizeof(PartitionRangeDatumKind) * key->partnatts); |
| 814 | } |
| 815 | } |
| 816 | else |
| 817 | dest->kind = NULL; |
| 818 | |
| 819 | for (i = 0; i < ndatums; i++) |
| 820 | { |
| 821 | int j; |
| 822 | |
| 823 | /* |
| 824 | * For a corresponding to hash partition, datums array will have two |
| 825 | * elements - modulus and remainder. |
| 826 | */ |
| 827 | bool hash_part = (key->strategy == PARTITION_STRATEGY_HASH); |
| 828 | int natts = hash_part ? 2 : partnatts; |
| 829 | |
| 830 | dest->datums[i] = (Datum *) palloc(sizeof(Datum) * natts); |
| 831 | |
| 832 | for (j = 0; j < natts; j++) |
| 833 | { |
| 834 | bool byval; |
| 835 | int typlen; |
| 836 | |
| 837 | if (hash_part) |
| 838 | { |
| 839 | typlen = sizeof(int32); /* Always int4 */ |
| 840 | byval = true; /* int4 is pass-by-value */ |
| 841 | } |
| 842 | else |
| 843 | { |
| 844 | byval = key->parttypbyval[j]; |
| 845 | typlen = key->parttyplen[j]; |
| 846 | } |
| 847 | |
| 848 | if (dest->kind == NULL || |
| 849 | dest->kind[i][j] == PARTITION_RANGE_DATUM_VALUE) |
| 850 | dest->datums[i][j] = datumCopy(src->datums[i][j], |
| 851 | byval, typlen); |
| 852 | } |
| 853 | } |
| 854 | |
| 855 | dest->indexes = (int *) palloc(sizeof(int) * num_indexes); |
| 856 | memcpy(dest->indexes, src->indexes, sizeof(int) * num_indexes); |
| 857 | |
| 858 | dest->null_index = src->null_index; |
| 859 | dest->default_index = src->default_index; |
| 860 | |
| 861 | return dest; |
| 862 | } |
| 863 | |
| 864 | /* |
| 865 | * partitions_are_ordered |
| 866 | * Determine whether the partitions described by 'boundinfo' are ordered, |
| 867 | * that is partitions appearing earlier in the PartitionDesc sequence |
| 868 | * contain partition keys strictly less than those appearing later. |
| 869 | * Also, if NULL values are possible, they must come in the last |
| 870 | * partition defined in the PartitionDesc. |
| 871 | * |
| 872 | * If out of order, or there is insufficient info to know the order, |
| 873 | * then we return false. |
| 874 | */ |
| 875 | bool |
| 876 | partitions_are_ordered(PartitionBoundInfo boundinfo, int nparts) |
| 877 | { |
| 878 | Assert(boundinfo != NULL); |
| 879 | |
| 880 | switch (boundinfo->strategy) |
| 881 | { |
| 882 | case PARTITION_STRATEGY_RANGE: |
| 883 | |
| 884 | /* |
| 885 | * RANGE-type partitioning guarantees that the partitions can be |
| 886 | * scanned in the order that they're defined in the PartitionDesc |
| 887 | * to provide sequential, non-overlapping ranges of tuples. |
| 888 | * However, if a DEFAULT partition exists then it doesn't work, as |
| 889 | * that could contain tuples from either below or above the |
| 890 | * defined range, or tuples belonging to gaps between partitions. |
| 891 | */ |
| 892 | if (!partition_bound_has_default(boundinfo)) |
| 893 | return true; |
| 894 | break; |
| 895 | |
| 896 | case PARTITION_STRATEGY_LIST: |
| 897 | |
| 898 | /* |
| 899 | * LIST partitioning can also guarantee ordering, but only if the |
| 900 | * partitions don't accept interleaved values. We could likely |
| 901 | * check for this by looping over the PartitionBound's indexes |
| 902 | * array to check that the indexes are in order. For now, let's |
| 903 | * just keep it simple and just accept LIST partitioning when |
| 904 | * there's no DEFAULT partition, exactly one value per partition, |
| 905 | * and optionally a NULL partition that does not accept any other |
| 906 | * values. Such a NULL partition will come last in the |
| 907 | * PartitionDesc, and the other partitions will be properly |
| 908 | * ordered. This is a cheap test to make as it does not require |
| 909 | * any per-partition processing. Maybe we'd like to handle more |
| 910 | * complex cases in the future. |
| 911 | */ |
| 912 | if (partition_bound_has_default(boundinfo)) |
| 913 | return false; |
| 914 | |
| 915 | if (boundinfo->ndatums + partition_bound_accepts_nulls(boundinfo) |
| 916 | == nparts) |
| 917 | return true; |
| 918 | break; |
| 919 | |
| 920 | default: |
| 921 | /* HASH, or some other strategy */ |
| 922 | break; |
| 923 | } |
| 924 | |
| 925 | return false; |
| 926 | } |
| 927 | |
| 928 | /* |
| 929 | * check_new_partition_bound |
| 930 | * |
| 931 | * Checks if the new partition's bound overlaps any of the existing partitions |
| 932 | * of parent. Also performs additional checks as necessary per strategy. |
| 933 | */ |
| 934 | void |
| 935 | check_new_partition_bound(char *relname, Relation parent, |
| 936 | PartitionBoundSpec *spec) |
| 937 | { |
| 938 | PartitionKey key = RelationGetPartitionKey(parent); |
| 939 | PartitionDesc partdesc = RelationGetPartitionDesc(parent); |
| 940 | PartitionBoundInfo boundinfo = partdesc->boundinfo; |
| 941 | ParseState *pstate = make_parsestate(NULL); |
| 942 | int with = -1; |
| 943 | bool overlap = false; |
| 944 | |
| 945 | if (spec->is_default) |
| 946 | { |
| 947 | /* |
| 948 | * The default partition bound never conflicts with any other |
| 949 | * partition's; if that's what we're attaching, the only possible |
| 950 | * problem is that one already exists, so check for that and we're |
| 951 | * done. |
| 952 | */ |
| 953 | if (boundinfo == NULL || !partition_bound_has_default(boundinfo)) |
| 954 | return; |
| 955 | |
| 956 | /* Default partition already exists, error out. */ |
| 957 | ereport(ERROR, |
| 958 | (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), |
| 959 | errmsg("partition \"%s\" conflicts with existing default partition \"%s\"" , |
| 960 | relname, get_rel_name(partdesc->oids[boundinfo->default_index])), |
| 961 | parser_errposition(pstate, spec->location))); |
| 962 | } |
| 963 | |
| 964 | switch (key->strategy) |
| 965 | { |
| 966 | case PARTITION_STRATEGY_HASH: |
| 967 | { |
| 968 | Assert(spec->strategy == PARTITION_STRATEGY_HASH); |
| 969 | Assert(spec->remainder >= 0 && spec->remainder < spec->modulus); |
| 970 | |
| 971 | if (partdesc->nparts > 0) |
| 972 | { |
| 973 | Datum **datums = boundinfo->datums; |
| 974 | int ndatums = boundinfo->ndatums; |
| 975 | int greatest_modulus; |
| 976 | int remainder; |
| 977 | int offset; |
| 978 | bool valid_modulus = true; |
| 979 | int prev_modulus, /* Previous largest modulus */ |
| 980 | next_modulus; /* Next largest modulus */ |
| 981 | |
| 982 | /* |
| 983 | * Check rule that every modulus must be a factor of the |
| 984 | * next larger modulus. For example, if you have a bunch |
| 985 | * of partitions that all have modulus 5, you can add a |
| 986 | * new partition with modulus 10 or a new partition with |
| 987 | * modulus 15, but you cannot add both a partition with |
| 988 | * modulus 10 and a partition with modulus 15, because 10 |
| 989 | * is not a factor of 15. |
| 990 | * |
| 991 | * Get the greatest (modulus, remainder) pair contained in |
| 992 | * boundinfo->datums that is less than or equal to the |
| 993 | * (spec->modulus, spec->remainder) pair. |
| 994 | */ |
| 995 | offset = partition_hash_bsearch(boundinfo, |
| 996 | spec->modulus, |
| 997 | spec->remainder); |
| 998 | if (offset < 0) |
| 999 | { |
| 1000 | next_modulus = DatumGetInt32(datums[0][0]); |
| 1001 | valid_modulus = (next_modulus % spec->modulus) == 0; |
| 1002 | } |
| 1003 | else |
| 1004 | { |
| 1005 | prev_modulus = DatumGetInt32(datums[offset][0]); |
| 1006 | valid_modulus = (spec->modulus % prev_modulus) == 0; |
| 1007 | |
| 1008 | if (valid_modulus && (offset + 1) < ndatums) |
| 1009 | { |
| 1010 | next_modulus = DatumGetInt32(datums[offset + 1][0]); |
| 1011 | valid_modulus = (next_modulus % spec->modulus) == 0; |
| 1012 | } |
| 1013 | } |
| 1014 | |
| 1015 | if (!valid_modulus) |
| 1016 | ereport(ERROR, |
| 1017 | (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), |
| 1018 | errmsg("every hash partition modulus must be a factor of the next larger modulus" ))); |
| 1019 | |
| 1020 | greatest_modulus = get_hash_partition_greatest_modulus(boundinfo); |
| 1021 | remainder = spec->remainder; |
| 1022 | |
| 1023 | /* |
| 1024 | * Normally, the lowest remainder that could conflict with |
| 1025 | * the new partition is equal to the remainder specified |
| 1026 | * for the new partition, but when the new partition has a |
| 1027 | * modulus higher than any used so far, we need to adjust. |
| 1028 | */ |
| 1029 | if (remainder >= greatest_modulus) |
| 1030 | remainder = remainder % greatest_modulus; |
| 1031 | |
| 1032 | /* Check every potentially-conflicting remainder. */ |
| 1033 | do |
| 1034 | { |
| 1035 | if (boundinfo->indexes[remainder] != -1) |
| 1036 | { |
| 1037 | overlap = true; |
| 1038 | with = boundinfo->indexes[remainder]; |
| 1039 | break; |
| 1040 | } |
| 1041 | remainder += spec->modulus; |
| 1042 | } while (remainder < greatest_modulus); |
| 1043 | } |
| 1044 | |
| 1045 | break; |
| 1046 | } |
| 1047 | |
| 1048 | case PARTITION_STRATEGY_LIST: |
| 1049 | { |
| 1050 | Assert(spec->strategy == PARTITION_STRATEGY_LIST); |
| 1051 | |
| 1052 | if (partdesc->nparts > 0) |
| 1053 | { |
| 1054 | ListCell *cell; |
| 1055 | |
| 1056 | Assert(boundinfo && |
| 1057 | boundinfo->strategy == PARTITION_STRATEGY_LIST && |
| 1058 | (boundinfo->ndatums > 0 || |
| 1059 | partition_bound_accepts_nulls(boundinfo) || |
| 1060 | partition_bound_has_default(boundinfo))); |
| 1061 | |
| 1062 | foreach(cell, spec->listdatums) |
| 1063 | { |
| 1064 | Const *val = castNode(Const, lfirst(cell)); |
| 1065 | |
| 1066 | if (!val->constisnull) |
| 1067 | { |
| 1068 | int offset; |
| 1069 | bool equal; |
| 1070 | |
| 1071 | offset = partition_list_bsearch(&key->partsupfunc[0], |
| 1072 | key->partcollation, |
| 1073 | boundinfo, |
| 1074 | val->constvalue, |
| 1075 | &equal); |
| 1076 | if (offset >= 0 && equal) |
| 1077 | { |
| 1078 | overlap = true; |
| 1079 | with = boundinfo->indexes[offset]; |
| 1080 | break; |
| 1081 | } |
| 1082 | } |
| 1083 | else if (partition_bound_accepts_nulls(boundinfo)) |
| 1084 | { |
| 1085 | overlap = true; |
| 1086 | with = boundinfo->null_index; |
| 1087 | break; |
| 1088 | } |
| 1089 | } |
| 1090 | } |
| 1091 | |
| 1092 | break; |
| 1093 | } |
| 1094 | |
| 1095 | case PARTITION_STRATEGY_RANGE: |
| 1096 | { |
| 1097 | PartitionRangeBound *lower, |
| 1098 | *upper; |
| 1099 | |
| 1100 | Assert(spec->strategy == PARTITION_STRATEGY_RANGE); |
| 1101 | lower = make_one_partition_rbound(key, -1, spec->lowerdatums, true); |
| 1102 | upper = make_one_partition_rbound(key, -1, spec->upperdatums, false); |
| 1103 | |
| 1104 | /* |
| 1105 | * First check if the resulting range would be empty with |
| 1106 | * specified lower and upper bounds |
| 1107 | */ |
| 1108 | if (partition_rbound_cmp(key->partnatts, key->partsupfunc, |
| 1109 | key->partcollation, lower->datums, |
| 1110 | lower->kind, true, upper) >= 0) |
| 1111 | { |
| 1112 | ereport(ERROR, |
| 1113 | (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), |
| 1114 | errmsg("empty range bound specified for partition \"%s\"" , |
| 1115 | relname), |
| 1116 | errdetail("Specified lower bound %s is greater than or equal to upper bound %s." , |
| 1117 | get_range_partbound_string(spec->lowerdatums), |
| 1118 | get_range_partbound_string(spec->upperdatums)), |
| 1119 | parser_errposition(pstate, spec->location))); |
| 1120 | } |
| 1121 | |
| 1122 | if (partdesc->nparts > 0) |
| 1123 | { |
| 1124 | int offset; |
| 1125 | bool equal; |
| 1126 | |
| 1127 | Assert(boundinfo && |
| 1128 | boundinfo->strategy == PARTITION_STRATEGY_RANGE && |
| 1129 | (boundinfo->ndatums > 0 || |
| 1130 | partition_bound_has_default(boundinfo))); |
| 1131 | |
| 1132 | /* |
| 1133 | * Test whether the new lower bound (which is treated |
| 1134 | * inclusively as part of the new partition) lies inside |
| 1135 | * an existing partition, or in a gap. |
| 1136 | * |
| 1137 | * If it's inside an existing partition, the bound at |
| 1138 | * offset + 1 will be the upper bound of that partition, |
| 1139 | * and its index will be >= 0. |
| 1140 | * |
| 1141 | * If it's in a gap, the bound at offset + 1 will be the |
| 1142 | * lower bound of the next partition, and its index will |
| 1143 | * be -1. This is also true if there is no next partition, |
| 1144 | * since the index array is initialised with an extra -1 |
| 1145 | * at the end. |
| 1146 | */ |
| 1147 | offset = partition_range_bsearch(key->partnatts, |
| 1148 | key->partsupfunc, |
| 1149 | key->partcollation, |
| 1150 | boundinfo, lower, |
| 1151 | &equal); |
| 1152 | |
| 1153 | if (boundinfo->indexes[offset + 1] < 0) |
| 1154 | { |
| 1155 | /* |
| 1156 | * Check that the new partition will fit in the gap. |
| 1157 | * For it to fit, the new upper bound must be less |
| 1158 | * than or equal to the lower bound of the next |
| 1159 | * partition, if there is one. |
| 1160 | */ |
| 1161 | if (offset + 1 < boundinfo->ndatums) |
| 1162 | { |
| 1163 | int32 cmpval; |
| 1164 | Datum *datums; |
| 1165 | PartitionRangeDatumKind *kind; |
| 1166 | bool is_lower; |
| 1167 | |
| 1168 | datums = boundinfo->datums[offset + 1]; |
| 1169 | kind = boundinfo->kind[offset + 1]; |
| 1170 | is_lower = (boundinfo->indexes[offset + 1] == -1); |
| 1171 | |
| 1172 | cmpval = partition_rbound_cmp(key->partnatts, |
| 1173 | key->partsupfunc, |
| 1174 | key->partcollation, |
| 1175 | datums, kind, |
| 1176 | is_lower, upper); |
| 1177 | if (cmpval < 0) |
| 1178 | { |
| 1179 | /* |
| 1180 | * The new partition overlaps with the |
| 1181 | * existing partition between offset + 1 and |
| 1182 | * offset + 2. |
| 1183 | */ |
| 1184 | overlap = true; |
| 1185 | with = boundinfo->indexes[offset + 2]; |
| 1186 | } |
| 1187 | } |
| 1188 | } |
| 1189 | else |
| 1190 | { |
| 1191 | /* |
| 1192 | * The new partition overlaps with the existing |
| 1193 | * partition between offset and offset + 1. |
| 1194 | */ |
| 1195 | overlap = true; |
| 1196 | with = boundinfo->indexes[offset + 1]; |
| 1197 | } |
| 1198 | } |
| 1199 | |
| 1200 | break; |
| 1201 | } |
| 1202 | |
| 1203 | default: |
| 1204 | elog(ERROR, "unexpected partition strategy: %d" , |
| 1205 | (int) key->strategy); |
| 1206 | } |
| 1207 | |
| 1208 | if (overlap) |
| 1209 | { |
| 1210 | Assert(with >= 0); |
| 1211 | ereport(ERROR, |
| 1212 | (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), |
| 1213 | errmsg("partition \"%s\" would overlap partition \"%s\"" , |
| 1214 | relname, get_rel_name(partdesc->oids[with])), |
| 1215 | parser_errposition(pstate, spec->location))); |
| 1216 | } |
| 1217 | } |
| 1218 | |
| 1219 | /* |
| 1220 | * check_default_partition_contents |
| 1221 | * |
| 1222 | * This function checks if there exists a row in the default partition that |
| 1223 | * would properly belong to the new partition being added. If it finds one, |
| 1224 | * it throws an error. |
| 1225 | */ |
| 1226 | void |
| 1227 | check_default_partition_contents(Relation parent, Relation default_rel, |
| 1228 | PartitionBoundSpec *new_spec) |
| 1229 | { |
| 1230 | List *new_part_constraints; |
| 1231 | List *def_part_constraints; |
| 1232 | List *all_parts; |
| 1233 | ListCell *lc; |
| 1234 | |
| 1235 | new_part_constraints = (new_spec->strategy == PARTITION_STRATEGY_LIST) |
| 1236 | ? get_qual_for_list(parent, new_spec) |
| 1237 | : get_qual_for_range(parent, new_spec, false); |
| 1238 | def_part_constraints = |
| 1239 | get_proposed_default_constraint(new_part_constraints); |
| 1240 | |
| 1241 | /* |
| 1242 | * Map the Vars in the constraint expression from parent's attnos to |
| 1243 | * default_rel's. |
| 1244 | */ |
| 1245 | def_part_constraints = |
| 1246 | map_partition_varattnos(def_part_constraints, 1, default_rel, |
| 1247 | parent, NULL); |
| 1248 | |
| 1249 | /* |
| 1250 | * If the existing constraints on the default partition imply that it will |
| 1251 | * not contain any row that would belong to the new partition, we can |
| 1252 | * avoid scanning the default partition. |
| 1253 | */ |
| 1254 | if (PartConstraintImpliedByRelConstraint(default_rel, def_part_constraints)) |
| 1255 | { |
| 1256 | ereport(DEBUG1, |
| 1257 | (errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints" , |
| 1258 | RelationGetRelationName(default_rel)))); |
| 1259 | return; |
| 1260 | } |
| 1261 | |
| 1262 | /* |
| 1263 | * Scan the default partition and its subpartitions, and check for rows |
| 1264 | * that do not satisfy the revised partition constraints. |
| 1265 | */ |
| 1266 | if (default_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) |
| 1267 | all_parts = find_all_inheritors(RelationGetRelid(default_rel), |
| 1268 | AccessExclusiveLock, NULL); |
| 1269 | else |
| 1270 | all_parts = list_make1_oid(RelationGetRelid(default_rel)); |
| 1271 | |
| 1272 | foreach(lc, all_parts) |
| 1273 | { |
| 1274 | Oid part_relid = lfirst_oid(lc); |
| 1275 | Relation part_rel; |
| 1276 | Expr *partition_constraint; |
| 1277 | EState *estate; |
| 1278 | ExprState *partqualstate = NULL; |
| 1279 | Snapshot snapshot; |
| 1280 | ExprContext *econtext; |
| 1281 | TableScanDesc scan; |
| 1282 | MemoryContext oldCxt; |
| 1283 | TupleTableSlot *tupslot; |
| 1284 | |
| 1285 | /* Lock already taken above. */ |
| 1286 | if (part_relid != RelationGetRelid(default_rel)) |
| 1287 | { |
| 1288 | part_rel = table_open(part_relid, NoLock); |
| 1289 | |
| 1290 | /* |
| 1291 | * Map the Vars in the constraint expression from default_rel's |
| 1292 | * the sub-partition's. |
| 1293 | */ |
| 1294 | partition_constraint = make_ands_explicit(def_part_constraints); |
| 1295 | partition_constraint = (Expr *) |
| 1296 | map_partition_varattnos((List *) partition_constraint, 1, |
| 1297 | part_rel, default_rel, NULL); |
| 1298 | |
| 1299 | /* |
| 1300 | * If the partition constraints on default partition child imply |
| 1301 | * that it will not contain any row that would belong to the new |
| 1302 | * partition, we can avoid scanning the child table. |
| 1303 | */ |
| 1304 | if (PartConstraintImpliedByRelConstraint(part_rel, |
| 1305 | def_part_constraints)) |
| 1306 | { |
| 1307 | ereport(DEBUG1, |
| 1308 | (errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints" , |
| 1309 | RelationGetRelationName(part_rel)))); |
| 1310 | |
| 1311 | table_close(part_rel, NoLock); |
| 1312 | continue; |
| 1313 | } |
| 1314 | } |
| 1315 | else |
| 1316 | { |
| 1317 | part_rel = default_rel; |
| 1318 | partition_constraint = make_ands_explicit(def_part_constraints); |
| 1319 | } |
| 1320 | |
| 1321 | /* |
| 1322 | * Only RELKIND_RELATION relations (i.e. leaf partitions) need to be |
| 1323 | * scanned. |
| 1324 | */ |
| 1325 | if (part_rel->rd_rel->relkind != RELKIND_RELATION) |
| 1326 | { |
| 1327 | if (part_rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) |
| 1328 | ereport(WARNING, |
| 1329 | (errcode(ERRCODE_CHECK_VIOLATION), |
| 1330 | errmsg("skipped scanning foreign table \"%s\" which is a partition of default partition \"%s\"" , |
| 1331 | RelationGetRelationName(part_rel), |
| 1332 | RelationGetRelationName(default_rel)))); |
| 1333 | |
| 1334 | if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel)) |
| 1335 | table_close(part_rel, NoLock); |
| 1336 | |
| 1337 | continue; |
| 1338 | } |
| 1339 | |
| 1340 | estate = CreateExecutorState(); |
| 1341 | |
| 1342 | /* Build expression execution states for partition check quals */ |
| 1343 | partqualstate = ExecPrepareExpr(partition_constraint, estate); |
| 1344 | |
| 1345 | econtext = GetPerTupleExprContext(estate); |
| 1346 | snapshot = RegisterSnapshot(GetLatestSnapshot()); |
| 1347 | tupslot = table_slot_create(part_rel, &estate->es_tupleTable); |
| 1348 | scan = table_beginscan(part_rel, snapshot, 0, NULL); |
| 1349 | |
| 1350 | /* |
| 1351 | * Switch to per-tuple memory context and reset it for each tuple |
| 1352 | * produced, so we don't leak memory. |
| 1353 | */ |
| 1354 | oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate)); |
| 1355 | |
| 1356 | while (table_scan_getnextslot(scan, ForwardScanDirection, tupslot)) |
| 1357 | { |
| 1358 | econtext->ecxt_scantuple = tupslot; |
| 1359 | |
| 1360 | if (!ExecCheck(partqualstate, econtext)) |
| 1361 | ereport(ERROR, |
| 1362 | (errcode(ERRCODE_CHECK_VIOLATION), |
| 1363 | errmsg("updated partition constraint for default partition \"%s\" would be violated by some row" , |
| 1364 | RelationGetRelationName(default_rel)))); |
| 1365 | |
| 1366 | ResetExprContext(econtext); |
| 1367 | CHECK_FOR_INTERRUPTS(); |
| 1368 | } |
| 1369 | |
| 1370 | MemoryContextSwitchTo(oldCxt); |
| 1371 | table_endscan(scan); |
| 1372 | UnregisterSnapshot(snapshot); |
| 1373 | ExecDropSingleTupleTableSlot(tupslot); |
| 1374 | FreeExecutorState(estate); |
| 1375 | |
| 1376 | if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel)) |
| 1377 | table_close(part_rel, NoLock); /* keep the lock until commit */ |
| 1378 | } |
| 1379 | } |
| 1380 | |
| 1381 | /* |
| 1382 | * get_hash_partition_greatest_modulus |
| 1383 | * |
| 1384 | * Returns the greatest modulus of the hash partition bound. The greatest |
| 1385 | * modulus will be at the end of the datums array because hash partitions are |
| 1386 | * arranged in the ascending order of their moduli and remainders. |
| 1387 | */ |
| 1388 | int |
| 1389 | get_hash_partition_greatest_modulus(PartitionBoundInfo bound) |
| 1390 | { |
| 1391 | Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH); |
| 1392 | Assert(bound->datums && bound->ndatums > 0); |
| 1393 | Assert(DatumGetInt32(bound->datums[bound->ndatums - 1][0]) > 0); |
| 1394 | |
| 1395 | return DatumGetInt32(bound->datums[bound->ndatums - 1][0]); |
| 1396 | } |
| 1397 | |
| 1398 | /* |
| 1399 | * make_one_partition_rbound |
| 1400 | * |
| 1401 | * Return a PartitionRangeBound given a list of PartitionRangeDatum elements |
| 1402 | * and a flag telling whether the bound is lower or not. Made into a function |
| 1403 | * because there are multiple sites that want to use this facility. |
| 1404 | */ |
| 1405 | static PartitionRangeBound * |
| 1406 | make_one_partition_rbound(PartitionKey key, int index, List *datums, bool lower) |
| 1407 | { |
| 1408 | PartitionRangeBound *bound; |
| 1409 | ListCell *lc; |
| 1410 | int i; |
| 1411 | |
| 1412 | Assert(datums != NIL); |
| 1413 | |
| 1414 | bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound)); |
| 1415 | bound->index = index; |
| 1416 | bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum)); |
| 1417 | bound->kind = (PartitionRangeDatumKind *) palloc0(key->partnatts * |
| 1418 | sizeof(PartitionRangeDatumKind)); |
| 1419 | bound->lower = lower; |
| 1420 | |
| 1421 | i = 0; |
| 1422 | foreach(lc, datums) |
| 1423 | { |
| 1424 | PartitionRangeDatum *datum = castNode(PartitionRangeDatum, lfirst(lc)); |
| 1425 | |
| 1426 | /* What's contained in this range datum? */ |
| 1427 | bound->kind[i] = datum->kind; |
| 1428 | |
| 1429 | if (datum->kind == PARTITION_RANGE_DATUM_VALUE) |
| 1430 | { |
| 1431 | Const *val = castNode(Const, datum->value); |
| 1432 | |
| 1433 | if (val->constisnull) |
| 1434 | elog(ERROR, "invalid range bound datum" ); |
| 1435 | bound->datums[i] = val->constvalue; |
| 1436 | } |
| 1437 | |
| 1438 | i++; |
| 1439 | } |
| 1440 | |
| 1441 | return bound; |
| 1442 | } |
| 1443 | |
| 1444 | /* |
| 1445 | * partition_rbound_cmp |
| 1446 | * |
| 1447 | * Return for two range bounds whether the 1st one (specified in datums1, |
| 1448 | * kind1, and lower1) is <, =, or > the bound specified in *b2. |
| 1449 | * |
| 1450 | * partnatts, partsupfunc and partcollation give the number of attributes in the |
| 1451 | * bounds to be compared, comparison function to be used and the collations of |
| 1452 | * attributes, respectively. |
| 1453 | * |
| 1454 | * Note that if the values of the two range bounds compare equal, then we take |
| 1455 | * into account whether they are upper or lower bounds, and an upper bound is |
| 1456 | * considered to be smaller than a lower bound. This is important to the way |
| 1457 | * that RelationBuildPartitionDesc() builds the PartitionBoundInfoData |
| 1458 | * structure, which only stores the upper bound of a common boundary between |
| 1459 | * two contiguous partitions. |
| 1460 | */ |
| 1461 | static int32 |
| 1462 | partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc, |
| 1463 | Oid *partcollation, |
| 1464 | Datum *datums1, PartitionRangeDatumKind *kind1, |
| 1465 | bool lower1, PartitionRangeBound *b2) |
| 1466 | { |
| 1467 | int32 cmpval = 0; /* placate compiler */ |
| 1468 | int i; |
| 1469 | Datum *datums2 = b2->datums; |
| 1470 | PartitionRangeDatumKind *kind2 = b2->kind; |
| 1471 | bool lower2 = b2->lower; |
| 1472 | |
| 1473 | for (i = 0; i < partnatts; i++) |
| 1474 | { |
| 1475 | /* |
| 1476 | * First, handle cases where the column is unbounded, which should not |
| 1477 | * invoke the comparison procedure, and should not consider any later |
| 1478 | * columns. Note that the PartitionRangeDatumKind enum elements |
| 1479 | * compare the same way as the values they represent. |
| 1480 | */ |
| 1481 | if (kind1[i] < kind2[i]) |
| 1482 | return -1; |
| 1483 | else if (kind1[i] > kind2[i]) |
| 1484 | return 1; |
| 1485 | else if (kind1[i] != PARTITION_RANGE_DATUM_VALUE) |
| 1486 | |
| 1487 | /* |
| 1488 | * The column bounds are both MINVALUE or both MAXVALUE. No later |
| 1489 | * columns should be considered, but we still need to compare |
| 1490 | * whether they are upper or lower bounds. |
| 1491 | */ |
| 1492 | break; |
| 1493 | |
| 1494 | cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i], |
| 1495 | partcollation[i], |
| 1496 | datums1[i], |
| 1497 | datums2[i])); |
| 1498 | if (cmpval != 0) |
| 1499 | break; |
| 1500 | } |
| 1501 | |
| 1502 | /* |
| 1503 | * If the comparison is anything other than equal, we're done. If they |
| 1504 | * compare equal though, we still have to consider whether the boundaries |
| 1505 | * are inclusive or exclusive. Exclusive one is considered smaller of the |
| 1506 | * two. |
| 1507 | */ |
| 1508 | if (cmpval == 0 && lower1 != lower2) |
| 1509 | cmpval = lower1 ? 1 : -1; |
| 1510 | |
| 1511 | return cmpval; |
| 1512 | } |
| 1513 | |
| 1514 | /* |
| 1515 | * partition_rbound_datum_cmp |
| 1516 | * |
| 1517 | * Return whether range bound (specified in rb_datums, rb_kind, and rb_lower) |
| 1518 | * is <, =, or > partition key of tuple (tuple_datums) |
| 1519 | * |
| 1520 | * n_tuple_datums, partsupfunc and partcollation give number of attributes in |
| 1521 | * the bounds to be compared, comparison function to be used and the collations |
| 1522 | * of attributes resp. |
| 1523 | * |
| 1524 | */ |
| 1525 | int32 |
| 1526 | partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation, |
| 1527 | Datum *rb_datums, PartitionRangeDatumKind *rb_kind, |
| 1528 | Datum *tuple_datums, int n_tuple_datums) |
| 1529 | { |
| 1530 | int i; |
| 1531 | int32 cmpval = -1; |
| 1532 | |
| 1533 | for (i = 0; i < n_tuple_datums; i++) |
| 1534 | { |
| 1535 | if (rb_kind[i] == PARTITION_RANGE_DATUM_MINVALUE) |
| 1536 | return -1; |
| 1537 | else if (rb_kind[i] == PARTITION_RANGE_DATUM_MAXVALUE) |
| 1538 | return 1; |
| 1539 | |
| 1540 | cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i], |
| 1541 | partcollation[i], |
| 1542 | rb_datums[i], |
| 1543 | tuple_datums[i])); |
| 1544 | if (cmpval != 0) |
| 1545 | break; |
| 1546 | } |
| 1547 | |
| 1548 | return cmpval; |
| 1549 | } |
| 1550 | |
| 1551 | /* |
| 1552 | * partition_hbound_cmp |
| 1553 | * |
| 1554 | * Compares modulus first, then remainder if modulus is equal. |
| 1555 | */ |
| 1556 | static int32 |
| 1557 | partition_hbound_cmp(int modulus1, int remainder1, int modulus2, int remainder2) |
| 1558 | { |
| 1559 | if (modulus1 < modulus2) |
| 1560 | return -1; |
| 1561 | if (modulus1 > modulus2) |
| 1562 | return 1; |
| 1563 | if (modulus1 == modulus2 && remainder1 != remainder2) |
| 1564 | return (remainder1 > remainder2) ? 1 : -1; |
| 1565 | return 0; |
| 1566 | } |
| 1567 | |
| 1568 | /* |
| 1569 | * partition_list_bsearch |
| 1570 | * Returns the index of the greatest bound datum that is less than equal |
| 1571 | * to the given value or -1 if all of the bound datums are greater |
| 1572 | * |
| 1573 | * *is_equal is set to true if the bound datum at the returned index is equal |
| 1574 | * to the input value. |
| 1575 | */ |
| 1576 | int |
| 1577 | partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, |
| 1578 | PartitionBoundInfo boundinfo, |
| 1579 | Datum value, bool *is_equal) |
| 1580 | { |
| 1581 | int lo, |
| 1582 | hi, |
| 1583 | mid; |
| 1584 | |
| 1585 | lo = -1; |
| 1586 | hi = boundinfo->ndatums - 1; |
| 1587 | while (lo < hi) |
| 1588 | { |
| 1589 | int32 cmpval; |
| 1590 | |
| 1591 | mid = (lo + hi + 1) / 2; |
| 1592 | cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[0], |
| 1593 | partcollation[0], |
| 1594 | boundinfo->datums[mid][0], |
| 1595 | value)); |
| 1596 | if (cmpval <= 0) |
| 1597 | { |
| 1598 | lo = mid; |
| 1599 | *is_equal = (cmpval == 0); |
| 1600 | if (*is_equal) |
| 1601 | break; |
| 1602 | } |
| 1603 | else |
| 1604 | hi = mid - 1; |
| 1605 | } |
| 1606 | |
| 1607 | return lo; |
| 1608 | } |
| 1609 | |
| 1610 | /* |
| 1611 | * partition_range_bsearch |
| 1612 | * Returns the index of the greatest range bound that is less than or |
| 1613 | * equal to the given range bound or -1 if all of the range bounds are |
| 1614 | * greater |
| 1615 | * |
| 1616 | * *is_equal is set to true if the range bound at the returned index is equal |
| 1617 | * to the input range bound |
| 1618 | */ |
| 1619 | static int |
| 1620 | partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc, |
| 1621 | Oid *partcollation, |
| 1622 | PartitionBoundInfo boundinfo, |
| 1623 | PartitionRangeBound *probe, bool *is_equal) |
| 1624 | { |
| 1625 | int lo, |
| 1626 | hi, |
| 1627 | mid; |
| 1628 | |
| 1629 | lo = -1; |
| 1630 | hi = boundinfo->ndatums - 1; |
| 1631 | while (lo < hi) |
| 1632 | { |
| 1633 | int32 cmpval; |
| 1634 | |
| 1635 | mid = (lo + hi + 1) / 2; |
| 1636 | cmpval = partition_rbound_cmp(partnatts, partsupfunc, |
| 1637 | partcollation, |
| 1638 | boundinfo->datums[mid], |
| 1639 | boundinfo->kind[mid], |
| 1640 | (boundinfo->indexes[mid] == -1), |
| 1641 | probe); |
| 1642 | if (cmpval <= 0) |
| 1643 | { |
| 1644 | lo = mid; |
| 1645 | *is_equal = (cmpval == 0); |
| 1646 | |
| 1647 | if (*is_equal) |
| 1648 | break; |
| 1649 | } |
| 1650 | else |
| 1651 | hi = mid - 1; |
| 1652 | } |
| 1653 | |
| 1654 | return lo; |
| 1655 | } |
| 1656 | |
| 1657 | /* |
| 1658 | * partition_range_bsearch |
| 1659 | * Returns the index of the greatest range bound that is less than or |
| 1660 | * equal to the given tuple or -1 if all of the range bounds are greater |
| 1661 | * |
| 1662 | * *is_equal is set to true if the range bound at the returned index is equal |
| 1663 | * to the input tuple. |
| 1664 | */ |
| 1665 | int |
| 1666 | partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, |
| 1667 | PartitionBoundInfo boundinfo, |
| 1668 | int nvalues, Datum *values, bool *is_equal) |
| 1669 | { |
| 1670 | int lo, |
| 1671 | hi, |
| 1672 | mid; |
| 1673 | |
| 1674 | lo = -1; |
| 1675 | hi = boundinfo->ndatums - 1; |
| 1676 | while (lo < hi) |
| 1677 | { |
| 1678 | int32 cmpval; |
| 1679 | |
| 1680 | mid = (lo + hi + 1) / 2; |
| 1681 | cmpval = partition_rbound_datum_cmp(partsupfunc, |
| 1682 | partcollation, |
| 1683 | boundinfo->datums[mid], |
| 1684 | boundinfo->kind[mid], |
| 1685 | values, |
| 1686 | nvalues); |
| 1687 | if (cmpval <= 0) |
| 1688 | { |
| 1689 | lo = mid; |
| 1690 | *is_equal = (cmpval == 0); |
| 1691 | |
| 1692 | if (*is_equal) |
| 1693 | break; |
| 1694 | } |
| 1695 | else |
| 1696 | hi = mid - 1; |
| 1697 | } |
| 1698 | |
| 1699 | return lo; |
| 1700 | } |
| 1701 | |
| 1702 | /* |
| 1703 | * partition_hash_bsearch |
| 1704 | * Returns the index of the greatest (modulus, remainder) pair that is |
| 1705 | * less than or equal to the given (modulus, remainder) pair or -1 if |
| 1706 | * all of them are greater |
| 1707 | */ |
| 1708 | int |
| 1709 | partition_hash_bsearch(PartitionBoundInfo boundinfo, |
| 1710 | int modulus, int remainder) |
| 1711 | { |
| 1712 | int lo, |
| 1713 | hi, |
| 1714 | mid; |
| 1715 | |
| 1716 | lo = -1; |
| 1717 | hi = boundinfo->ndatums - 1; |
| 1718 | while (lo < hi) |
| 1719 | { |
| 1720 | int32 cmpval, |
| 1721 | bound_modulus, |
| 1722 | bound_remainder; |
| 1723 | |
| 1724 | mid = (lo + hi + 1) / 2; |
| 1725 | bound_modulus = DatumGetInt32(boundinfo->datums[mid][0]); |
| 1726 | bound_remainder = DatumGetInt32(boundinfo->datums[mid][1]); |
| 1727 | cmpval = partition_hbound_cmp(bound_modulus, bound_remainder, |
| 1728 | modulus, remainder); |
| 1729 | if (cmpval <= 0) |
| 1730 | { |
| 1731 | lo = mid; |
| 1732 | |
| 1733 | if (cmpval == 0) |
| 1734 | break; |
| 1735 | } |
| 1736 | else |
| 1737 | hi = mid - 1; |
| 1738 | } |
| 1739 | |
| 1740 | return lo; |
| 1741 | } |
| 1742 | |
| 1743 | /* |
| 1744 | * qsort_partition_hbound_cmp |
| 1745 | * |
| 1746 | * Hash bounds are sorted by modulus, then by remainder. |
| 1747 | */ |
| 1748 | static int32 |
| 1749 | qsort_partition_hbound_cmp(const void *a, const void *b) |
| 1750 | { |
| 1751 | PartitionHashBound *h1 = (*(PartitionHashBound *const *) a); |
| 1752 | PartitionHashBound *h2 = (*(PartitionHashBound *const *) b); |
| 1753 | |
| 1754 | return partition_hbound_cmp(h1->modulus, h1->remainder, |
| 1755 | h2->modulus, h2->remainder); |
| 1756 | } |
| 1757 | |
| 1758 | /* |
| 1759 | * qsort_partition_list_value_cmp |
| 1760 | * |
| 1761 | * Compare two list partition bound datums. |
| 1762 | */ |
| 1763 | static int32 |
| 1764 | qsort_partition_list_value_cmp(const void *a, const void *b, void *arg) |
| 1765 | { |
| 1766 | Datum val1 = (*(PartitionListValue *const *) a)->value, |
| 1767 | val2 = (*(PartitionListValue *const *) b)->value; |
| 1768 | PartitionKey key = (PartitionKey) arg; |
| 1769 | |
| 1770 | return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0], |
| 1771 | key->partcollation[0], |
| 1772 | val1, val2)); |
| 1773 | } |
| 1774 | |
| 1775 | /* |
| 1776 | * qsort_partition_rbound_cmp |
| 1777 | * |
| 1778 | * Used when sorting range bounds across all range partitions. |
| 1779 | */ |
| 1780 | static int32 |
| 1781 | qsort_partition_rbound_cmp(const void *a, const void *b, void *arg) |
| 1782 | { |
| 1783 | PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a); |
| 1784 | PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b); |
| 1785 | PartitionKey key = (PartitionKey) arg; |
| 1786 | |
| 1787 | return partition_rbound_cmp(key->partnatts, key->partsupfunc, |
| 1788 | key->partcollation, b1->datums, b1->kind, |
| 1789 | b1->lower, b2); |
| 1790 | } |
| 1791 | |
| 1792 | /* |
| 1793 | * get_partition_bound_num_indexes |
| 1794 | * |
| 1795 | * Returns the number of the entries in the partition bound indexes array. |
| 1796 | */ |
| 1797 | static int |
| 1798 | get_partition_bound_num_indexes(PartitionBoundInfo bound) |
| 1799 | { |
| 1800 | int num_indexes; |
| 1801 | |
| 1802 | Assert(bound); |
| 1803 | |
| 1804 | switch (bound->strategy) |
| 1805 | { |
| 1806 | case PARTITION_STRATEGY_HASH: |
| 1807 | |
| 1808 | /* |
| 1809 | * The number of the entries in the indexes array is same as the |
| 1810 | * greatest modulus. |
| 1811 | */ |
| 1812 | num_indexes = get_hash_partition_greatest_modulus(bound); |
| 1813 | break; |
| 1814 | |
| 1815 | case PARTITION_STRATEGY_LIST: |
| 1816 | num_indexes = bound->ndatums; |
| 1817 | break; |
| 1818 | |
| 1819 | case PARTITION_STRATEGY_RANGE: |
| 1820 | /* Range partitioned table has an extra index. */ |
| 1821 | num_indexes = bound->ndatums + 1; |
| 1822 | break; |
| 1823 | |
| 1824 | default: |
| 1825 | elog(ERROR, "unexpected partition strategy: %d" , |
| 1826 | (int) bound->strategy); |
| 1827 | } |
| 1828 | |
| 1829 | return num_indexes; |
| 1830 | } |
| 1831 | |
| 1832 | /* |
| 1833 | * get_partition_operator |
| 1834 | * |
| 1835 | * Return oid of the operator of the given strategy for the given partition |
| 1836 | * key column. It is assumed that the partitioning key is of the same type as |
| 1837 | * the chosen partitioning opclass, or at least binary-compatible. In the |
| 1838 | * latter case, *need_relabel is set to true if the opclass is not of a |
| 1839 | * polymorphic type (indicating a RelabelType node needed on top), otherwise |
| 1840 | * false. |
| 1841 | */ |
| 1842 | static Oid |
| 1843 | get_partition_operator(PartitionKey key, int col, StrategyNumber strategy, |
| 1844 | bool *need_relabel) |
| 1845 | { |
| 1846 | Oid operoid; |
| 1847 | |
| 1848 | /* |
| 1849 | * Get the operator in the partitioning opfamily using the opclass' |
| 1850 | * declared input type as both left- and righttype. |
| 1851 | */ |
| 1852 | operoid = get_opfamily_member(key->partopfamily[col], |
| 1853 | key->partopcintype[col], |
| 1854 | key->partopcintype[col], |
| 1855 | strategy); |
| 1856 | if (!OidIsValid(operoid)) |
| 1857 | elog(ERROR, "missing operator %d(%u,%u) in partition opfamily %u" , |
| 1858 | strategy, key->partopcintype[col], key->partopcintype[col], |
| 1859 | key->partopfamily[col]); |
| 1860 | |
| 1861 | /* |
| 1862 | * If the partition key column is not of the same type as the operator |
| 1863 | * class and not polymorphic, tell caller to wrap the non-Const expression |
| 1864 | * in a RelabelType. This matches what parse_coerce.c does. |
| 1865 | */ |
| 1866 | *need_relabel = (key->parttypid[col] != key->partopcintype[col] && |
| 1867 | key->partopcintype[col] != RECORDOID && |
| 1868 | !IsPolymorphicType(key->partopcintype[col])); |
| 1869 | |
| 1870 | return operoid; |
| 1871 | } |
| 1872 | |
| 1873 | /* |
| 1874 | * make_partition_op_expr |
| 1875 | * Returns an Expr for the given partition key column with arg1 and |
| 1876 | * arg2 as its leftop and rightop, respectively |
| 1877 | */ |
| 1878 | static Expr * |
| 1879 | make_partition_op_expr(PartitionKey key, int keynum, |
| 1880 | uint16 strategy, Expr *arg1, Expr *arg2) |
| 1881 | { |
| 1882 | Oid operoid; |
| 1883 | bool need_relabel = false; |
| 1884 | Expr *result = NULL; |
| 1885 | |
| 1886 | /* Get the correct btree operator for this partitioning column */ |
| 1887 | operoid = get_partition_operator(key, keynum, strategy, &need_relabel); |
| 1888 | |
| 1889 | /* |
| 1890 | * Chosen operator may be such that the non-Const operand needs to be |
| 1891 | * coerced, so apply the same; see the comment in |
| 1892 | * get_partition_operator(). |
| 1893 | */ |
| 1894 | if (!IsA(arg1, Const) && |
| 1895 | (need_relabel || |
| 1896 | key->partcollation[keynum] != key->parttypcoll[keynum])) |
| 1897 | arg1 = (Expr *) makeRelabelType(arg1, |
| 1898 | key->partopcintype[keynum], |
| 1899 | -1, |
| 1900 | key->partcollation[keynum], |
| 1901 | COERCE_EXPLICIT_CAST); |
| 1902 | |
| 1903 | /* Generate the actual expression */ |
| 1904 | switch (key->strategy) |
| 1905 | { |
| 1906 | case PARTITION_STRATEGY_LIST: |
| 1907 | { |
| 1908 | List *elems = (List *) arg2; |
| 1909 | int nelems = list_length(elems); |
| 1910 | |
| 1911 | Assert(nelems >= 1); |
| 1912 | Assert(keynum == 0); |
| 1913 | |
| 1914 | if (nelems > 1 && |
| 1915 | !type_is_array(key->parttypid[keynum])) |
| 1916 | { |
| 1917 | ArrayExpr *arrexpr; |
| 1918 | ScalarArrayOpExpr *saopexpr; |
| 1919 | |
| 1920 | /* Construct an ArrayExpr for the right-hand inputs */ |
| 1921 | arrexpr = makeNode(ArrayExpr); |
| 1922 | arrexpr->array_typeid = |
| 1923 | get_array_type(key->parttypid[keynum]); |
| 1924 | arrexpr->array_collid = key->parttypcoll[keynum]; |
| 1925 | arrexpr->element_typeid = key->parttypid[keynum]; |
| 1926 | arrexpr->elements = elems; |
| 1927 | arrexpr->multidims = false; |
| 1928 | arrexpr->location = -1; |
| 1929 | |
| 1930 | /* Build leftop = ANY (rightop) */ |
| 1931 | saopexpr = makeNode(ScalarArrayOpExpr); |
| 1932 | saopexpr->opno = operoid; |
| 1933 | saopexpr->opfuncid = get_opcode(operoid); |
| 1934 | saopexpr->useOr = true; |
| 1935 | saopexpr->inputcollid = key->partcollation[keynum]; |
| 1936 | saopexpr->args = list_make2(arg1, arrexpr); |
| 1937 | saopexpr->location = -1; |
| 1938 | |
| 1939 | result = (Expr *) saopexpr; |
| 1940 | } |
| 1941 | else |
| 1942 | { |
| 1943 | List *elemops = NIL; |
| 1944 | ListCell *lc; |
| 1945 | |
| 1946 | foreach(lc, elems) |
| 1947 | { |
| 1948 | Expr *elem = lfirst(lc), |
| 1949 | *elemop; |
| 1950 | |
| 1951 | elemop = make_opclause(operoid, |
| 1952 | BOOLOID, |
| 1953 | false, |
| 1954 | arg1, elem, |
| 1955 | InvalidOid, |
| 1956 | key->partcollation[keynum]); |
| 1957 | elemops = lappend(elemops, elemop); |
| 1958 | } |
| 1959 | |
| 1960 | result = nelems > 1 ? makeBoolExpr(OR_EXPR, elemops, -1) : linitial(elemops); |
| 1961 | } |
| 1962 | break; |
| 1963 | } |
| 1964 | |
| 1965 | case PARTITION_STRATEGY_RANGE: |
| 1966 | result = make_opclause(operoid, |
| 1967 | BOOLOID, |
| 1968 | false, |
| 1969 | arg1, arg2, |
| 1970 | InvalidOid, |
| 1971 | key->partcollation[keynum]); |
| 1972 | break; |
| 1973 | |
| 1974 | default: |
| 1975 | elog(ERROR, "invalid partitioning strategy" ); |
| 1976 | break; |
| 1977 | } |
| 1978 | |
| 1979 | return result; |
| 1980 | } |
| 1981 | |
| 1982 | /* |
| 1983 | * get_qual_for_hash |
| 1984 | * |
| 1985 | * Returns a CHECK constraint expression to use as a hash partition's |
| 1986 | * constraint, given the parent relation and partition bound structure. |
| 1987 | * |
| 1988 | * The partition constraint for a hash partition is always a call to the |
| 1989 | * built-in function satisfies_hash_partition(). |
| 1990 | */ |
| 1991 | static List * |
| 1992 | get_qual_for_hash(Relation parent, PartitionBoundSpec *spec) |
| 1993 | { |
| 1994 | PartitionKey key = RelationGetPartitionKey(parent); |
| 1995 | FuncExpr *fexpr; |
| 1996 | Node *relidConst; |
| 1997 | Node *modulusConst; |
| 1998 | Node *remainderConst; |
| 1999 | List *args; |
| 2000 | ListCell *partexprs_item; |
| 2001 | int i; |
| 2002 | |
| 2003 | /* Fixed arguments. */ |
| 2004 | relidConst = (Node *) makeConst(OIDOID, |
| 2005 | -1, |
| 2006 | InvalidOid, |
| 2007 | sizeof(Oid), |
| 2008 | ObjectIdGetDatum(RelationGetRelid(parent)), |
| 2009 | false, |
| 2010 | true); |
| 2011 | |
| 2012 | modulusConst = (Node *) makeConst(INT4OID, |
| 2013 | -1, |
| 2014 | InvalidOid, |
| 2015 | sizeof(int32), |
| 2016 | Int32GetDatum(spec->modulus), |
| 2017 | false, |
| 2018 | true); |
| 2019 | |
| 2020 | remainderConst = (Node *) makeConst(INT4OID, |
| 2021 | -1, |
| 2022 | InvalidOid, |
| 2023 | sizeof(int32), |
| 2024 | Int32GetDatum(spec->remainder), |
| 2025 | false, |
| 2026 | true); |
| 2027 | |
| 2028 | args = list_make3(relidConst, modulusConst, remainderConst); |
| 2029 | partexprs_item = list_head(key->partexprs); |
| 2030 | |
| 2031 | /* Add an argument for each key column. */ |
| 2032 | for (i = 0; i < key->partnatts; i++) |
| 2033 | { |
| 2034 | Node *keyCol; |
| 2035 | |
| 2036 | /* Left operand */ |
| 2037 | if (key->partattrs[i] != 0) |
| 2038 | { |
| 2039 | keyCol = (Node *) makeVar(1, |
| 2040 | key->partattrs[i], |
| 2041 | key->parttypid[i], |
| 2042 | key->parttypmod[i], |
| 2043 | key->parttypcoll[i], |
| 2044 | 0); |
| 2045 | } |
| 2046 | else |
| 2047 | { |
| 2048 | keyCol = (Node *) copyObject(lfirst(partexprs_item)); |
| 2049 | partexprs_item = lnext(partexprs_item); |
| 2050 | } |
| 2051 | |
| 2052 | args = lappend(args, keyCol); |
| 2053 | } |
| 2054 | |
| 2055 | fexpr = makeFuncExpr(F_SATISFIES_HASH_PARTITION, |
| 2056 | BOOLOID, |
| 2057 | args, |
| 2058 | InvalidOid, |
| 2059 | InvalidOid, |
| 2060 | COERCE_EXPLICIT_CALL); |
| 2061 | |
| 2062 | return list_make1(fexpr); |
| 2063 | } |
| 2064 | |
| 2065 | /* |
| 2066 | * get_qual_for_list |
| 2067 | * |
| 2068 | * Returns an implicit-AND list of expressions to use as a list partition's |
| 2069 | * constraint, given the parent relation and partition bound structure. |
| 2070 | * |
| 2071 | * The function returns NIL for a default partition when it's the only |
| 2072 | * partition since in that case there is no constraint. |
| 2073 | */ |
| 2074 | static List * |
| 2075 | get_qual_for_list(Relation parent, PartitionBoundSpec *spec) |
| 2076 | { |
| 2077 | PartitionKey key = RelationGetPartitionKey(parent); |
| 2078 | List *result; |
| 2079 | Expr *keyCol; |
| 2080 | Expr *opexpr; |
| 2081 | NullTest *nulltest; |
| 2082 | ListCell *cell; |
| 2083 | List *elems = NIL; |
| 2084 | bool list_has_null = false; |
| 2085 | |
| 2086 | /* |
| 2087 | * Only single-column list partitioning is supported, so we are worried |
| 2088 | * only about the partition key with index 0. |
| 2089 | */ |
| 2090 | Assert(key->partnatts == 1); |
| 2091 | |
| 2092 | /* Construct Var or expression representing the partition column */ |
| 2093 | if (key->partattrs[0] != 0) |
| 2094 | keyCol = (Expr *) makeVar(1, |
| 2095 | key->partattrs[0], |
| 2096 | key->parttypid[0], |
| 2097 | key->parttypmod[0], |
| 2098 | key->parttypcoll[0], |
| 2099 | 0); |
| 2100 | else |
| 2101 | keyCol = (Expr *) copyObject(linitial(key->partexprs)); |
| 2102 | |
| 2103 | /* |
| 2104 | * For default list partition, collect datums for all the partitions. The |
| 2105 | * default partition constraint should check that the partition key is |
| 2106 | * equal to none of those. |
| 2107 | */ |
| 2108 | if (spec->is_default) |
| 2109 | { |
| 2110 | int i; |
| 2111 | int ndatums = 0; |
| 2112 | PartitionDesc pdesc = RelationGetPartitionDesc(parent); |
| 2113 | PartitionBoundInfo boundinfo = pdesc->boundinfo; |
| 2114 | |
| 2115 | if (boundinfo) |
| 2116 | { |
| 2117 | ndatums = boundinfo->ndatums; |
| 2118 | |
| 2119 | if (partition_bound_accepts_nulls(boundinfo)) |
| 2120 | list_has_null = true; |
| 2121 | } |
| 2122 | |
| 2123 | /* |
| 2124 | * If default is the only partition, there need not be any partition |
| 2125 | * constraint on it. |
| 2126 | */ |
| 2127 | if (ndatums == 0 && !list_has_null) |
| 2128 | return NIL; |
| 2129 | |
| 2130 | for (i = 0; i < ndatums; i++) |
| 2131 | { |
| 2132 | Const *val; |
| 2133 | |
| 2134 | /* |
| 2135 | * Construct Const from known-not-null datum. We must be careful |
| 2136 | * to copy the value, because our result has to be able to outlive |
| 2137 | * the relcache entry we're copying from. |
| 2138 | */ |
| 2139 | val = makeConst(key->parttypid[0], |
| 2140 | key->parttypmod[0], |
| 2141 | key->parttypcoll[0], |
| 2142 | key->parttyplen[0], |
| 2143 | datumCopy(*boundinfo->datums[i], |
| 2144 | key->parttypbyval[0], |
| 2145 | key->parttyplen[0]), |
| 2146 | false, /* isnull */ |
| 2147 | key->parttypbyval[0]); |
| 2148 | |
| 2149 | elems = lappend(elems, val); |
| 2150 | } |
| 2151 | } |
| 2152 | else |
| 2153 | { |
| 2154 | /* |
| 2155 | * Create list of Consts for the allowed values, excluding any nulls. |
| 2156 | */ |
| 2157 | foreach(cell, spec->listdatums) |
| 2158 | { |
| 2159 | Const *val = castNode(Const, lfirst(cell)); |
| 2160 | |
| 2161 | if (val->constisnull) |
| 2162 | list_has_null = true; |
| 2163 | else |
| 2164 | elems = lappend(elems, copyObject(val)); |
| 2165 | } |
| 2166 | } |
| 2167 | |
| 2168 | if (elems) |
| 2169 | { |
| 2170 | /* |
| 2171 | * Generate the operator expression from the non-null partition |
| 2172 | * values. |
| 2173 | */ |
| 2174 | opexpr = make_partition_op_expr(key, 0, BTEqualStrategyNumber, |
| 2175 | keyCol, (Expr *) elems); |
| 2176 | } |
| 2177 | else |
| 2178 | { |
| 2179 | /* |
| 2180 | * If there are no partition values, we don't need an operator |
| 2181 | * expression. |
| 2182 | */ |
| 2183 | opexpr = NULL; |
| 2184 | } |
| 2185 | |
| 2186 | if (!list_has_null) |
| 2187 | { |
| 2188 | /* |
| 2189 | * Gin up a "col IS NOT NULL" test that will be AND'd with the main |
| 2190 | * expression. This might seem redundant, but the partition routing |
| 2191 | * machinery needs it. |
| 2192 | */ |
| 2193 | nulltest = makeNode(NullTest); |
| 2194 | nulltest->arg = keyCol; |
| 2195 | nulltest->nulltesttype = IS_NOT_NULL; |
| 2196 | nulltest->argisrow = false; |
| 2197 | nulltest->location = -1; |
| 2198 | |
| 2199 | result = opexpr ? list_make2(nulltest, opexpr) : list_make1(nulltest); |
| 2200 | } |
| 2201 | else |
| 2202 | { |
| 2203 | /* |
| 2204 | * Gin up a "col IS NULL" test that will be OR'd with the main |
| 2205 | * expression. |
| 2206 | */ |
| 2207 | nulltest = makeNode(NullTest); |
| 2208 | nulltest->arg = keyCol; |
| 2209 | nulltest->nulltesttype = IS_NULL; |
| 2210 | nulltest->argisrow = false; |
| 2211 | nulltest->location = -1; |
| 2212 | |
| 2213 | if (opexpr) |
| 2214 | { |
| 2215 | Expr *or; |
| 2216 | |
| 2217 | or = makeBoolExpr(OR_EXPR, list_make2(nulltest, opexpr), -1); |
| 2218 | result = list_make1(or); |
| 2219 | } |
| 2220 | else |
| 2221 | result = list_make1(nulltest); |
| 2222 | } |
| 2223 | |
| 2224 | /* |
| 2225 | * Note that, in general, applying NOT to a constraint expression doesn't |
| 2226 | * necessarily invert the set of rows it accepts, because NOT (NULL) is |
| 2227 | * NULL. However, the partition constraints we construct here never |
| 2228 | * evaluate to NULL, so applying NOT works as intended. |
| 2229 | */ |
| 2230 | if (spec->is_default) |
| 2231 | { |
| 2232 | result = list_make1(make_ands_explicit(result)); |
| 2233 | result = list_make1(makeBoolExpr(NOT_EXPR, result, -1)); |
| 2234 | } |
| 2235 | |
| 2236 | return result; |
| 2237 | } |
| 2238 | |
| 2239 | /* |
| 2240 | * get_qual_for_range |
| 2241 | * |
| 2242 | * Returns an implicit-AND list of expressions to use as a range partition's |
| 2243 | * constraint, given the parent relation and partition bound structure. |
| 2244 | * |
| 2245 | * For a multi-column range partition key, say (a, b, c), with (al, bl, cl) |
| 2246 | * as the lower bound tuple and (au, bu, cu) as the upper bound tuple, we |
| 2247 | * generate an expression tree of the following form: |
| 2248 | * |
| 2249 | * (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL) |
| 2250 | * AND |
| 2251 | * (a > al OR (a = al AND b > bl) OR (a = al AND b = bl AND c >= cl)) |
| 2252 | * AND |
| 2253 | * (a < au OR (a = au AND b < bu) OR (a = au AND b = bu AND c < cu)) |
| 2254 | * |
| 2255 | * It is often the case that a prefix of lower and upper bound tuples contains |
| 2256 | * the same values, for example, (al = au), in which case, we will emit an |
| 2257 | * expression tree of the following form: |
| 2258 | * |
| 2259 | * (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL) |
| 2260 | * AND |
| 2261 | * (a = al) |
| 2262 | * AND |
| 2263 | * (b > bl OR (b = bl AND c >= cl)) |
| 2264 | * AND |
| 2265 | * (b < bu) OR (b = bu AND c < cu)) |
| 2266 | * |
| 2267 | * If a bound datum is either MINVALUE or MAXVALUE, these expressions are |
| 2268 | * simplified using the fact that any value is greater than MINVALUE and less |
| 2269 | * than MAXVALUE. So, for example, if cu = MAXVALUE, c < cu is automatically |
| 2270 | * true, and we need not emit any expression for it, and the last line becomes |
| 2271 | * |
| 2272 | * (b < bu) OR (b = bu), which is simplified to (b <= bu) |
| 2273 | * |
| 2274 | * In most common cases with only one partition column, say a, the following |
| 2275 | * expression tree will be generated: a IS NOT NULL AND a >= al AND a < au |
| 2276 | * |
| 2277 | * For default partition, it returns the negation of the constraints of all |
| 2278 | * the other partitions. |
| 2279 | * |
| 2280 | * External callers should pass for_default as false; we set it to true only |
| 2281 | * when recursing. |
| 2282 | */ |
| 2283 | static List * |
| 2284 | get_qual_for_range(Relation parent, PartitionBoundSpec *spec, |
| 2285 | bool for_default) |
| 2286 | { |
| 2287 | List *result = NIL; |
| 2288 | ListCell *cell1, |
| 2289 | *cell2, |
| 2290 | *partexprs_item, |
| 2291 | *partexprs_item_saved; |
| 2292 | int i, |
| 2293 | j; |
| 2294 | PartitionRangeDatum *ldatum, |
| 2295 | *udatum; |
| 2296 | PartitionKey key = RelationGetPartitionKey(parent); |
| 2297 | Expr *keyCol; |
| 2298 | Const *lower_val, |
| 2299 | *upper_val; |
| 2300 | List *lower_or_arms, |
| 2301 | *upper_or_arms; |
| 2302 | int num_or_arms, |
| 2303 | current_or_arm; |
| 2304 | ListCell *lower_or_start_datum, |
| 2305 | *upper_or_start_datum; |
| 2306 | bool need_next_lower_arm, |
| 2307 | need_next_upper_arm; |
| 2308 | |
| 2309 | if (spec->is_default) |
| 2310 | { |
| 2311 | List *or_expr_args = NIL; |
| 2312 | PartitionDesc pdesc = RelationGetPartitionDesc(parent); |
| 2313 | Oid *inhoids = pdesc->oids; |
| 2314 | int nparts = pdesc->nparts, |
| 2315 | i; |
| 2316 | |
| 2317 | for (i = 0; i < nparts; i++) |
| 2318 | { |
| 2319 | Oid inhrelid = inhoids[i]; |
| 2320 | HeapTuple tuple; |
| 2321 | Datum datum; |
| 2322 | bool isnull; |
| 2323 | PartitionBoundSpec *bspec; |
| 2324 | |
| 2325 | tuple = SearchSysCache1(RELOID, inhrelid); |
| 2326 | if (!HeapTupleIsValid(tuple)) |
| 2327 | elog(ERROR, "cache lookup failed for relation %u" , inhrelid); |
| 2328 | |
| 2329 | datum = SysCacheGetAttr(RELOID, tuple, |
| 2330 | Anum_pg_class_relpartbound, |
| 2331 | &isnull); |
| 2332 | if (isnull) |
| 2333 | elog(ERROR, "null relpartbound for relation %u" , inhrelid); |
| 2334 | |
| 2335 | bspec = (PartitionBoundSpec *) |
| 2336 | stringToNode(TextDatumGetCString(datum)); |
| 2337 | if (!IsA(bspec, PartitionBoundSpec)) |
| 2338 | elog(ERROR, "expected PartitionBoundSpec" ); |
| 2339 | |
| 2340 | if (!bspec->is_default) |
| 2341 | { |
| 2342 | List *part_qual; |
| 2343 | |
| 2344 | part_qual = get_qual_for_range(parent, bspec, true); |
| 2345 | |
| 2346 | /* |
| 2347 | * AND the constraints of the partition and add to |
| 2348 | * or_expr_args |
| 2349 | */ |
| 2350 | or_expr_args = lappend(or_expr_args, list_length(part_qual) > 1 |
| 2351 | ? makeBoolExpr(AND_EXPR, part_qual, -1) |
| 2352 | : linitial(part_qual)); |
| 2353 | } |
| 2354 | ReleaseSysCache(tuple); |
| 2355 | } |
| 2356 | |
| 2357 | if (or_expr_args != NIL) |
| 2358 | { |
| 2359 | Expr *other_parts_constr; |
| 2360 | |
| 2361 | /* |
| 2362 | * Combine the constraints obtained for non-default partitions |
| 2363 | * using OR. As requested, each of the OR's args doesn't include |
| 2364 | * the NOT NULL test for partition keys (which is to avoid its |
| 2365 | * useless repetition). Add the same now. |
| 2366 | */ |
| 2367 | other_parts_constr = |
| 2368 | makeBoolExpr(AND_EXPR, |
| 2369 | lappend(get_range_nulltest(key), |
| 2370 | list_length(or_expr_args) > 1 |
| 2371 | ? makeBoolExpr(OR_EXPR, or_expr_args, |
| 2372 | -1) |
| 2373 | : linitial(or_expr_args)), |
| 2374 | -1); |
| 2375 | |
| 2376 | /* |
| 2377 | * Finally, the default partition contains everything *NOT* |
| 2378 | * contained in the non-default partitions. |
| 2379 | */ |
| 2380 | result = list_make1(makeBoolExpr(NOT_EXPR, |
| 2381 | list_make1(other_parts_constr), -1)); |
| 2382 | } |
| 2383 | |
| 2384 | return result; |
| 2385 | } |
| 2386 | |
| 2387 | lower_or_start_datum = list_head(spec->lowerdatums); |
| 2388 | upper_or_start_datum = list_head(spec->upperdatums); |
| 2389 | num_or_arms = key->partnatts; |
| 2390 | |
| 2391 | /* |
| 2392 | * If it is the recursive call for default, we skip the get_range_nulltest |
| 2393 | * to avoid accumulating the NullTest on the same keys for each partition. |
| 2394 | */ |
| 2395 | if (!for_default) |
| 2396 | result = get_range_nulltest(key); |
| 2397 | |
| 2398 | /* |
| 2399 | * Iterate over the key columns and check if the corresponding lower and |
| 2400 | * upper datums are equal using the btree equality operator for the |
| 2401 | * column's type. If equal, we emit single keyCol = common_value |
| 2402 | * expression. Starting from the first column for which the corresponding |
| 2403 | * lower and upper bound datums are not equal, we generate OR expressions |
| 2404 | * as shown in the function's header comment. |
| 2405 | */ |
| 2406 | i = 0; |
| 2407 | partexprs_item = list_head(key->partexprs); |
| 2408 | partexprs_item_saved = partexprs_item; /* placate compiler */ |
| 2409 | forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums) |
| 2410 | { |
| 2411 | EState *estate; |
| 2412 | MemoryContext oldcxt; |
| 2413 | Expr *test_expr; |
| 2414 | ExprState *test_exprstate; |
| 2415 | Datum test_result; |
| 2416 | bool isNull; |
| 2417 | |
| 2418 | ldatum = castNode(PartitionRangeDatum, lfirst(cell1)); |
| 2419 | udatum = castNode(PartitionRangeDatum, lfirst(cell2)); |
| 2420 | |
| 2421 | /* |
| 2422 | * Since get_range_key_properties() modifies partexprs_item, and we |
| 2423 | * might need to start over from the previous expression in the later |
| 2424 | * part of this function, save away the current value. |
| 2425 | */ |
| 2426 | partexprs_item_saved = partexprs_item; |
| 2427 | |
| 2428 | get_range_key_properties(key, i, ldatum, udatum, |
| 2429 | &partexprs_item, |
| 2430 | &keyCol, |
| 2431 | &lower_val, &upper_val); |
| 2432 | |
| 2433 | /* |
| 2434 | * If either value is NULL, the corresponding partition bound is |
| 2435 | * either MINVALUE or MAXVALUE, and we treat them as unequal, because |
| 2436 | * even if they're the same, there is no common value to equate the |
| 2437 | * key column with. |
| 2438 | */ |
| 2439 | if (!lower_val || !upper_val) |
| 2440 | break; |
| 2441 | |
| 2442 | /* Create the test expression */ |
| 2443 | estate = CreateExecutorState(); |
| 2444 | oldcxt = MemoryContextSwitchTo(estate->es_query_cxt); |
| 2445 | test_expr = make_partition_op_expr(key, i, BTEqualStrategyNumber, |
| 2446 | (Expr *) lower_val, |
| 2447 | (Expr *) upper_val); |
| 2448 | fix_opfuncids((Node *) test_expr); |
| 2449 | test_exprstate = ExecInitExpr(test_expr, NULL); |
| 2450 | test_result = ExecEvalExprSwitchContext(test_exprstate, |
| 2451 | GetPerTupleExprContext(estate), |
| 2452 | &isNull); |
| 2453 | MemoryContextSwitchTo(oldcxt); |
| 2454 | FreeExecutorState(estate); |
| 2455 | |
| 2456 | /* If not equal, go generate the OR expressions */ |
| 2457 | if (!DatumGetBool(test_result)) |
| 2458 | break; |
| 2459 | |
| 2460 | /* |
| 2461 | * The bounds for the last key column can't be equal, because such a |
| 2462 | * range partition would never be allowed to be defined (it would have |
| 2463 | * an empty range otherwise). |
| 2464 | */ |
| 2465 | if (i == key->partnatts - 1) |
| 2466 | elog(ERROR, "invalid range bound specification" ); |
| 2467 | |
| 2468 | /* Equal, so generate keyCol = lower_val expression */ |
| 2469 | result = lappend(result, |
| 2470 | make_partition_op_expr(key, i, BTEqualStrategyNumber, |
| 2471 | keyCol, (Expr *) lower_val)); |
| 2472 | |
| 2473 | i++; |
| 2474 | } |
| 2475 | |
| 2476 | /* First pair of lower_val and upper_val that are not equal. */ |
| 2477 | lower_or_start_datum = cell1; |
| 2478 | upper_or_start_datum = cell2; |
| 2479 | |
| 2480 | /* OR will have as many arms as there are key columns left. */ |
| 2481 | num_or_arms = key->partnatts - i; |
| 2482 | current_or_arm = 0; |
| 2483 | lower_or_arms = upper_or_arms = NIL; |
| 2484 | need_next_lower_arm = need_next_upper_arm = true; |
| 2485 | while (current_or_arm < num_or_arms) |
| 2486 | { |
| 2487 | List *lower_or_arm_args = NIL, |
| 2488 | *upper_or_arm_args = NIL; |
| 2489 | |
| 2490 | /* Restart scan of columns from the i'th one */ |
| 2491 | j = i; |
| 2492 | partexprs_item = partexprs_item_saved; |
| 2493 | |
| 2494 | for_both_cell(cell1, lower_or_start_datum, cell2, upper_or_start_datum) |
| 2495 | { |
| 2496 | PartitionRangeDatum *ldatum_next = NULL, |
| 2497 | *udatum_next = NULL; |
| 2498 | |
| 2499 | ldatum = castNode(PartitionRangeDatum, lfirst(cell1)); |
| 2500 | if (lnext(cell1)) |
| 2501 | ldatum_next = castNode(PartitionRangeDatum, |
| 2502 | lfirst(lnext(cell1))); |
| 2503 | udatum = castNode(PartitionRangeDatum, lfirst(cell2)); |
| 2504 | if (lnext(cell2)) |
| 2505 | udatum_next = castNode(PartitionRangeDatum, |
| 2506 | lfirst(lnext(cell2))); |
| 2507 | get_range_key_properties(key, j, ldatum, udatum, |
| 2508 | &partexprs_item, |
| 2509 | &keyCol, |
| 2510 | &lower_val, &upper_val); |
| 2511 | |
| 2512 | if (need_next_lower_arm && lower_val) |
| 2513 | { |
| 2514 | uint16 strategy; |
| 2515 | |
| 2516 | /* |
| 2517 | * For the non-last columns of this arm, use the EQ operator. |
| 2518 | * For the last column of this arm, use GT, unless this is the |
| 2519 | * last column of the whole bound check, or the next bound |
| 2520 | * datum is MINVALUE, in which case use GE. |
| 2521 | */ |
| 2522 | if (j - i < current_or_arm) |
| 2523 | strategy = BTEqualStrategyNumber; |
| 2524 | else if (j == key->partnatts - 1 || |
| 2525 | (ldatum_next && |
| 2526 | ldatum_next->kind == PARTITION_RANGE_DATUM_MINVALUE)) |
| 2527 | strategy = BTGreaterEqualStrategyNumber; |
| 2528 | else |
| 2529 | strategy = BTGreaterStrategyNumber; |
| 2530 | |
| 2531 | lower_or_arm_args = lappend(lower_or_arm_args, |
| 2532 | make_partition_op_expr(key, j, |
| 2533 | strategy, |
| 2534 | keyCol, |
| 2535 | (Expr *) lower_val)); |
| 2536 | } |
| 2537 | |
| 2538 | if (need_next_upper_arm && upper_val) |
| 2539 | { |
| 2540 | uint16 strategy; |
| 2541 | |
| 2542 | /* |
| 2543 | * For the non-last columns of this arm, use the EQ operator. |
| 2544 | * For the last column of this arm, use LT, unless the next |
| 2545 | * bound datum is MAXVALUE, in which case use LE. |
| 2546 | */ |
| 2547 | if (j - i < current_or_arm) |
| 2548 | strategy = BTEqualStrategyNumber; |
| 2549 | else if (udatum_next && |
| 2550 | udatum_next->kind == PARTITION_RANGE_DATUM_MAXVALUE) |
| 2551 | strategy = BTLessEqualStrategyNumber; |
| 2552 | else |
| 2553 | strategy = BTLessStrategyNumber; |
| 2554 | |
| 2555 | upper_or_arm_args = lappend(upper_or_arm_args, |
| 2556 | make_partition_op_expr(key, j, |
| 2557 | strategy, |
| 2558 | keyCol, |
| 2559 | (Expr *) upper_val)); |
| 2560 | } |
| 2561 | |
| 2562 | /* |
| 2563 | * Did we generate enough of OR's arguments? First arm considers |
| 2564 | * the first of the remaining columns, second arm considers first |
| 2565 | * two of the remaining columns, and so on. |
| 2566 | */ |
| 2567 | ++j; |
| 2568 | if (j - i > current_or_arm) |
| 2569 | { |
| 2570 | /* |
| 2571 | * We must not emit any more arms if the new column that will |
| 2572 | * be considered is unbounded, or this one was. |
| 2573 | */ |
| 2574 | if (!lower_val || !ldatum_next || |
| 2575 | ldatum_next->kind != PARTITION_RANGE_DATUM_VALUE) |
| 2576 | need_next_lower_arm = false; |
| 2577 | if (!upper_val || !udatum_next || |
| 2578 | udatum_next->kind != PARTITION_RANGE_DATUM_VALUE) |
| 2579 | need_next_upper_arm = false; |
| 2580 | break; |
| 2581 | } |
| 2582 | } |
| 2583 | |
| 2584 | if (lower_or_arm_args != NIL) |
| 2585 | lower_or_arms = lappend(lower_or_arms, |
| 2586 | list_length(lower_or_arm_args) > 1 |
| 2587 | ? makeBoolExpr(AND_EXPR, lower_or_arm_args, -1) |
| 2588 | : linitial(lower_or_arm_args)); |
| 2589 | |
| 2590 | if (upper_or_arm_args != NIL) |
| 2591 | upper_or_arms = lappend(upper_or_arms, |
| 2592 | list_length(upper_or_arm_args) > 1 |
| 2593 | ? makeBoolExpr(AND_EXPR, upper_or_arm_args, -1) |
| 2594 | : linitial(upper_or_arm_args)); |
| 2595 | |
| 2596 | /* If no work to do in the next iteration, break away. */ |
| 2597 | if (!need_next_lower_arm && !need_next_upper_arm) |
| 2598 | break; |
| 2599 | |
| 2600 | ++current_or_arm; |
| 2601 | } |
| 2602 | |
| 2603 | /* |
| 2604 | * Generate the OR expressions for each of lower and upper bounds (if |
| 2605 | * required), and append to the list of implicitly ANDed list of |
| 2606 | * expressions. |
| 2607 | */ |
| 2608 | if (lower_or_arms != NIL) |
| 2609 | result = lappend(result, |
| 2610 | list_length(lower_or_arms) > 1 |
| 2611 | ? makeBoolExpr(OR_EXPR, lower_or_arms, -1) |
| 2612 | : linitial(lower_or_arms)); |
| 2613 | if (upper_or_arms != NIL) |
| 2614 | result = lappend(result, |
| 2615 | list_length(upper_or_arms) > 1 |
| 2616 | ? makeBoolExpr(OR_EXPR, upper_or_arms, -1) |
| 2617 | : linitial(upper_or_arms)); |
| 2618 | |
| 2619 | /* |
| 2620 | * As noted above, for non-default, we return list with constant TRUE. If |
| 2621 | * the result is NIL during the recursive call for default, it implies |
| 2622 | * this is the only other partition which can hold every value of the key |
| 2623 | * except NULL. Hence we return the NullTest result skipped earlier. |
| 2624 | */ |
| 2625 | if (result == NIL) |
| 2626 | result = for_default |
| 2627 | ? get_range_nulltest(key) |
| 2628 | : list_make1(makeBoolConst(true, false)); |
| 2629 | |
| 2630 | return result; |
| 2631 | } |
| 2632 | |
| 2633 | /* |
| 2634 | * get_range_key_properties |
| 2635 | * Returns range partition key information for a given column |
| 2636 | * |
| 2637 | * This is a subroutine for get_qual_for_range, and its API is pretty |
| 2638 | * specialized to that caller. |
| 2639 | * |
| 2640 | * Constructs an Expr for the key column (returned in *keyCol) and Consts |
| 2641 | * for the lower and upper range limits (returned in *lower_val and |
| 2642 | * *upper_val). For MINVALUE/MAXVALUE limits, NULL is returned instead of |
| 2643 | * a Const. All of these structures are freshly palloc'd. |
| 2644 | * |
| 2645 | * *partexprs_item points to the cell containing the next expression in |
| 2646 | * the key->partexprs list, or NULL. It may be advanced upon return. |
| 2647 | */ |
| 2648 | static void |
| 2649 | get_range_key_properties(PartitionKey key, int keynum, |
| 2650 | PartitionRangeDatum *ldatum, |
| 2651 | PartitionRangeDatum *udatum, |
| 2652 | ListCell **partexprs_item, |
| 2653 | Expr **keyCol, |
| 2654 | Const **lower_val, Const **upper_val) |
| 2655 | { |
| 2656 | /* Get partition key expression for this column */ |
| 2657 | if (key->partattrs[keynum] != 0) |
| 2658 | { |
| 2659 | *keyCol = (Expr *) makeVar(1, |
| 2660 | key->partattrs[keynum], |
| 2661 | key->parttypid[keynum], |
| 2662 | key->parttypmod[keynum], |
| 2663 | key->parttypcoll[keynum], |
| 2664 | 0); |
| 2665 | } |
| 2666 | else |
| 2667 | { |
| 2668 | if (*partexprs_item == NULL) |
| 2669 | elog(ERROR, "wrong number of partition key expressions" ); |
| 2670 | *keyCol = copyObject(lfirst(*partexprs_item)); |
| 2671 | *partexprs_item = lnext(*partexprs_item); |
| 2672 | } |
| 2673 | |
| 2674 | /* Get appropriate Const nodes for the bounds */ |
| 2675 | if (ldatum->kind == PARTITION_RANGE_DATUM_VALUE) |
| 2676 | *lower_val = castNode(Const, copyObject(ldatum->value)); |
| 2677 | else |
| 2678 | *lower_val = NULL; |
| 2679 | |
| 2680 | if (udatum->kind == PARTITION_RANGE_DATUM_VALUE) |
| 2681 | *upper_val = castNode(Const, copyObject(udatum->value)); |
| 2682 | else |
| 2683 | *upper_val = NULL; |
| 2684 | } |
| 2685 | |
| 2686 | /* |
| 2687 | * get_range_nulltest |
| 2688 | * |
| 2689 | * A non-default range partition table does not currently allow partition |
| 2690 | * keys to be null, so emit an IS NOT NULL expression for each key column. |
| 2691 | */ |
| 2692 | static List * |
| 2693 | get_range_nulltest(PartitionKey key) |
| 2694 | { |
| 2695 | List *result = NIL; |
| 2696 | NullTest *nulltest; |
| 2697 | ListCell *partexprs_item; |
| 2698 | int i; |
| 2699 | |
| 2700 | partexprs_item = list_head(key->partexprs); |
| 2701 | for (i = 0; i < key->partnatts; i++) |
| 2702 | { |
| 2703 | Expr *keyCol; |
| 2704 | |
| 2705 | if (key->partattrs[i] != 0) |
| 2706 | { |
| 2707 | keyCol = (Expr *) makeVar(1, |
| 2708 | key->partattrs[i], |
| 2709 | key->parttypid[i], |
| 2710 | key->parttypmod[i], |
| 2711 | key->parttypcoll[i], |
| 2712 | 0); |
| 2713 | } |
| 2714 | else |
| 2715 | { |
| 2716 | if (partexprs_item == NULL) |
| 2717 | elog(ERROR, "wrong number of partition key expressions" ); |
| 2718 | keyCol = copyObject(lfirst(partexprs_item)); |
| 2719 | partexprs_item = lnext(partexprs_item); |
| 2720 | } |
| 2721 | |
| 2722 | nulltest = makeNode(NullTest); |
| 2723 | nulltest->arg = keyCol; |
| 2724 | nulltest->nulltesttype = IS_NOT_NULL; |
| 2725 | nulltest->argisrow = false; |
| 2726 | nulltest->location = -1; |
| 2727 | result = lappend(result, nulltest); |
| 2728 | } |
| 2729 | |
| 2730 | return result; |
| 2731 | } |
| 2732 | |
| 2733 | /* |
| 2734 | * compute_partition_hash_value |
| 2735 | * |
| 2736 | * Compute the hash value for given partition key values. |
| 2737 | */ |
| 2738 | uint64 |
| 2739 | compute_partition_hash_value(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation, |
| 2740 | Datum *values, bool *isnull) |
| 2741 | { |
| 2742 | int i; |
| 2743 | uint64 rowHash = 0; |
| 2744 | Datum seed = UInt64GetDatum(HASH_PARTITION_SEED); |
| 2745 | |
| 2746 | for (i = 0; i < partnatts; i++) |
| 2747 | { |
| 2748 | /* Nulls are just ignored */ |
| 2749 | if (!isnull[i]) |
| 2750 | { |
| 2751 | Datum hash; |
| 2752 | |
| 2753 | Assert(OidIsValid(partsupfunc[i].fn_oid)); |
| 2754 | |
| 2755 | /* |
| 2756 | * Compute hash for each datum value by calling respective |
| 2757 | * datatype-specific hash functions of each partition key |
| 2758 | * attribute. |
| 2759 | */ |
| 2760 | hash = FunctionCall2Coll(&partsupfunc[i], partcollation[i], |
| 2761 | values[i], seed); |
| 2762 | |
| 2763 | /* Form a single 64-bit hash value */ |
| 2764 | rowHash = hash_combine64(rowHash, DatumGetUInt64(hash)); |
| 2765 | } |
| 2766 | } |
| 2767 | |
| 2768 | return rowHash; |
| 2769 | } |
| 2770 | |
| 2771 | /* |
| 2772 | * satisfies_hash_partition |
| 2773 | * |
| 2774 | * This is an SQL-callable function for use in hash partition constraints. |
| 2775 | * The first three arguments are the parent table OID, modulus, and remainder. |
| 2776 | * The remaining arguments are the value of the partitioning columns (or |
| 2777 | * expressions); these are hashed and the results are combined into a single |
| 2778 | * hash value by calling hash_combine64. |
| 2779 | * |
| 2780 | * Returns true if remainder produced when this computed single hash value is |
| 2781 | * divided by the given modulus is equal to given remainder, otherwise false. |
| 2782 | * |
| 2783 | * See get_qual_for_hash() for usage. |
| 2784 | */ |
| 2785 | Datum |
| 2786 | satisfies_hash_partition(PG_FUNCTION_ARGS) |
| 2787 | { |
| 2788 | typedef struct ColumnsHashData |
| 2789 | { |
| 2790 | Oid relid; |
| 2791 | int nkeys; |
| 2792 | Oid variadic_type; |
| 2793 | int16 variadic_typlen; |
| 2794 | bool variadic_typbyval; |
| 2795 | char variadic_typalign; |
| 2796 | Oid partcollid[PARTITION_MAX_KEYS]; |
| 2797 | FmgrInfo partsupfunc[FLEXIBLE_ARRAY_MEMBER]; |
| 2798 | } ColumnsHashData; |
| 2799 | Oid parentId; |
| 2800 | int modulus; |
| 2801 | int remainder; |
| 2802 | Datum seed = UInt64GetDatum(HASH_PARTITION_SEED); |
| 2803 | ColumnsHashData *; |
| 2804 | uint64 rowHash = 0; |
| 2805 | |
| 2806 | /* Return null if the parent OID, modulus, or remainder is NULL. */ |
| 2807 | if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2)) |
| 2808 | PG_RETURN_NULL(); |
| 2809 | parentId = PG_GETARG_OID(0); |
| 2810 | modulus = PG_GETARG_INT32(1); |
| 2811 | remainder = PG_GETARG_INT32(2); |
| 2812 | |
| 2813 | /* Sanity check modulus and remainder. */ |
| 2814 | if (modulus <= 0) |
| 2815 | ereport(ERROR, |
| 2816 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| 2817 | errmsg("modulus for hash partition must be a positive integer" ))); |
| 2818 | if (remainder < 0) |
| 2819 | ereport(ERROR, |
| 2820 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| 2821 | errmsg("remainder for hash partition must be a non-negative integer" ))); |
| 2822 | if (remainder >= modulus) |
| 2823 | ereport(ERROR, |
| 2824 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| 2825 | errmsg("remainder for hash partition must be less than modulus" ))); |
| 2826 | |
| 2827 | /* |
| 2828 | * Cache hash function information. |
| 2829 | */ |
| 2830 | my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra; |
| 2831 | if (my_extra == NULL || my_extra->relid != parentId) |
| 2832 | { |
| 2833 | Relation parent; |
| 2834 | PartitionKey key; |
| 2835 | int j; |
| 2836 | |
| 2837 | /* Open parent relation and fetch partition keyinfo */ |
| 2838 | parent = try_relation_open(parentId, AccessShareLock); |
| 2839 | if (parent == NULL) |
| 2840 | PG_RETURN_NULL(); |
| 2841 | key = RelationGetPartitionKey(parent); |
| 2842 | |
| 2843 | /* Reject parent table that is not hash-partitioned. */ |
| 2844 | if (parent->rd_rel->relkind != RELKIND_PARTITIONED_TABLE || |
| 2845 | key->strategy != PARTITION_STRATEGY_HASH) |
| 2846 | ereport(ERROR, |
| 2847 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| 2848 | errmsg("\"%s\" is not a hash partitioned table" , |
| 2849 | get_rel_name(parentId)))); |
| 2850 | |
| 2851 | if (!get_fn_expr_variadic(fcinfo->flinfo)) |
| 2852 | { |
| 2853 | int nargs = PG_NARGS() - 3; |
| 2854 | |
| 2855 | /* complain if wrong number of column values */ |
| 2856 | if (key->partnatts != nargs) |
| 2857 | ereport(ERROR, |
| 2858 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| 2859 | errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)" , |
| 2860 | key->partnatts, nargs))); |
| 2861 | |
| 2862 | /* allocate space for our cache */ |
| 2863 | fcinfo->flinfo->fn_extra = |
| 2864 | MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt, |
| 2865 | offsetof(ColumnsHashData, partsupfunc) + |
| 2866 | sizeof(FmgrInfo) * nargs); |
| 2867 | my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra; |
| 2868 | my_extra->relid = parentId; |
| 2869 | my_extra->nkeys = key->partnatts; |
| 2870 | memcpy(my_extra->partcollid, key->partcollation, |
| 2871 | key->partnatts * sizeof(Oid)); |
| 2872 | |
| 2873 | /* check argument types and save fmgr_infos */ |
| 2874 | for (j = 0; j < key->partnatts; ++j) |
| 2875 | { |
| 2876 | Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, j + 3); |
| 2877 | |
| 2878 | if (argtype != key->parttypid[j] && !IsBinaryCoercible(argtype, key->parttypid[j])) |
| 2879 | ereport(ERROR, |
| 2880 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| 2881 | errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"" , |
| 2882 | j + 1, format_type_be(key->parttypid[j]), format_type_be(argtype)))); |
| 2883 | |
| 2884 | fmgr_info_copy(&my_extra->partsupfunc[j], |
| 2885 | &key->partsupfunc[j], |
| 2886 | fcinfo->flinfo->fn_mcxt); |
| 2887 | } |
| 2888 | } |
| 2889 | else |
| 2890 | { |
| 2891 | ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3); |
| 2892 | |
| 2893 | /* allocate space for our cache -- just one FmgrInfo in this case */ |
| 2894 | fcinfo->flinfo->fn_extra = |
| 2895 | MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt, |
| 2896 | offsetof(ColumnsHashData, partsupfunc) + |
| 2897 | sizeof(FmgrInfo)); |
| 2898 | my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra; |
| 2899 | my_extra->relid = parentId; |
| 2900 | my_extra->nkeys = key->partnatts; |
| 2901 | my_extra->variadic_type = ARR_ELEMTYPE(variadic_array); |
| 2902 | get_typlenbyvalalign(my_extra->variadic_type, |
| 2903 | &my_extra->variadic_typlen, |
| 2904 | &my_extra->variadic_typbyval, |
| 2905 | &my_extra->variadic_typalign); |
| 2906 | my_extra->partcollid[0] = key->partcollation[0]; |
| 2907 | |
| 2908 | /* check argument types */ |
| 2909 | for (j = 0; j < key->partnatts; ++j) |
| 2910 | if (key->parttypid[j] != my_extra->variadic_type) |
| 2911 | ereport(ERROR, |
| 2912 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| 2913 | errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"" , |
| 2914 | j + 1, |
| 2915 | format_type_be(key->parttypid[j]), |
| 2916 | format_type_be(my_extra->variadic_type)))); |
| 2917 | |
| 2918 | fmgr_info_copy(&my_extra->partsupfunc[0], |
| 2919 | &key->partsupfunc[0], |
| 2920 | fcinfo->flinfo->fn_mcxt); |
| 2921 | } |
| 2922 | |
| 2923 | /* Hold lock until commit */ |
| 2924 | relation_close(parent, NoLock); |
| 2925 | } |
| 2926 | |
| 2927 | if (!OidIsValid(my_extra->variadic_type)) |
| 2928 | { |
| 2929 | int nkeys = my_extra->nkeys; |
| 2930 | int i; |
| 2931 | |
| 2932 | /* |
| 2933 | * For a non-variadic call, neither the number of arguments nor their |
| 2934 | * types can change across calls, so avoid the expense of rechecking |
| 2935 | * here. |
| 2936 | */ |
| 2937 | |
| 2938 | for (i = 0; i < nkeys; i++) |
| 2939 | { |
| 2940 | Datum hash; |
| 2941 | |
| 2942 | /* keys start from fourth argument of function. */ |
| 2943 | int argno = i + 3; |
| 2944 | |
| 2945 | if (PG_ARGISNULL(argno)) |
| 2946 | continue; |
| 2947 | |
| 2948 | hash = FunctionCall2Coll(&my_extra->partsupfunc[i], |
| 2949 | my_extra->partcollid[i], |
| 2950 | PG_GETARG_DATUM(argno), |
| 2951 | seed); |
| 2952 | |
| 2953 | /* Form a single 64-bit hash value */ |
| 2954 | rowHash = hash_combine64(rowHash, DatumGetUInt64(hash)); |
| 2955 | } |
| 2956 | } |
| 2957 | else |
| 2958 | { |
| 2959 | ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3); |
| 2960 | int i; |
| 2961 | int nelems; |
| 2962 | Datum *datum; |
| 2963 | bool *isnull; |
| 2964 | |
| 2965 | deconstruct_array(variadic_array, |
| 2966 | my_extra->variadic_type, |
| 2967 | my_extra->variadic_typlen, |
| 2968 | my_extra->variadic_typbyval, |
| 2969 | my_extra->variadic_typalign, |
| 2970 | &datum, &isnull, &nelems); |
| 2971 | |
| 2972 | /* complain if wrong number of column values */ |
| 2973 | if (nelems != my_extra->nkeys) |
| 2974 | ereport(ERROR, |
| 2975 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| 2976 | errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)" , |
| 2977 | my_extra->nkeys, nelems))); |
| 2978 | |
| 2979 | for (i = 0; i < nelems; i++) |
| 2980 | { |
| 2981 | Datum hash; |
| 2982 | |
| 2983 | if (isnull[i]) |
| 2984 | continue; |
| 2985 | |
| 2986 | hash = FunctionCall2Coll(&my_extra->partsupfunc[0], |
| 2987 | my_extra->partcollid[0], |
| 2988 | datum[i], |
| 2989 | seed); |
| 2990 | |
| 2991 | /* Form a single 64-bit hash value */ |
| 2992 | rowHash = hash_combine64(rowHash, DatumGetUInt64(hash)); |
| 2993 | } |
| 2994 | } |
| 2995 | |
| 2996 | PG_RETURN_BOOL(rowHash % modulus == remainder); |
| 2997 | } |
| 2998 | |