| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * tsquery_gist.c |
| 4 | * GiST index support for tsquery |
| 5 | * |
| 6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 7 | * |
| 8 | * |
| 9 | * IDENTIFICATION |
| 10 | * src/backend/utils/adt/tsquery_gist.c |
| 11 | * |
| 12 | *------------------------------------------------------------------------- |
| 13 | */ |
| 14 | |
| 15 | #include "postgres.h" |
| 16 | |
| 17 | #include "access/stratnum.h" |
| 18 | #include "access/gist.h" |
| 19 | #include "tsearch/ts_utils.h" |
| 20 | #include "utils/builtins.h" |
| 21 | |
| 22 | #define GETENTRY(vec,pos) DatumGetTSQuerySign((vec)->vector[pos].key) |
| 23 | |
| 24 | |
| 25 | Datum |
| 26 | gtsquery_compress(PG_FUNCTION_ARGS) |
| 27 | { |
| 28 | GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); |
| 29 | GISTENTRY *retval = entry; |
| 30 | |
| 31 | if (entry->leafkey) |
| 32 | { |
| 33 | TSQuerySign sign; |
| 34 | |
| 35 | retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); |
| 36 | sign = makeTSQuerySign(DatumGetTSQuery(entry->key)); |
| 37 | |
| 38 | gistentryinit(*retval, TSQuerySignGetDatum(sign), |
| 39 | entry->rel, entry->page, |
| 40 | entry->offset, false); |
| 41 | } |
| 42 | |
| 43 | PG_RETURN_POINTER(retval); |
| 44 | } |
| 45 | |
| 46 | /* |
| 47 | * We do not need a decompress function, because the other gtsquery |
| 48 | * support functions work with the compressed representation. |
| 49 | */ |
| 50 | |
| 51 | Datum |
| 52 | gtsquery_consistent(PG_FUNCTION_ARGS) |
| 53 | { |
| 54 | GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); |
| 55 | TSQuery query = PG_GETARG_TSQUERY(1); |
| 56 | StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); |
| 57 | |
| 58 | /* Oid subtype = PG_GETARG_OID(3); */ |
| 59 | bool *recheck = (bool *) PG_GETARG_POINTER(4); |
| 60 | TSQuerySign key = DatumGetTSQuerySign(entry->key); |
| 61 | TSQuerySign sq = makeTSQuerySign(query); |
| 62 | bool retval; |
| 63 | |
| 64 | /* All cases served by this function are inexact */ |
| 65 | *recheck = true; |
| 66 | |
| 67 | switch (strategy) |
| 68 | { |
| 69 | case RTContainsStrategyNumber: |
| 70 | if (GIST_LEAF(entry)) |
| 71 | retval = (key & sq) == sq; |
| 72 | else |
| 73 | retval = (key & sq) != 0; |
| 74 | break; |
| 75 | case RTContainedByStrategyNumber: |
| 76 | if (GIST_LEAF(entry)) |
| 77 | retval = (key & sq) == key; |
| 78 | else |
| 79 | retval = (key & sq) != 0; |
| 80 | break; |
| 81 | default: |
| 82 | retval = false; |
| 83 | } |
| 84 | PG_RETURN_BOOL(retval); |
| 85 | } |
| 86 | |
| 87 | Datum |
| 88 | gtsquery_union(PG_FUNCTION_ARGS) |
| 89 | { |
| 90 | GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); |
| 91 | int *size = (int *) PG_GETARG_POINTER(1); |
| 92 | TSQuerySign sign; |
| 93 | int i; |
| 94 | |
| 95 | sign = 0; |
| 96 | |
| 97 | for (i = 0; i < entryvec->n; i++) |
| 98 | sign |= GETENTRY(entryvec, i); |
| 99 | |
| 100 | *size = sizeof(TSQuerySign); |
| 101 | |
| 102 | PG_RETURN_TSQUERYSIGN(sign); |
| 103 | } |
| 104 | |
| 105 | Datum |
| 106 | gtsquery_same(PG_FUNCTION_ARGS) |
| 107 | { |
| 108 | TSQuerySign a = PG_GETARG_TSQUERYSIGN(0); |
| 109 | TSQuerySign b = PG_GETARG_TSQUERYSIGN(1); |
| 110 | bool *result = (bool *) PG_GETARG_POINTER(2); |
| 111 | |
| 112 | *result = (a == b) ? true : false; |
| 113 | |
| 114 | PG_RETURN_POINTER(result); |
| 115 | } |
| 116 | |
| 117 | static int |
| 118 | sizebitvec(TSQuerySign sign) |
| 119 | { |
| 120 | int size = 0, |
| 121 | i; |
| 122 | |
| 123 | for (i = 0; i < TSQS_SIGLEN; i++) |
| 124 | size += 0x01 & (sign >> i); |
| 125 | |
| 126 | return size; |
| 127 | } |
| 128 | |
| 129 | static int |
| 130 | hemdist(TSQuerySign a, TSQuerySign b) |
| 131 | { |
| 132 | TSQuerySign res = a ^ b; |
| 133 | |
| 134 | return sizebitvec(res); |
| 135 | } |
| 136 | |
| 137 | Datum |
| 138 | gtsquery_penalty(PG_FUNCTION_ARGS) |
| 139 | { |
| 140 | TSQuerySign origval = DatumGetTSQuerySign(((GISTENTRY *) PG_GETARG_POINTER(0))->key); |
| 141 | TSQuerySign newval = DatumGetTSQuerySign(((GISTENTRY *) PG_GETARG_POINTER(1))->key); |
| 142 | float *penalty = (float *) PG_GETARG_POINTER(2); |
| 143 | |
| 144 | *penalty = hemdist(origval, newval); |
| 145 | |
| 146 | PG_RETURN_POINTER(penalty); |
| 147 | } |
| 148 | |
| 149 | |
| 150 | typedef struct |
| 151 | { |
| 152 | OffsetNumber pos; |
| 153 | int32 cost; |
| 154 | } SPLITCOST; |
| 155 | |
| 156 | static int |
| 157 | comparecost(const void *a, const void *b) |
| 158 | { |
| 159 | if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost) |
| 160 | return 0; |
| 161 | else |
| 162 | return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1; |
| 163 | } |
| 164 | |
| 165 | #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) ) |
| 166 | |
| 167 | Datum |
| 168 | gtsquery_picksplit(PG_FUNCTION_ARGS) |
| 169 | { |
| 170 | GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); |
| 171 | GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); |
| 172 | OffsetNumber maxoff = entryvec->n - 2; |
| 173 | OffsetNumber k, |
| 174 | j; |
| 175 | TSQuerySign datum_l, |
| 176 | datum_r; |
| 177 | int32 size_alpha, |
| 178 | size_beta; |
| 179 | int32 size_waste, |
| 180 | waste = -1; |
| 181 | int32 nbytes; |
| 182 | OffsetNumber seed_1 = 0, |
| 183 | seed_2 = 0; |
| 184 | OffsetNumber *left, |
| 185 | *right; |
| 186 | |
| 187 | SPLITCOST *costvector; |
| 188 | |
| 189 | nbytes = (maxoff + 2) * sizeof(OffsetNumber); |
| 190 | left = v->spl_left = (OffsetNumber *) palloc(nbytes); |
| 191 | right = v->spl_right = (OffsetNumber *) palloc(nbytes); |
| 192 | v->spl_nleft = v->spl_nright = 0; |
| 193 | |
| 194 | for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) |
| 195 | for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) |
| 196 | { |
| 197 | size_waste = hemdist(GETENTRY(entryvec, j), GETENTRY(entryvec, k)); |
| 198 | if (size_waste > waste) |
| 199 | { |
| 200 | waste = size_waste; |
| 201 | seed_1 = k; |
| 202 | seed_2 = j; |
| 203 | } |
| 204 | } |
| 205 | |
| 206 | |
| 207 | if (seed_1 == 0 || seed_2 == 0) |
| 208 | { |
| 209 | seed_1 = 1; |
| 210 | seed_2 = 2; |
| 211 | } |
| 212 | |
| 213 | datum_l = GETENTRY(entryvec, seed_1); |
| 214 | datum_r = GETENTRY(entryvec, seed_2); |
| 215 | |
| 216 | maxoff = OffsetNumberNext(maxoff); |
| 217 | costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff); |
| 218 | for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) |
| 219 | { |
| 220 | costvector[j - 1].pos = j; |
| 221 | size_alpha = hemdist(GETENTRY(entryvec, seed_1), GETENTRY(entryvec, j)); |
| 222 | size_beta = hemdist(GETENTRY(entryvec, seed_2), GETENTRY(entryvec, j)); |
| 223 | costvector[j - 1].cost = abs(size_alpha - size_beta); |
| 224 | } |
| 225 | qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost); |
| 226 | |
| 227 | for (k = 0; k < maxoff; k++) |
| 228 | { |
| 229 | j = costvector[k].pos; |
| 230 | if (j == seed_1) |
| 231 | { |
| 232 | *left++ = j; |
| 233 | v->spl_nleft++; |
| 234 | continue; |
| 235 | } |
| 236 | else if (j == seed_2) |
| 237 | { |
| 238 | *right++ = j; |
| 239 | v->spl_nright++; |
| 240 | continue; |
| 241 | } |
| 242 | size_alpha = hemdist(datum_l, GETENTRY(entryvec, j)); |
| 243 | size_beta = hemdist(datum_r, GETENTRY(entryvec, j)); |
| 244 | |
| 245 | if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05)) |
| 246 | { |
| 247 | datum_l |= GETENTRY(entryvec, j); |
| 248 | *left++ = j; |
| 249 | v->spl_nleft++; |
| 250 | } |
| 251 | else |
| 252 | { |
| 253 | datum_r |= GETENTRY(entryvec, j); |
| 254 | *right++ = j; |
| 255 | v->spl_nright++; |
| 256 | } |
| 257 | } |
| 258 | |
| 259 | *right = *left = FirstOffsetNumber; |
| 260 | v->spl_ldatum = TSQuerySignGetDatum(datum_l); |
| 261 | v->spl_rdatum = TSQuerySignGetDatum(datum_r); |
| 262 | |
| 263 | PG_RETURN_POINTER(v); |
| 264 | } |
| 265 | |
| 266 | /* |
| 267 | * Formerly, gtsquery_consistent was declared in pg_proc.h with arguments |
| 268 | * that did not match the documented conventions for GiST support functions. |
| 269 | * We fixed that, but we still need a pg_proc entry with the old signature |
| 270 | * to support reloading pre-9.6 contrib/tsearch2 opclass declarations. |
| 271 | * This compatibility function should go away eventually. |
| 272 | */ |
| 273 | Datum |
| 274 | gtsquery_consistent_oldsig(PG_FUNCTION_ARGS) |
| 275 | { |
| 276 | return gtsquery_consistent(fcinfo); |
| 277 | } |
| 278 | |