1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * tsquery_gist.c |
4 | * GiST index support for tsquery |
5 | * |
6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
7 | * |
8 | * |
9 | * IDENTIFICATION |
10 | * src/backend/utils/adt/tsquery_gist.c |
11 | * |
12 | *------------------------------------------------------------------------- |
13 | */ |
14 | |
15 | #include "postgres.h" |
16 | |
17 | #include "access/stratnum.h" |
18 | #include "access/gist.h" |
19 | #include "tsearch/ts_utils.h" |
20 | #include "utils/builtins.h" |
21 | |
22 | #define GETENTRY(vec,pos) DatumGetTSQuerySign((vec)->vector[pos].key) |
23 | |
24 | |
25 | Datum |
26 | gtsquery_compress(PG_FUNCTION_ARGS) |
27 | { |
28 | GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); |
29 | GISTENTRY *retval = entry; |
30 | |
31 | if (entry->leafkey) |
32 | { |
33 | TSQuerySign sign; |
34 | |
35 | retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); |
36 | sign = makeTSQuerySign(DatumGetTSQuery(entry->key)); |
37 | |
38 | gistentryinit(*retval, TSQuerySignGetDatum(sign), |
39 | entry->rel, entry->page, |
40 | entry->offset, false); |
41 | } |
42 | |
43 | PG_RETURN_POINTER(retval); |
44 | } |
45 | |
46 | /* |
47 | * We do not need a decompress function, because the other gtsquery |
48 | * support functions work with the compressed representation. |
49 | */ |
50 | |
51 | Datum |
52 | gtsquery_consistent(PG_FUNCTION_ARGS) |
53 | { |
54 | GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); |
55 | TSQuery query = PG_GETARG_TSQUERY(1); |
56 | StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); |
57 | |
58 | /* Oid subtype = PG_GETARG_OID(3); */ |
59 | bool *recheck = (bool *) PG_GETARG_POINTER(4); |
60 | TSQuerySign key = DatumGetTSQuerySign(entry->key); |
61 | TSQuerySign sq = makeTSQuerySign(query); |
62 | bool retval; |
63 | |
64 | /* All cases served by this function are inexact */ |
65 | *recheck = true; |
66 | |
67 | switch (strategy) |
68 | { |
69 | case RTContainsStrategyNumber: |
70 | if (GIST_LEAF(entry)) |
71 | retval = (key & sq) == sq; |
72 | else |
73 | retval = (key & sq) != 0; |
74 | break; |
75 | case RTContainedByStrategyNumber: |
76 | if (GIST_LEAF(entry)) |
77 | retval = (key & sq) == key; |
78 | else |
79 | retval = (key & sq) != 0; |
80 | break; |
81 | default: |
82 | retval = false; |
83 | } |
84 | PG_RETURN_BOOL(retval); |
85 | } |
86 | |
87 | Datum |
88 | gtsquery_union(PG_FUNCTION_ARGS) |
89 | { |
90 | GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); |
91 | int *size = (int *) PG_GETARG_POINTER(1); |
92 | TSQuerySign sign; |
93 | int i; |
94 | |
95 | sign = 0; |
96 | |
97 | for (i = 0; i < entryvec->n; i++) |
98 | sign |= GETENTRY(entryvec, i); |
99 | |
100 | *size = sizeof(TSQuerySign); |
101 | |
102 | PG_RETURN_TSQUERYSIGN(sign); |
103 | } |
104 | |
105 | Datum |
106 | gtsquery_same(PG_FUNCTION_ARGS) |
107 | { |
108 | TSQuerySign a = PG_GETARG_TSQUERYSIGN(0); |
109 | TSQuerySign b = PG_GETARG_TSQUERYSIGN(1); |
110 | bool *result = (bool *) PG_GETARG_POINTER(2); |
111 | |
112 | *result = (a == b) ? true : false; |
113 | |
114 | PG_RETURN_POINTER(result); |
115 | } |
116 | |
117 | static int |
118 | sizebitvec(TSQuerySign sign) |
119 | { |
120 | int size = 0, |
121 | i; |
122 | |
123 | for (i = 0; i < TSQS_SIGLEN; i++) |
124 | size += 0x01 & (sign >> i); |
125 | |
126 | return size; |
127 | } |
128 | |
129 | static int |
130 | hemdist(TSQuerySign a, TSQuerySign b) |
131 | { |
132 | TSQuerySign res = a ^ b; |
133 | |
134 | return sizebitvec(res); |
135 | } |
136 | |
137 | Datum |
138 | gtsquery_penalty(PG_FUNCTION_ARGS) |
139 | { |
140 | TSQuerySign origval = DatumGetTSQuerySign(((GISTENTRY *) PG_GETARG_POINTER(0))->key); |
141 | TSQuerySign newval = DatumGetTSQuerySign(((GISTENTRY *) PG_GETARG_POINTER(1))->key); |
142 | float *penalty = (float *) PG_GETARG_POINTER(2); |
143 | |
144 | *penalty = hemdist(origval, newval); |
145 | |
146 | PG_RETURN_POINTER(penalty); |
147 | } |
148 | |
149 | |
150 | typedef struct |
151 | { |
152 | OffsetNumber pos; |
153 | int32 cost; |
154 | } SPLITCOST; |
155 | |
156 | static int |
157 | comparecost(const void *a, const void *b) |
158 | { |
159 | if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost) |
160 | return 0; |
161 | else |
162 | return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1; |
163 | } |
164 | |
165 | #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) ) |
166 | |
167 | Datum |
168 | gtsquery_picksplit(PG_FUNCTION_ARGS) |
169 | { |
170 | GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0); |
171 | GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); |
172 | OffsetNumber maxoff = entryvec->n - 2; |
173 | OffsetNumber k, |
174 | j; |
175 | TSQuerySign datum_l, |
176 | datum_r; |
177 | int32 size_alpha, |
178 | size_beta; |
179 | int32 size_waste, |
180 | waste = -1; |
181 | int32 nbytes; |
182 | OffsetNumber seed_1 = 0, |
183 | seed_2 = 0; |
184 | OffsetNumber *left, |
185 | *right; |
186 | |
187 | SPLITCOST *costvector; |
188 | |
189 | nbytes = (maxoff + 2) * sizeof(OffsetNumber); |
190 | left = v->spl_left = (OffsetNumber *) palloc(nbytes); |
191 | right = v->spl_right = (OffsetNumber *) palloc(nbytes); |
192 | v->spl_nleft = v->spl_nright = 0; |
193 | |
194 | for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) |
195 | for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) |
196 | { |
197 | size_waste = hemdist(GETENTRY(entryvec, j), GETENTRY(entryvec, k)); |
198 | if (size_waste > waste) |
199 | { |
200 | waste = size_waste; |
201 | seed_1 = k; |
202 | seed_2 = j; |
203 | } |
204 | } |
205 | |
206 | |
207 | if (seed_1 == 0 || seed_2 == 0) |
208 | { |
209 | seed_1 = 1; |
210 | seed_2 = 2; |
211 | } |
212 | |
213 | datum_l = GETENTRY(entryvec, seed_1); |
214 | datum_r = GETENTRY(entryvec, seed_2); |
215 | |
216 | maxoff = OffsetNumberNext(maxoff); |
217 | costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff); |
218 | for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) |
219 | { |
220 | costvector[j - 1].pos = j; |
221 | size_alpha = hemdist(GETENTRY(entryvec, seed_1), GETENTRY(entryvec, j)); |
222 | size_beta = hemdist(GETENTRY(entryvec, seed_2), GETENTRY(entryvec, j)); |
223 | costvector[j - 1].cost = abs(size_alpha - size_beta); |
224 | } |
225 | qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost); |
226 | |
227 | for (k = 0; k < maxoff; k++) |
228 | { |
229 | j = costvector[k].pos; |
230 | if (j == seed_1) |
231 | { |
232 | *left++ = j; |
233 | v->spl_nleft++; |
234 | continue; |
235 | } |
236 | else if (j == seed_2) |
237 | { |
238 | *right++ = j; |
239 | v->spl_nright++; |
240 | continue; |
241 | } |
242 | size_alpha = hemdist(datum_l, GETENTRY(entryvec, j)); |
243 | size_beta = hemdist(datum_r, GETENTRY(entryvec, j)); |
244 | |
245 | if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05)) |
246 | { |
247 | datum_l |= GETENTRY(entryvec, j); |
248 | *left++ = j; |
249 | v->spl_nleft++; |
250 | } |
251 | else |
252 | { |
253 | datum_r |= GETENTRY(entryvec, j); |
254 | *right++ = j; |
255 | v->spl_nright++; |
256 | } |
257 | } |
258 | |
259 | *right = *left = FirstOffsetNumber; |
260 | v->spl_ldatum = TSQuerySignGetDatum(datum_l); |
261 | v->spl_rdatum = TSQuerySignGetDatum(datum_r); |
262 | |
263 | PG_RETURN_POINTER(v); |
264 | } |
265 | |
266 | /* |
267 | * Formerly, gtsquery_consistent was declared in pg_proc.h with arguments |
268 | * that did not match the documented conventions for GiST support functions. |
269 | * We fixed that, but we still need a pg_proc entry with the old signature |
270 | * to support reloading pre-9.6 contrib/tsearch2 opclass declarations. |
271 | * This compatibility function should go away eventually. |
272 | */ |
273 | Datum |
274 | gtsquery_consistent_oldsig(PG_FUNCTION_ARGS) |
275 | { |
276 | return gtsquery_consistent(fcinfo); |
277 | } |
278 | |