1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * tsginidx.c |
4 | * GIN support functions for tsvector_ops |
5 | * |
6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
7 | * |
8 | * |
9 | * IDENTIFICATION |
10 | * src/backend/utils/adt/tsginidx.c |
11 | * |
12 | *------------------------------------------------------------------------- |
13 | */ |
14 | #include "postgres.h" |
15 | |
16 | #include "access/gin.h" |
17 | #include "access/stratnum.h" |
18 | #include "miscadmin.h" |
19 | #include "tsearch/ts_type.h" |
20 | #include "tsearch/ts_utils.h" |
21 | #include "utils/builtins.h" |
22 | |
23 | |
24 | Datum |
25 | gin_cmp_tslexeme(PG_FUNCTION_ARGS) |
26 | { |
27 | text *a = PG_GETARG_TEXT_PP(0); |
28 | text *b = PG_GETARG_TEXT_PP(1); |
29 | int cmp; |
30 | |
31 | cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a), |
32 | VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b), |
33 | false); |
34 | |
35 | PG_FREE_IF_COPY(a, 0); |
36 | PG_FREE_IF_COPY(b, 1); |
37 | PG_RETURN_INT32(cmp); |
38 | } |
39 | |
40 | Datum |
41 | gin_cmp_prefix(PG_FUNCTION_ARGS) |
42 | { |
43 | text *a = PG_GETARG_TEXT_PP(0); |
44 | text *b = PG_GETARG_TEXT_PP(1); |
45 | |
46 | #ifdef NOT_USED |
47 | StrategyNumber strategy = PG_GETARG_UINT16(2); |
48 | Pointer extra_data = PG_GETARG_POINTER(3); |
49 | #endif |
50 | int cmp; |
51 | |
52 | cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a), |
53 | VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b), |
54 | true); |
55 | |
56 | if (cmp < 0) |
57 | cmp = 1; /* prevent continue scan */ |
58 | |
59 | PG_FREE_IF_COPY(a, 0); |
60 | PG_FREE_IF_COPY(b, 1); |
61 | PG_RETURN_INT32(cmp); |
62 | } |
63 | |
64 | Datum |
65 | (PG_FUNCTION_ARGS) |
66 | { |
67 | TSVector vector = PG_GETARG_TSVECTOR(0); |
68 | int32 *nentries = (int32 *) PG_GETARG_POINTER(1); |
69 | Datum *entries = NULL; |
70 | |
71 | *nentries = vector->size; |
72 | if (vector->size > 0) |
73 | { |
74 | int i; |
75 | WordEntry *we = ARRPTR(vector); |
76 | |
77 | entries = (Datum *) palloc(sizeof(Datum) * vector->size); |
78 | |
79 | for (i = 0; i < vector->size; i++) |
80 | { |
81 | text *txt; |
82 | |
83 | txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len); |
84 | entries[i] = PointerGetDatum(txt); |
85 | |
86 | we++; |
87 | } |
88 | } |
89 | |
90 | PG_FREE_IF_COPY(vector, 0); |
91 | PG_RETURN_POINTER(entries); |
92 | } |
93 | |
94 | Datum |
95 | (PG_FUNCTION_ARGS) |
96 | { |
97 | TSQuery query = PG_GETARG_TSQUERY(0); |
98 | int32 *nentries = (int32 *) PG_GETARG_POINTER(1); |
99 | |
100 | /* StrategyNumber strategy = PG_GETARG_UINT16(2); */ |
101 | bool **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3); |
102 | Pointer ** = (Pointer **) PG_GETARG_POINTER(4); |
103 | |
104 | /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */ |
105 | int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); |
106 | Datum *entries = NULL; |
107 | |
108 | *nentries = 0; |
109 | |
110 | if (query->size > 0) |
111 | { |
112 | QueryItem *item = GETQUERY(query); |
113 | int32 i, |
114 | j; |
115 | bool *partialmatch; |
116 | int *map_item_operand; |
117 | |
118 | /* |
119 | * If the query doesn't have any required positive matches (for |
120 | * instance, it's something like '! foo'), we have to do a full index |
121 | * scan. |
122 | */ |
123 | if (tsquery_requires_match(item)) |
124 | *searchMode = GIN_SEARCH_MODE_DEFAULT; |
125 | else |
126 | *searchMode = GIN_SEARCH_MODE_ALL; |
127 | |
128 | /* count number of VAL items */ |
129 | j = 0; |
130 | for (i = 0; i < query->size; i++) |
131 | { |
132 | if (item[i].type == QI_VAL) |
133 | j++; |
134 | } |
135 | *nentries = j; |
136 | |
137 | entries = (Datum *) palloc(sizeof(Datum) * j); |
138 | partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j); |
139 | |
140 | /* |
141 | * Make map to convert item's number to corresponding operand's (the |
142 | * same, entry's) number. Entry's number is used in check array in |
143 | * consistent method. We use the same map for each entry. |
144 | */ |
145 | *extra_data = (Pointer *) palloc(sizeof(Pointer) * j); |
146 | map_item_operand = (int *) palloc0(sizeof(int) * query->size); |
147 | |
148 | /* Now rescan the VAL items and fill in the arrays */ |
149 | j = 0; |
150 | for (i = 0; i < query->size; i++) |
151 | { |
152 | if (item[i].type == QI_VAL) |
153 | { |
154 | QueryOperand *val = &item[i].qoperand; |
155 | text *txt; |
156 | |
157 | txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance, |
158 | val->length); |
159 | entries[j] = PointerGetDatum(txt); |
160 | partialmatch[j] = val->prefix; |
161 | (*extra_data)[j] = (Pointer) map_item_operand; |
162 | map_item_operand[i] = j; |
163 | j++; |
164 | } |
165 | } |
166 | } |
167 | |
168 | PG_FREE_IF_COPY(query, 0); |
169 | |
170 | PG_RETURN_POINTER(entries); |
171 | } |
172 | |
173 | typedef struct |
174 | { |
175 | QueryItem *first_item; |
176 | GinTernaryValue *check; |
177 | int *map_item_operand; |
178 | bool *need_recheck; |
179 | } GinChkVal; |
180 | |
181 | static GinTernaryValue |
182 | checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *data) |
183 | { |
184 | int j; |
185 | |
186 | /* |
187 | * if any val requiring a weight is used or caller needs position |
188 | * information then set recheck flag |
189 | */ |
190 | if (val->weight != 0 || data != NULL) |
191 | *(gcv->need_recheck) = true; |
192 | |
193 | /* convert item's number to corresponding entry's (operand's) number */ |
194 | j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item]; |
195 | |
196 | /* return presence of current entry in indexed value */ |
197 | return gcv->check[j]; |
198 | } |
199 | |
200 | /* |
201 | * Wrapper of check condition function for TS_execute. |
202 | */ |
203 | static bool |
204 | checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data) |
205 | { |
206 | return checkcondition_gin_internal((GinChkVal *) checkval, |
207 | val, |
208 | data) != GIN_FALSE; |
209 | } |
210 | |
211 | /* |
212 | * Evaluate tsquery boolean expression using ternary logic. |
213 | */ |
214 | static GinTernaryValue |
215 | TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase) |
216 | { |
217 | GinTernaryValue val1, |
218 | val2, |
219 | result; |
220 | |
221 | /* since this function recurses, it could be driven to stack overflow */ |
222 | check_stack_depth(); |
223 | |
224 | if (curitem->type == QI_VAL) |
225 | return |
226 | checkcondition_gin_internal(gcv, |
227 | (QueryOperand *) curitem, |
228 | NULL /* don't have position info */ ); |
229 | |
230 | switch (curitem->qoperator.oper) |
231 | { |
232 | case OP_NOT: |
233 | /* In phrase search, always return MAYBE since we lack positions */ |
234 | if (in_phrase) |
235 | return GIN_MAYBE; |
236 | result = TS_execute_ternary(gcv, curitem + 1, in_phrase); |
237 | if (result == GIN_MAYBE) |
238 | return result; |
239 | return !result; |
240 | |
241 | case OP_PHRASE: |
242 | |
243 | /* |
244 | * GIN doesn't contain any information about positions, so treat |
245 | * OP_PHRASE as OP_AND with recheck requirement |
246 | */ |
247 | *(gcv->need_recheck) = true; |
248 | /* Pass down in_phrase == true in case there's a NOT below */ |
249 | in_phrase = true; |
250 | |
251 | /* FALL THRU */ |
252 | |
253 | case OP_AND: |
254 | val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left, |
255 | in_phrase); |
256 | if (val1 == GIN_FALSE) |
257 | return GIN_FALSE; |
258 | val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase); |
259 | if (val2 == GIN_FALSE) |
260 | return GIN_FALSE; |
261 | if (val1 == GIN_TRUE && val2 == GIN_TRUE) |
262 | return GIN_TRUE; |
263 | else |
264 | return GIN_MAYBE; |
265 | |
266 | case OP_OR: |
267 | val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left, |
268 | in_phrase); |
269 | if (val1 == GIN_TRUE) |
270 | return GIN_TRUE; |
271 | val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase); |
272 | if (val2 == GIN_TRUE) |
273 | return GIN_TRUE; |
274 | if (val1 == GIN_FALSE && val2 == GIN_FALSE) |
275 | return GIN_FALSE; |
276 | else |
277 | return GIN_MAYBE; |
278 | |
279 | default: |
280 | elog(ERROR, "unrecognized operator: %d" , curitem->qoperator.oper); |
281 | } |
282 | |
283 | /* not reachable, but keep compiler quiet */ |
284 | return false; |
285 | } |
286 | |
287 | Datum |
288 | gin_tsquery_consistent(PG_FUNCTION_ARGS) |
289 | { |
290 | bool *check = (bool *) PG_GETARG_POINTER(0); |
291 | |
292 | /* StrategyNumber strategy = PG_GETARG_UINT16(1); */ |
293 | TSQuery query = PG_GETARG_TSQUERY(2); |
294 | |
295 | /* int32 nkeys = PG_GETARG_INT32(3); */ |
296 | Pointer * = (Pointer *) PG_GETARG_POINTER(4); |
297 | bool *recheck = (bool *) PG_GETARG_POINTER(5); |
298 | bool res = false; |
299 | |
300 | /* Initially assume query doesn't require recheck */ |
301 | *recheck = false; |
302 | |
303 | if (query->size > 0) |
304 | { |
305 | GinChkVal gcv; |
306 | |
307 | /* |
308 | * check-parameter array has one entry for each value (operand) in the |
309 | * query. |
310 | */ |
311 | gcv.first_item = GETQUERY(query); |
312 | StaticAssertStmt(sizeof(GinTernaryValue) == sizeof(bool), |
313 | "sizes of GinTernaryValue and bool are not equal" ); |
314 | gcv.check = (GinTernaryValue *) check; |
315 | gcv.map_item_operand = (int *) (extra_data[0]); |
316 | gcv.need_recheck = recheck; |
317 | |
318 | res = TS_execute(GETQUERY(query), |
319 | &gcv, |
320 | TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_NO_POS, |
321 | checkcondition_gin); |
322 | } |
323 | |
324 | PG_RETURN_BOOL(res); |
325 | } |
326 | |
327 | Datum |
328 | gin_tsquery_triconsistent(PG_FUNCTION_ARGS) |
329 | { |
330 | GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); |
331 | |
332 | /* StrategyNumber strategy = PG_GETARG_UINT16(1); */ |
333 | TSQuery query = PG_GETARG_TSQUERY(2); |
334 | |
335 | /* int32 nkeys = PG_GETARG_INT32(3); */ |
336 | Pointer * = (Pointer *) PG_GETARG_POINTER(4); |
337 | GinTernaryValue res = GIN_FALSE; |
338 | bool recheck; |
339 | |
340 | /* Initially assume query doesn't require recheck */ |
341 | recheck = false; |
342 | |
343 | if (query->size > 0) |
344 | { |
345 | GinChkVal gcv; |
346 | |
347 | /* |
348 | * check-parameter array has one entry for each value (operand) in the |
349 | * query. |
350 | */ |
351 | gcv.first_item = GETQUERY(query); |
352 | gcv.check = check; |
353 | gcv.map_item_operand = (int *) (extra_data[0]); |
354 | gcv.need_recheck = &recheck; |
355 | |
356 | res = TS_execute_ternary(&gcv, GETQUERY(query), false); |
357 | |
358 | if (res == GIN_TRUE && recheck) |
359 | res = GIN_MAYBE; |
360 | } |
361 | |
362 | PG_RETURN_GIN_TERNARY_VALUE(res); |
363 | } |
364 | |
365 | /* |
366 | * Formerly, gin_extract_tsvector had only two arguments. Now it has three, |
367 | * but we still need a pg_proc entry with two args to support reloading |
368 | * pre-9.1 contrib/tsearch2 opclass declarations. This compatibility |
369 | * function should go away eventually. (Note: you might say "hey, but the |
370 | * code above is only *using* two args, so let's just declare it that way". |
371 | * If you try that you'll find the opr_sanity regression test complains.) |
372 | */ |
373 | Datum |
374 | (PG_FUNCTION_ARGS) |
375 | { |
376 | if (PG_NARGS() < 3) /* should not happen */ |
377 | elog(ERROR, "gin_extract_tsvector requires three arguments" ); |
378 | return gin_extract_tsvector(fcinfo); |
379 | } |
380 | |
381 | /* |
382 | * Likewise, we need a stub version of gin_extract_tsquery declared with |
383 | * only five arguments. |
384 | */ |
385 | Datum |
386 | (PG_FUNCTION_ARGS) |
387 | { |
388 | if (PG_NARGS() < 7) /* should not happen */ |
389 | elog(ERROR, "gin_extract_tsquery requires seven arguments" ); |
390 | return gin_extract_tsquery(fcinfo); |
391 | } |
392 | |
393 | /* |
394 | * Likewise, we need a stub version of gin_tsquery_consistent declared with |
395 | * only six arguments. |
396 | */ |
397 | Datum |
398 | gin_tsquery_consistent_6args(PG_FUNCTION_ARGS) |
399 | { |
400 | if (PG_NARGS() < 8) /* should not happen */ |
401 | elog(ERROR, "gin_tsquery_consistent requires eight arguments" ); |
402 | return gin_tsquery_consistent(fcinfo); |
403 | } |
404 | |
405 | /* |
406 | * Likewise, a stub version of gin_extract_tsquery declared with argument |
407 | * types that are no longer considered appropriate. |
408 | */ |
409 | Datum |
410 | (PG_FUNCTION_ARGS) |
411 | { |
412 | return gin_extract_tsquery(fcinfo); |
413 | } |
414 | |
415 | /* |
416 | * Likewise, a stub version of gin_tsquery_consistent declared with argument |
417 | * types that are no longer considered appropriate. |
418 | */ |
419 | Datum |
420 | gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS) |
421 | { |
422 | return gin_tsquery_consistent(fcinfo); |
423 | } |
424 | |