1/*-------------------------------------------------------------------------
2 *
3 * tsginidx.c
4 * GIN support functions for tsvector_ops
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/utils/adt/tsginidx.c
11 *
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include "access/gin.h"
17#include "access/stratnum.h"
18#include "miscadmin.h"
19#include "tsearch/ts_type.h"
20#include "tsearch/ts_utils.h"
21#include "utils/builtins.h"
22
23
24Datum
25gin_cmp_tslexeme(PG_FUNCTION_ARGS)
26{
27 text *a = PG_GETARG_TEXT_PP(0);
28 text *b = PG_GETARG_TEXT_PP(1);
29 int cmp;
30
31 cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
32 VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
33 false);
34
35 PG_FREE_IF_COPY(a, 0);
36 PG_FREE_IF_COPY(b, 1);
37 PG_RETURN_INT32(cmp);
38}
39
40Datum
41gin_cmp_prefix(PG_FUNCTION_ARGS)
42{
43 text *a = PG_GETARG_TEXT_PP(0);
44 text *b = PG_GETARG_TEXT_PP(1);
45
46#ifdef NOT_USED
47 StrategyNumber strategy = PG_GETARG_UINT16(2);
48 Pointer extra_data = PG_GETARG_POINTER(3);
49#endif
50 int cmp;
51
52 cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
53 VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
54 true);
55
56 if (cmp < 0)
57 cmp = 1; /* prevent continue scan */
58
59 PG_FREE_IF_COPY(a, 0);
60 PG_FREE_IF_COPY(b, 1);
61 PG_RETURN_INT32(cmp);
62}
63
64Datum
65gin_extract_tsvector(PG_FUNCTION_ARGS)
66{
67 TSVector vector = PG_GETARG_TSVECTOR(0);
68 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
69 Datum *entries = NULL;
70
71 *nentries = vector->size;
72 if (vector->size > 0)
73 {
74 int i;
75 WordEntry *we = ARRPTR(vector);
76
77 entries = (Datum *) palloc(sizeof(Datum) * vector->size);
78
79 for (i = 0; i < vector->size; i++)
80 {
81 text *txt;
82
83 txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len);
84 entries[i] = PointerGetDatum(txt);
85
86 we++;
87 }
88 }
89
90 PG_FREE_IF_COPY(vector, 0);
91 PG_RETURN_POINTER(entries);
92}
93
94Datum
95gin_extract_tsquery(PG_FUNCTION_ARGS)
96{
97 TSQuery query = PG_GETARG_TSQUERY(0);
98 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
99
100 /* StrategyNumber strategy = PG_GETARG_UINT16(2); */
101 bool **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3);
102 Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
103
104 /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
105 int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
106 Datum *entries = NULL;
107
108 *nentries = 0;
109
110 if (query->size > 0)
111 {
112 QueryItem *item = GETQUERY(query);
113 int32 i,
114 j;
115 bool *partialmatch;
116 int *map_item_operand;
117
118 /*
119 * If the query doesn't have any required positive matches (for
120 * instance, it's something like '! foo'), we have to do a full index
121 * scan.
122 */
123 if (tsquery_requires_match(item))
124 *searchMode = GIN_SEARCH_MODE_DEFAULT;
125 else
126 *searchMode = GIN_SEARCH_MODE_ALL;
127
128 /* count number of VAL items */
129 j = 0;
130 for (i = 0; i < query->size; i++)
131 {
132 if (item[i].type == QI_VAL)
133 j++;
134 }
135 *nentries = j;
136
137 entries = (Datum *) palloc(sizeof(Datum) * j);
138 partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j);
139
140 /*
141 * Make map to convert item's number to corresponding operand's (the
142 * same, entry's) number. Entry's number is used in check array in
143 * consistent method. We use the same map for each entry.
144 */
145 *extra_data = (Pointer *) palloc(sizeof(Pointer) * j);
146 map_item_operand = (int *) palloc0(sizeof(int) * query->size);
147
148 /* Now rescan the VAL items and fill in the arrays */
149 j = 0;
150 for (i = 0; i < query->size; i++)
151 {
152 if (item[i].type == QI_VAL)
153 {
154 QueryOperand *val = &item[i].qoperand;
155 text *txt;
156
157 txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
158 val->length);
159 entries[j] = PointerGetDatum(txt);
160 partialmatch[j] = val->prefix;
161 (*extra_data)[j] = (Pointer) map_item_operand;
162 map_item_operand[i] = j;
163 j++;
164 }
165 }
166 }
167
168 PG_FREE_IF_COPY(query, 0);
169
170 PG_RETURN_POINTER(entries);
171}
172
/*
 * Working state shared by the check-condition callbacks while a tsquery is
 * evaluated against one set of GIN check results.
 */
typedef struct
{
	QueryItem  *first_item;		/* start of the query's item array; used as
								 * the base for computing an item's index */
	GinTernaryValue *check;		/* per-entry check results supplied by GIN */
	int		   *map_item_operand;	/* item index -> entry (operand) number */
	bool	   *need_recheck;	/* set to true when the answer must be
								 * rechecked */
} GinChkVal;
180
181static GinTernaryValue
182checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *data)
183{
184 int j;
185
186 /*
187 * if any val requiring a weight is used or caller needs position
188 * information then set recheck flag
189 */
190 if (val->weight != 0 || data != NULL)
191 *(gcv->need_recheck) = true;
192
193 /* convert item's number to corresponding entry's (operand's) number */
194 j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
195
196 /* return presence of current entry in indexed value */
197 return gcv->check[j];
198}
199
200/*
201 * Wrapper of check condition function for TS_execute.
202 */
203static bool
204checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
205{
206 return checkcondition_gin_internal((GinChkVal *) checkval,
207 val,
208 data) != GIN_FALSE;
209}
210
211/*
212 * Evaluate tsquery boolean expression using ternary logic.
213 */
214static GinTernaryValue
215TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase)
216{
217 GinTernaryValue val1,
218 val2,
219 result;
220
221 /* since this function recurses, it could be driven to stack overflow */
222 check_stack_depth();
223
224 if (curitem->type == QI_VAL)
225 return
226 checkcondition_gin_internal(gcv,
227 (QueryOperand *) curitem,
228 NULL /* don't have position info */ );
229
230 switch (curitem->qoperator.oper)
231 {
232 case OP_NOT:
233 /* In phrase search, always return MAYBE since we lack positions */
234 if (in_phrase)
235 return GIN_MAYBE;
236 result = TS_execute_ternary(gcv, curitem + 1, in_phrase);
237 if (result == GIN_MAYBE)
238 return result;
239 return !result;
240
241 case OP_PHRASE:
242
243 /*
244 * GIN doesn't contain any information about positions, so treat
245 * OP_PHRASE as OP_AND with recheck requirement
246 */
247 *(gcv->need_recheck) = true;
248 /* Pass down in_phrase == true in case there's a NOT below */
249 in_phrase = true;
250
251 /* FALL THRU */
252
253 case OP_AND:
254 val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
255 in_phrase);
256 if (val1 == GIN_FALSE)
257 return GIN_FALSE;
258 val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
259 if (val2 == GIN_FALSE)
260 return GIN_FALSE;
261 if (val1 == GIN_TRUE && val2 == GIN_TRUE)
262 return GIN_TRUE;
263 else
264 return GIN_MAYBE;
265
266 case OP_OR:
267 val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
268 in_phrase);
269 if (val1 == GIN_TRUE)
270 return GIN_TRUE;
271 val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
272 if (val2 == GIN_TRUE)
273 return GIN_TRUE;
274 if (val1 == GIN_FALSE && val2 == GIN_FALSE)
275 return GIN_FALSE;
276 else
277 return GIN_MAYBE;
278
279 default:
280 elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
281 }
282
283 /* not reachable, but keep compiler quiet */
284 return false;
285}
286
287Datum
288gin_tsquery_consistent(PG_FUNCTION_ARGS)
289{
290 bool *check = (bool *) PG_GETARG_POINTER(0);
291
292 /* StrategyNumber strategy = PG_GETARG_UINT16(1); */
293 TSQuery query = PG_GETARG_TSQUERY(2);
294
295 /* int32 nkeys = PG_GETARG_INT32(3); */
296 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
297 bool *recheck = (bool *) PG_GETARG_POINTER(5);
298 bool res = false;
299
300 /* Initially assume query doesn't require recheck */
301 *recheck = false;
302
303 if (query->size > 0)
304 {
305 GinChkVal gcv;
306
307 /*
308 * check-parameter array has one entry for each value (operand) in the
309 * query.
310 */
311 gcv.first_item = GETQUERY(query);
312 StaticAssertStmt(sizeof(GinTernaryValue) == sizeof(bool),
313 "sizes of GinTernaryValue and bool are not equal");
314 gcv.check = (GinTernaryValue *) check;
315 gcv.map_item_operand = (int *) (extra_data[0]);
316 gcv.need_recheck = recheck;
317
318 res = TS_execute(GETQUERY(query),
319 &gcv,
320 TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_NO_POS,
321 checkcondition_gin);
322 }
323
324 PG_RETURN_BOOL(res);
325}
326
327Datum
328gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
329{
330 GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
331
332 /* StrategyNumber strategy = PG_GETARG_UINT16(1); */
333 TSQuery query = PG_GETARG_TSQUERY(2);
334
335 /* int32 nkeys = PG_GETARG_INT32(3); */
336 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
337 GinTernaryValue res = GIN_FALSE;
338 bool recheck;
339
340 /* Initially assume query doesn't require recheck */
341 recheck = false;
342
343 if (query->size > 0)
344 {
345 GinChkVal gcv;
346
347 /*
348 * check-parameter array has one entry for each value (operand) in the
349 * query.
350 */
351 gcv.first_item = GETQUERY(query);
352 gcv.check = check;
353 gcv.map_item_operand = (int *) (extra_data[0]);
354 gcv.need_recheck = &recheck;
355
356 res = TS_execute_ternary(&gcv, GETQUERY(query), false);
357
358 if (res == GIN_TRUE && recheck)
359 res = GIN_MAYBE;
360 }
361
362 PG_RETURN_GIN_TERNARY_VALUE(res);
363}
364
365/*
366 * Formerly, gin_extract_tsvector had only two arguments. Now it has three,
367 * but we still need a pg_proc entry with two args to support reloading
368 * pre-9.1 contrib/tsearch2 opclass declarations. This compatibility
369 * function should go away eventually. (Note: you might say "hey, but the
370 * code above is only *using* two args, so let's just declare it that way".
371 * If you try that you'll find the opr_sanity regression test complains.)
372 */
373Datum
374gin_extract_tsvector_2args(PG_FUNCTION_ARGS)
375{
376 if (PG_NARGS() < 3) /* should not happen */
377 elog(ERROR, "gin_extract_tsvector requires three arguments");
378 return gin_extract_tsvector(fcinfo);
379}
380
381/*
382 * Likewise, we need a stub version of gin_extract_tsquery declared with
383 * only five arguments.
384 */
385Datum
386gin_extract_tsquery_5args(PG_FUNCTION_ARGS)
387{
388 if (PG_NARGS() < 7) /* should not happen */
389 elog(ERROR, "gin_extract_tsquery requires seven arguments");
390 return gin_extract_tsquery(fcinfo);
391}
392
393/*
394 * Likewise, we need a stub version of gin_tsquery_consistent declared with
395 * only six arguments.
396 */
397Datum
398gin_tsquery_consistent_6args(PG_FUNCTION_ARGS)
399{
400 if (PG_NARGS() < 8) /* should not happen */
401 elog(ERROR, "gin_tsquery_consistent requires eight arguments");
402 return gin_tsquery_consistent(fcinfo);
403}
404
405/*
406 * Likewise, a stub version of gin_extract_tsquery declared with argument
407 * types that are no longer considered appropriate.
408 */
409Datum
410gin_extract_tsquery_oldsig(PG_FUNCTION_ARGS)
411{
412 return gin_extract_tsquery(fcinfo);
413}
414
415/*
416 * Likewise, a stub version of gin_tsquery_consistent declared with argument
417 * types that are no longer considered appropriate.
418 */
419Datum
420gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS)
421{
422 return gin_tsquery_consistent(fcinfo);
423}
424