1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * tsquery_rewrite.c |
4 | * Utilities for reconstructing tsquery |
5 | * |
6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
7 | * |
8 | * |
9 | * IDENTIFICATION |
10 | * src/backend/utils/adt/tsquery_rewrite.c |
11 | * |
12 | *------------------------------------------------------------------------- |
13 | */ |
14 | |
15 | #include "postgres.h" |
16 | |
17 | #include "catalog/pg_type.h" |
18 | #include "executor/spi.h" |
19 | #include "miscadmin.h" |
20 | #include "tsearch/ts_utils.h" |
21 | #include "utils/builtins.h" |
22 | |
23 | |
24 | /* |
25 | * If "node" is equal to "ex", return a copy of "subs" instead. |
26 | * If "ex" matches a subset of node's children, return a modified version |
27 | * of "node" in which those children are replaced with a copy of "subs". |
28 | * Otherwise return "node" unmodified. |
29 | * |
30 | * The QTN_NOCHANGE bit is set in successfully modified nodes, so that |
31 | * we won't uselessly recurse into them. |
32 | * Also, set *isfind true if we make a replacement. |
33 | */ |
34 | static QTNode * |
35 | findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind) |
36 | { |
37 | /* Can't match unless signature matches and node type matches. */ |
38 | if ((node->sign & ex->sign) != ex->sign || |
39 | node->valnode->type != ex->valnode->type) |
40 | return node; |
41 | |
42 | /* Ignore nodes marked NOCHANGE, too. */ |
43 | if (node->flags & QTN_NOCHANGE) |
44 | return node; |
45 | |
46 | if (node->valnode->type == QI_OPR) |
47 | { |
48 | /* Must be same operator. */ |
49 | if (node->valnode->qoperator.oper != ex->valnode->qoperator.oper) |
50 | return node; |
51 | |
52 | if (node->nchild == ex->nchild) |
53 | { |
54 | /* |
55 | * Simple case: when same number of children, match if equal. |
56 | * (This is reliable when the children were sorted earlier.) |
57 | */ |
58 | if (QTNEq(node, ex)) |
59 | { |
60 | /* Match; delete node and return a copy of subs instead. */ |
61 | QTNFree(node); |
62 | if (subs) |
63 | { |
64 | node = QTNCopy(subs); |
65 | node->flags |= QTN_NOCHANGE; |
66 | } |
67 | else |
68 | node = NULL; |
69 | *isfind = true; |
70 | } |
71 | } |
72 | else if (node->nchild > ex->nchild && ex->nchild > 0) |
73 | { |
74 | /* |
75 | * AND and OR are commutative/associative, so we should check if a |
76 | * subset of the children match. For example, if node is A|B|C, |
77 | * and ex is B|C, we have a match after we notionally convert node |
78 | * to A|(B|C). This does not work for NOT or PHRASE nodes, but we |
79 | * can't get here for those node types because they have a fixed |
80 | * number of children. |
81 | * |
82 | * Because we expect that the children are sorted, it suffices to |
83 | * make one pass through the two lists to find the matches. |
84 | */ |
85 | bool *matched; |
86 | int nmatched; |
87 | int i, |
88 | j; |
89 | |
90 | /* Assert that the subset rule is OK */ |
91 | Assert(node->valnode->qoperator.oper == OP_AND || |
92 | node->valnode->qoperator.oper == OP_OR); |
93 | |
94 | /* matched[] will record which children of node matched */ |
95 | matched = (bool *) palloc0(node->nchild * sizeof(bool)); |
96 | nmatched = 0; |
97 | i = j = 0; |
98 | while (i < node->nchild && j < ex->nchild) |
99 | { |
100 | int cmp = QTNodeCompare(node->child[i], ex->child[j]); |
101 | |
102 | if (cmp == 0) |
103 | { |
104 | /* match! */ |
105 | matched[i] = true; |
106 | nmatched++; |
107 | i++, j++; |
108 | } |
109 | else if (cmp < 0) |
110 | { |
111 | /* node->child[i] has no match, ignore it */ |
112 | i++; |
113 | } |
114 | else |
115 | { |
116 | /* ex->child[j] has no match; we can give up immediately */ |
117 | break; |
118 | } |
119 | } |
120 | |
121 | if (nmatched == ex->nchild) |
122 | { |
123 | /* collapse out the matched children of node */ |
124 | j = 0; |
125 | for (i = 0; i < node->nchild; i++) |
126 | { |
127 | if (matched[i]) |
128 | QTNFree(node->child[i]); |
129 | else |
130 | node->child[j++] = node->child[i]; |
131 | } |
132 | |
133 | /* and instead insert a copy of subs */ |
134 | if (subs) |
135 | { |
136 | subs = QTNCopy(subs); |
137 | subs->flags |= QTN_NOCHANGE; |
138 | node->child[j++] = subs; |
139 | } |
140 | |
141 | node->nchild = j; |
142 | |
143 | /* |
144 | * At this point we might have a node with zero or one child, |
145 | * which should be simplified. But we leave it to our caller |
146 | * (dofindsubquery) to take care of that. |
147 | */ |
148 | |
149 | /* |
150 | * Re-sort the node to put new child in the right place. This |
151 | * is a bit bogus, because it won't matter for findsubquery's |
152 | * remaining processing, and it's insufficient to prepare the |
153 | * tree for another search (we would need to re-flatten as |
154 | * well, and we don't want to do that because we'd lose the |
155 | * QTN_NOCHANGE marking on the new child). But it's needed to |
156 | * keep the results the same as the regression tests expect. |
157 | */ |
158 | QTNSort(node); |
159 | |
160 | *isfind = true; |
161 | } |
162 | |
163 | pfree(matched); |
164 | } |
165 | } |
166 | else |
167 | { |
168 | Assert(node->valnode->type == QI_VAL); |
169 | |
170 | if (node->valnode->qoperand.valcrc != ex->valnode->qoperand.valcrc) |
171 | return node; |
172 | else if (QTNEq(node, ex)) |
173 | { |
174 | QTNFree(node); |
175 | if (subs) |
176 | { |
177 | node = QTNCopy(subs); |
178 | node->flags |= QTN_NOCHANGE; |
179 | } |
180 | else |
181 | { |
182 | node = NULL; |
183 | } |
184 | *isfind = true; |
185 | } |
186 | } |
187 | |
188 | return node; |
189 | } |
190 | |
191 | /* |
192 | * Recursive guts of findsubquery(): attempt to replace "ex" with "subs" |
193 | * at the root node, and if we failed to do so, recursively match against |
194 | * child nodes. |
195 | * |
196 | * Delete any void subtrees resulting from the replacement. |
197 | * In the following example '5' is replaced by empty operand: |
198 | * |
199 | * AND -> 6 |
200 | * / \ |
201 | * 5 OR |
202 | * / \ |
203 | * 6 5 |
204 | */ |
205 | static QTNode * |
206 | dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind) |
207 | { |
208 | /* since this function recurses, it could be driven to stack overflow. */ |
209 | check_stack_depth(); |
210 | |
211 | /* also, since it's a bit expensive, let's check for query cancel. */ |
212 | CHECK_FOR_INTERRUPTS(); |
213 | |
214 | /* match at the node itself */ |
215 | root = findeq(root, ex, subs, isfind); |
216 | |
217 | /* unless we matched here, consider matches at child nodes */ |
218 | if (root && (root->flags & QTN_NOCHANGE) == 0 && |
219 | root->valnode->type == QI_OPR) |
220 | { |
221 | int i, |
222 | j = 0; |
223 | |
224 | /* |
225 | * Any subtrees that are replaced by NULL must be dropped from the |
226 | * tree. |
227 | */ |
228 | for (i = 0; i < root->nchild; i++) |
229 | { |
230 | root->child[j] = dofindsubquery(root->child[i], ex, subs, isfind); |
231 | if (root->child[j]) |
232 | j++; |
233 | } |
234 | |
235 | root->nchild = j; |
236 | |
237 | /* |
238 | * If we have just zero or one remaining child node, simplify out this |
239 | * operator node. |
240 | */ |
241 | if (root->nchild == 0) |
242 | { |
243 | QTNFree(root); |
244 | root = NULL; |
245 | } |
246 | else if (root->nchild == 1 && root->valnode->qoperator.oper != OP_NOT) |
247 | { |
248 | QTNode *nroot = root->child[0]; |
249 | |
250 | pfree(root); |
251 | root = nroot; |
252 | } |
253 | } |
254 | |
255 | return root; |
256 | } |
257 | |
258 | /* |
259 | * Substitute "subs" for "ex" throughout the QTNode tree at root. |
260 | * |
261 | * If isfind isn't NULL, set *isfind to show whether we made any substitution. |
262 | * |
263 | * Both "root" and "ex" must have been through QTNTernary and QTNSort |
264 | * to ensure reliable matching. |
265 | */ |
266 | QTNode * |
267 | findsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind) |
268 | { |
269 | bool DidFind = false; |
270 | |
271 | root = dofindsubquery(root, ex, subs, &DidFind); |
272 | |
273 | if (isfind) |
274 | *isfind = DidFind; |
275 | |
276 | return root; |
277 | } |
278 | |
279 | Datum |
280 | tsquery_rewrite_query(PG_FUNCTION_ARGS) |
281 | { |
282 | TSQuery query = PG_GETARG_TSQUERY_COPY(0); |
283 | text *in = PG_GETARG_TEXT_PP(1); |
284 | TSQuery rewrited = query; |
285 | MemoryContext outercontext = CurrentMemoryContext; |
286 | MemoryContext oldcontext; |
287 | QTNode *tree; |
288 | char *buf; |
289 | SPIPlanPtr plan; |
290 | Portal portal; |
291 | bool isnull; |
292 | |
293 | if (query->size == 0) |
294 | { |
295 | PG_FREE_IF_COPY(in, 1); |
296 | PG_RETURN_POINTER(rewrited); |
297 | } |
298 | |
299 | tree = QT2QTN(GETQUERY(query), GETOPERAND(query)); |
300 | QTNTernary(tree); |
301 | QTNSort(tree); |
302 | |
303 | buf = text_to_cstring(in); |
304 | |
305 | SPI_connect(); |
306 | |
307 | if ((plan = SPI_prepare(buf, 0, NULL)) == NULL) |
308 | elog(ERROR, "SPI_prepare(\"%s\") failed" , buf); |
309 | |
310 | if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL) |
311 | elog(ERROR, "SPI_cursor_open(\"%s\") failed" , buf); |
312 | |
313 | SPI_cursor_fetch(portal, true, 100); |
314 | |
315 | if (SPI_tuptable == NULL || |
316 | SPI_tuptable->tupdesc->natts != 2 || |
317 | SPI_gettypeid(SPI_tuptable->tupdesc, 1) != TSQUERYOID || |
318 | SPI_gettypeid(SPI_tuptable->tupdesc, 2) != TSQUERYOID) |
319 | ereport(ERROR, |
320 | (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
321 | errmsg("ts_rewrite query must return two tsquery columns" ))); |
322 | |
323 | while (SPI_processed > 0 && tree) |
324 | { |
325 | uint64 i; |
326 | |
327 | for (i = 0; i < SPI_processed && tree; i++) |
328 | { |
329 | Datum qdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull); |
330 | Datum sdata; |
331 | |
332 | if (isnull) |
333 | continue; |
334 | |
335 | sdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull); |
336 | |
337 | if (!isnull) |
338 | { |
339 | TSQuery qtex = DatumGetTSQuery(qdata); |
340 | TSQuery qtsubs = DatumGetTSQuery(sdata); |
341 | QTNode *qex, |
342 | *qsubs = NULL; |
343 | |
344 | if (qtex->size == 0) |
345 | { |
346 | if (qtex != (TSQuery) DatumGetPointer(qdata)) |
347 | pfree(qtex); |
348 | if (qtsubs != (TSQuery) DatumGetPointer(sdata)) |
349 | pfree(qtsubs); |
350 | continue; |
351 | } |
352 | |
353 | qex = QT2QTN(GETQUERY(qtex), GETOPERAND(qtex)); |
354 | |
355 | QTNTernary(qex); |
356 | QTNSort(qex); |
357 | |
358 | if (qtsubs->size) |
359 | qsubs = QT2QTN(GETQUERY(qtsubs), GETOPERAND(qtsubs)); |
360 | |
361 | oldcontext = MemoryContextSwitchTo(outercontext); |
362 | tree = findsubquery(tree, qex, qsubs, NULL); |
363 | MemoryContextSwitchTo(oldcontext); |
364 | |
365 | QTNFree(qex); |
366 | if (qtex != (TSQuery) DatumGetPointer(qdata)) |
367 | pfree(qtex); |
368 | QTNFree(qsubs); |
369 | if (qtsubs != (TSQuery) DatumGetPointer(sdata)) |
370 | pfree(qtsubs); |
371 | |
372 | if (tree) |
373 | { |
374 | /* ready the tree for another pass */ |
375 | QTNClearFlags(tree, QTN_NOCHANGE); |
376 | QTNTernary(tree); |
377 | QTNSort(tree); |
378 | } |
379 | } |
380 | } |
381 | |
382 | SPI_freetuptable(SPI_tuptable); |
383 | SPI_cursor_fetch(portal, true, 100); |
384 | } |
385 | |
386 | SPI_freetuptable(SPI_tuptable); |
387 | SPI_cursor_close(portal); |
388 | SPI_freeplan(plan); |
389 | SPI_finish(); |
390 | |
391 | if (tree) |
392 | { |
393 | QTNBinary(tree); |
394 | rewrited = QTN2QT(tree); |
395 | QTNFree(tree); |
396 | PG_FREE_IF_COPY(query, 0); |
397 | } |
398 | else |
399 | { |
400 | SET_VARSIZE(rewrited, HDRSIZETQ); |
401 | rewrited->size = 0; |
402 | } |
403 | |
404 | pfree(buf); |
405 | PG_FREE_IF_COPY(in, 1); |
406 | PG_RETURN_POINTER(rewrited); |
407 | } |
408 | |
409 | Datum |
410 | tsquery_rewrite(PG_FUNCTION_ARGS) |
411 | { |
412 | TSQuery query = PG_GETARG_TSQUERY_COPY(0); |
413 | TSQuery ex = PG_GETARG_TSQUERY(1); |
414 | TSQuery subst = PG_GETARG_TSQUERY(2); |
415 | TSQuery rewrited = query; |
416 | QTNode *tree, |
417 | *qex, |
418 | *subs = NULL; |
419 | |
420 | if (query->size == 0 || ex->size == 0) |
421 | { |
422 | PG_FREE_IF_COPY(ex, 1); |
423 | PG_FREE_IF_COPY(subst, 2); |
424 | PG_RETURN_POINTER(rewrited); |
425 | } |
426 | |
427 | tree = QT2QTN(GETQUERY(query), GETOPERAND(query)); |
428 | QTNTernary(tree); |
429 | QTNSort(tree); |
430 | |
431 | qex = QT2QTN(GETQUERY(ex), GETOPERAND(ex)); |
432 | QTNTernary(qex); |
433 | QTNSort(qex); |
434 | |
435 | if (subst->size) |
436 | subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst)); |
437 | |
438 | tree = findsubquery(tree, qex, subs, NULL); |
439 | |
440 | QTNFree(qex); |
441 | QTNFree(subs); |
442 | |
443 | if (!tree) |
444 | { |
445 | SET_VARSIZE(rewrited, HDRSIZETQ); |
446 | rewrited->size = 0; |
447 | PG_FREE_IF_COPY(ex, 1); |
448 | PG_FREE_IF_COPY(subst, 2); |
449 | PG_RETURN_POINTER(rewrited); |
450 | } |
451 | else |
452 | { |
453 | QTNBinary(tree); |
454 | rewrited = QTN2QT(tree); |
455 | QTNFree(tree); |
456 | } |
457 | |
458 | PG_FREE_IF_COPY(query, 0); |
459 | PG_FREE_IF_COPY(ex, 1); |
460 | PG_FREE_IF_COPY(subst, 2); |
461 | PG_RETURN_POINTER(rewrited); |
462 | } |
463 | |