1/*-------------------------------------------------------------------------
2 *
3 * tsquery.c
4 * I/O functions for tsquery
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/utils/adt/tsquery.c
11 *
12 *-------------------------------------------------------------------------
13 */
14
15#include "postgres.h"
16
17#include "libpq/pqformat.h"
18#include "miscadmin.h"
19#include "tsearch/ts_type.h"
20#include "tsearch/ts_locale.h"
21#include "tsearch/ts_utils.h"
22#include "utils/builtins.h"
23#include "utils/memutils.h"
24#include "utils/pg_crc.h"
25
26/* FTS operator priorities, see ts_type.h */
27const int tsearch_op_priority[OP_COUNT] =
28{
29 4, /* OP_NOT */
30 2, /* OP_AND */
31 1, /* OP_OR */
32 3 /* OP_PHRASE */
33};
34
35/*
36 * parser's states
37 */
38typedef enum
39{
40 WAITOPERAND = 1,
41 WAITOPERATOR = 2,
42 WAITFIRSTOPERAND = 3
43} ts_parserstate;
44
45/*
46 * token types for parsing
47 */
48typedef enum
49{
50 PT_END = 0,
51 PT_ERR = 1,
52 PT_VAL = 2,
53 PT_OPR = 3,
54 PT_OPEN = 4,
55 PT_CLOSE = 5
56} ts_tokentype;
57
58/*
59 * get token from query string
60 *
61 * *operator is filled in with OP_* when return values is PT_OPR,
62 * but *weight could contain a distance value in case of phrase operator.
63 * *strval, *lenval and *weight are filled in when return value is PT_VAL
64 *
65 */
66typedef ts_tokentype (*ts_tokenizer) (TSQueryParserState state, int8 *operator,
67 int *lenval, char **strval,
68 int16 *weight, bool *prefix);
69
70struct TSQueryParserStateData
71{
72 /* Tokenizer used for parsing tsquery */
73 ts_tokenizer gettoken;
74
75 /* State of tokenizer function */
76 char *buffer; /* entire string we are scanning */
77 char *buf; /* current scan point */
78 int count; /* nesting count, incremented by (,
79 * decremented by ) */
80 bool in_quotes; /* phrase in quotes "" */
81 ts_parserstate state;
82
83 /* polish (prefix) notation in list, filled in by push* functions */
84 List *polstr;
85
86 /*
87 * Strings from operands are collected in op. curop is a pointer to the
88 * end of used space of op.
89 */
90 char *op;
91 char *curop;
92 int lenop; /* allocated size of op */
93 int sumlen; /* used size of op */
94
95 /* state for value's parser */
96 TSVectorParseState valstate;
97};
98
99/*
100 * subroutine to parse the modifiers (weight and prefix flag currently)
101 * part, like ':AB*' of a query.
102 */
103static char *
104get_modifiers(char *buf, int16 *weight, bool *prefix)
105{
106 *weight = 0;
107 *prefix = false;
108
109 if (!t_iseq(buf, ':'))
110 return buf;
111
112 buf++;
113 while (*buf && pg_mblen(buf) == 1)
114 {
115 switch (*buf)
116 {
117 case 'a':
118 case 'A':
119 *weight |= 1 << 3;
120 break;
121 case 'b':
122 case 'B':
123 *weight |= 1 << 2;
124 break;
125 case 'c':
126 case 'C':
127 *weight |= 1 << 1;
128 break;
129 case 'd':
130 case 'D':
131 *weight |= 1;
132 break;
133 case '*':
134 *prefix = true;
135 break;
136 default:
137 return buf;
138 }
139 buf++;
140 }
141
142 return buf;
143}
144
145/*
146 * Parse phrase operator. The operator
147 * may take the following forms:
148 *
149 * a <N> b (distance is exactly N lexemes)
150 * a <-> b (default distance = 1)
151 *
152 * The buffer should begin with '<' char
153 */
154static bool
155parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
156{
157 enum
158 {
159 PHRASE_OPEN = 0,
160 PHRASE_DIST,
161 PHRASE_CLOSE,
162 PHRASE_FINISH
163 } state = PHRASE_OPEN;
164 char *ptr = pstate->buf;
165 char *endptr;
166 long l = 1; /* default distance */
167
168 while (*ptr)
169 {
170 switch (state)
171 {
172 case PHRASE_OPEN:
173 if (t_iseq(ptr, '<'))
174 {
175 state = PHRASE_DIST;
176 ptr++;
177 }
178 else
179 return false;
180 break;
181
182 case PHRASE_DIST:
183 if (t_iseq(ptr, '-'))
184 {
185 state = PHRASE_CLOSE;
186 ptr++;
187 continue;
188 }
189
190 if (!t_isdigit(ptr))
191 return false;
192
193 errno = 0;
194 l = strtol(ptr, &endptr, 10);
195 if (ptr == endptr)
196 return false;
197 else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
198 ereport(ERROR,
199 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
200 errmsg("distance in phrase operator should not be greater than %d",
201 MAXENTRYPOS)));
202 else
203 {
204 state = PHRASE_CLOSE;
205 ptr = endptr;
206 }
207 break;
208
209 case PHRASE_CLOSE:
210 if (t_iseq(ptr, '>'))
211 {
212 state = PHRASE_FINISH;
213 ptr++;
214 }
215 else
216 return false;
217 break;
218
219 case PHRASE_FINISH:
220 *distance = (int16) l;
221 pstate->buf = ptr;
222 return true;
223 }
224 }
225
226 return false;
227}
228
229/*
230 * Parse OR operator used in websearch_to_tsquery(), returns true if we
231 * believe that "OR" literal could be an operator OR
232 */
233static bool
234parse_or_operator(TSQueryParserState pstate)
235{
236 char *ptr = pstate->buf;
237
238 if (pstate->in_quotes)
239 return false;
240
241 /* it should begin with "OR" literal */
242 if (pg_strncasecmp(ptr, "or", 2) != 0)
243 return false;
244
245 ptr += 2;
246
247 /*
248 * it shouldn't be a part of any word but somewhere later it should be
249 * some operand
250 */
251 if (*ptr == '\0') /* no operand */
252 return false;
253
254 /* it shouldn't be a part of any word */
255 if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalpha(ptr) || t_isdigit(ptr))
256 return false;
257
258 for (;;)
259 {
260 ptr += pg_mblen(ptr);
261
262 if (*ptr == '\0') /* got end of string without operand */
263 return false;
264
265 /*
266 * Suppose, we found an operand, but could be a not correct operand.
267 * So we still treat OR literal as operation with possibly incorrect
268 * operand and will not search it as lexeme
269 */
270 if (!t_isspace(ptr))
271 break;
272 }
273
274 pstate->buf += 2;
275 return true;
276}
277
278static ts_tokentype
279gettoken_query_standard(TSQueryParserState state, int8 *operator,
280 int *lenval, char **strval,
281 int16 *weight, bool *prefix)
282{
283 *weight = 0;
284 *prefix = false;
285
286 while (true)
287 {
288 switch (state->state)
289 {
290 case WAITFIRSTOPERAND:
291 case WAITOPERAND:
292 if (t_iseq(state->buf, '!'))
293 {
294 state->buf++;
295 state->state = WAITOPERAND;
296 *operator = OP_NOT;
297 return PT_OPR;
298 }
299 else if (t_iseq(state->buf, '('))
300 {
301 state->buf++;
302 state->state = WAITOPERAND;
303 state->count++;
304 return PT_OPEN;
305 }
306 else if (t_iseq(state->buf, ':'))
307 {
308 ereport(ERROR,
309 (errcode(ERRCODE_SYNTAX_ERROR),
310 errmsg("syntax error in tsquery: \"%s\"",
311 state->buffer)));
312 }
313 else if (!t_isspace(state->buf))
314 {
315 /*
316 * We rely on the tsvector parser to parse the value for
317 * us
318 */
319 reset_tsvector_parser(state->valstate, state->buf);
320 if (gettoken_tsvector(state->valstate, strval, lenval,
321 NULL, NULL, &state->buf))
322 {
323 state->buf = get_modifiers(state->buf, weight, prefix);
324 state->state = WAITOPERATOR;
325 return PT_VAL;
326 }
327 else if (state->state == WAITFIRSTOPERAND)
328 {
329 return PT_END;
330 }
331 else
332 ereport(ERROR,
333 (errcode(ERRCODE_SYNTAX_ERROR),
334 errmsg("no operand in tsquery: \"%s\"",
335 state->buffer)));
336 }
337 break;
338
339 case WAITOPERATOR:
340 if (t_iseq(state->buf, '&'))
341 {
342 state->buf++;
343 state->state = WAITOPERAND;
344 *operator = OP_AND;
345 return PT_OPR;
346 }
347 else if (t_iseq(state->buf, '|'))
348 {
349 state->buf++;
350 state->state = WAITOPERAND;
351 *operator = OP_OR;
352 return PT_OPR;
353 }
354 else if (parse_phrase_operator(state, weight))
355 {
356 /* weight var is used as storage for distance */
357 state->state = WAITOPERAND;
358 *operator = OP_PHRASE;
359 return PT_OPR;
360 }
361 else if (t_iseq(state->buf, ')'))
362 {
363 state->buf++;
364 state->count--;
365 return (state->count < 0) ? PT_ERR : PT_CLOSE;
366 }
367 else if (*state->buf == '\0')
368 {
369 return (state->count) ? PT_ERR : PT_END;
370 }
371 else if (!t_isspace(state->buf))
372 {
373 return PT_ERR;
374 }
375 break;
376 }
377
378 state->buf += pg_mblen(state->buf);
379 }
380}
381
382static ts_tokentype
383gettoken_query_websearch(TSQueryParserState state, int8 *operator,
384 int *lenval, char **strval,
385 int16 *weight, bool *prefix)
386{
387 *weight = 0;
388 *prefix = false;
389
390 while (true)
391 {
392 switch (state->state)
393 {
394 case WAITFIRSTOPERAND:
395 case WAITOPERAND:
396 if (t_iseq(state->buf, '-'))
397 {
398 state->buf++;
399 state->state = WAITOPERAND;
400
401 if (state->in_quotes)
402 continue;
403
404 *operator = OP_NOT;
405 return PT_OPR;
406 }
407 else if (t_iseq(state->buf, '"'))
408 {
409 state->buf++;
410
411 if (!state->in_quotes)
412 {
413 state->state = WAITOPERAND;
414
415 if (strchr(state->buf, '"'))
416 {
417 /* quoted text should be ordered <-> */
418 state->in_quotes = true;
419 return PT_OPEN;
420 }
421
422 /* web search tolerates missing quotes */
423 continue;
424 }
425 else
426 {
427 /* we have to provide an operand */
428 state->in_quotes = false;
429 state->state = WAITOPERATOR;
430 pushStop(state);
431 return PT_CLOSE;
432 }
433 }
434 else if (ISOPERATOR(state->buf))
435 {
436 /* or else gettoken_tsvector() will raise an error */
437 state->buf++;
438 state->state = WAITOPERAND;
439 continue;
440 }
441 else if (!t_isspace(state->buf))
442 {
443 /*
444 * We rely on the tsvector parser to parse the value for
445 * us
446 */
447 reset_tsvector_parser(state->valstate, state->buf);
448 if (gettoken_tsvector(state->valstate, strval, lenval,
449 NULL, NULL, &state->buf))
450 {
451 state->state = WAITOPERATOR;
452 return PT_VAL;
453 }
454 else if (state->state == WAITFIRSTOPERAND)
455 {
456 return PT_END;
457 }
458 else
459 {
460 /* finally, we have to provide an operand */
461 pushStop(state);
462 return PT_END;
463 }
464 }
465 break;
466
467 case WAITOPERATOR:
468 if (t_iseq(state->buf, '"'))
469 {
470 if (!state->in_quotes)
471 {
472 /*
473 * put implicit AND after an operand and handle this
474 * quote in WAITOPERAND
475 */
476 state->state = WAITOPERAND;
477 *operator = OP_AND;
478 return PT_OPR;
479 }
480 else
481 {
482 state->buf++;
483
484 /* just close quotes */
485 state->in_quotes = false;
486 return PT_CLOSE;
487 }
488 }
489 else if (parse_or_operator(state))
490 {
491 state->state = WAITOPERAND;
492 *operator = OP_OR;
493 return PT_OPR;
494 }
495 else if (*state->buf == '\0')
496 {
497 return PT_END;
498 }
499 else if (!t_isspace(state->buf))
500 {
501 if (state->in_quotes)
502 {
503 /* put implicit <-> after an operand */
504 *operator = OP_PHRASE;
505 *weight = 1;
506 }
507 else
508 {
509 /* put implicit AND after an operand */
510 *operator = OP_AND;
511 }
512
513 state->state = WAITOPERAND;
514 return PT_OPR;
515 }
516 break;
517 }
518
519 state->buf += pg_mblen(state->buf);
520 }
521}
522
523static ts_tokentype
524gettoken_query_plain(TSQueryParserState state, int8 *operator,
525 int *lenval, char **strval,
526 int16 *weight, bool *prefix)
527{
528 *weight = 0;
529 *prefix = false;
530
531 if (*state->buf == '\0')
532 return PT_END;
533
534 *strval = state->buf;
535 *lenval = strlen(state->buf);
536 state->buf += *lenval;
537 state->count++;
538 return PT_VAL;
539}
540
541/*
542 * Push an operator to state->polstr
543 */
544void
545pushOperator(TSQueryParserState state, int8 oper, int16 distance)
546{
547 QueryOperator *tmp;
548
549 Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR || oper == OP_PHRASE);
550
551 tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));
552 tmp->type = QI_OPR;
553 tmp->oper = oper;
554 tmp->distance = (oper == OP_PHRASE) ? distance : 0;
555 /* left is filled in later with findoprnd */
556
557 state->polstr = lcons(tmp, state->polstr);
558}
559
560static void
561pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
562{
563 QueryOperand *tmp;
564
565 if (distance >= MAXSTRPOS)
566 ereport(ERROR,
567 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
568 errmsg("value is too big in tsquery: \"%s\"",
569 state->buffer)));
570 if (lenval >= MAXSTRLEN)
571 ereport(ERROR,
572 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
573 errmsg("operand is too long in tsquery: \"%s\"",
574 state->buffer)));
575
576 tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
577 tmp->type = QI_VAL;
578 tmp->weight = weight;
579 tmp->prefix = prefix;
580 tmp->valcrc = (int32) valcrc;
581 tmp->length = lenval;
582 tmp->distance = distance;
583
584 state->polstr = lcons(tmp, state->polstr);
585}
586
587/*
588 * Push an operand to state->polstr.
589 *
590 * strval must point to a string equal to state->curop. lenval is the length
591 * of the string.
592 */
593void
594pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
595{
596 pg_crc32 valcrc;
597
598 if (lenval >= MAXSTRLEN)
599 ereport(ERROR,
600 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
601 errmsg("word is too long in tsquery: \"%s\"",
602 state->buffer)));
603
604 INIT_LEGACY_CRC32(valcrc);
605 COMP_LEGACY_CRC32(valcrc, strval, lenval);
606 FIN_LEGACY_CRC32(valcrc);
607 pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
608
609 /* append the value string to state.op, enlarging buffer if needed first */
610 while (state->curop - state->op + lenval + 1 >= state->lenop)
611 {
612 int used = state->curop - state->op;
613
614 state->lenop *= 2;
615 state->op = (char *) repalloc((void *) state->op, state->lenop);
616 state->curop = state->op + used;
617 }
618 memcpy((void *) state->curop, (void *) strval, lenval);
619 state->curop += lenval;
620 *(state->curop) = '\0';
621 state->curop++;
622 state->sumlen += lenval + 1 /* \0 */ ;
623}
624
625
626/*
627 * Push a stopword placeholder to state->polstr
628 */
629void
630pushStop(TSQueryParserState state)
631{
632 QueryOperand *tmp;
633
634 tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
635 tmp->type = QI_VALSTOP;
636
637 state->polstr = lcons(tmp, state->polstr);
638}
639
640
641#define STACKDEPTH 32
642
643typedef struct OperatorElement
644{
645 int8 op;
646 int16 distance;
647} OperatorElement;
648
649static void
650pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance)
651{
652 if (*lenstack == STACKDEPTH) /* internal error */
653 elog(ERROR, "tsquery stack too small");
654
655 stack[*lenstack].op = op;
656 stack[*lenstack].distance = distance;
657
658 (*lenstack)++;
659}
660
661static void
662cleanOpStack(TSQueryParserState state,
663 OperatorElement *stack, int *lenstack, int8 op)
664{
665 int opPriority = OP_PRIORITY(op);
666
667 while (*lenstack)
668 {
669 /* NOT is right associative unlike to others */
670 if ((op != OP_NOT && opPriority > OP_PRIORITY(stack[*lenstack - 1].op)) ||
671 (op == OP_NOT && opPriority >= OP_PRIORITY(stack[*lenstack - 1].op)))
672 break;
673
674 (*lenstack)--;
675 pushOperator(state, stack[*lenstack].op,
676 stack[*lenstack].distance);
677 }
678}
679
680/*
681 * Make polish (prefix) notation of query.
682 *
683 * See parse_tsquery for explanation of pushval.
684 */
685static void
686makepol(TSQueryParserState state,
687 PushFunction pushval,
688 Datum opaque)
689{
690 int8 operator = 0;
691 ts_tokentype type;
692 int lenval = 0;
693 char *strval = NULL;
694 OperatorElement opstack[STACKDEPTH];
695 int lenstack = 0;
696 int16 weight = 0;
697 bool prefix;
698
699 /* since this function recurses, it could be driven to stack overflow */
700 check_stack_depth();
701
702 while ((type = state->gettoken(state, &operator,
703 &lenval, &strval,
704 &weight, &prefix)) != PT_END)
705 {
706 switch (type)
707 {
708 case PT_VAL:
709 pushval(opaque, state, strval, lenval, weight, prefix);
710 break;
711 case PT_OPR:
712 cleanOpStack(state, opstack, &lenstack, operator);
713 pushOpStack(opstack, &lenstack, operator, weight);
714 break;
715 case PT_OPEN:
716 makepol(state, pushval, opaque);
717 break;
718 case PT_CLOSE:
719 cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
720 return;
721 case PT_ERR:
722 default:
723 ereport(ERROR,
724 (errcode(ERRCODE_SYNTAX_ERROR),
725 errmsg("syntax error in tsquery: \"%s\"",
726 state->buffer)));
727 }
728 }
729
730 cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
731}
732
733static void
734findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup)
735{
736 /* since this function recurses, it could be driven to stack overflow. */
737 check_stack_depth();
738
739 if (*pos >= nnodes)
740 elog(ERROR, "malformed tsquery: operand not found");
741
742 if (ptr[*pos].type == QI_VAL)
743 {
744 (*pos)++;
745 }
746 else if (ptr[*pos].type == QI_VALSTOP)
747 {
748 *needcleanup = true; /* we'll have to remove stop words */
749 (*pos)++;
750 }
751 else
752 {
753 Assert(ptr[*pos].type == QI_OPR);
754
755 if (ptr[*pos].qoperator.oper == OP_NOT)
756 {
757 ptr[*pos].qoperator.left = 1; /* fixed offset */
758 (*pos)++;
759
760 /* process the only argument */
761 findoprnd_recurse(ptr, pos, nnodes, needcleanup);
762 }
763 else
764 {
765 QueryOperator *curitem = &ptr[*pos].qoperator;
766 int tmp = *pos; /* save current position */
767
768 Assert(curitem->oper == OP_AND ||
769 curitem->oper == OP_OR ||
770 curitem->oper == OP_PHRASE);
771
772 (*pos)++;
773
774 /* process RIGHT argument */
775 findoprnd_recurse(ptr, pos, nnodes, needcleanup);
776
777 curitem->left = *pos - tmp; /* set LEFT arg's offset */
778
779 /* process LEFT argument */
780 findoprnd_recurse(ptr, pos, nnodes, needcleanup);
781 }
782 }
783}
784
785
786/*
787 * Fill in the left-fields previously left unfilled.
788 * The input QueryItems must be in polish (prefix) notation.
789 * Also, set *needcleanup to true if there are any QI_VALSTOP nodes.
790 */
791static void
792findoprnd(QueryItem *ptr, int size, bool *needcleanup)
793{
794 uint32 pos;
795
796 *needcleanup = false;
797 pos = 0;
798 findoprnd_recurse(ptr, &pos, size, needcleanup);
799
800 if (pos != size)
801 elog(ERROR, "malformed tsquery: extra nodes");
802}
803
804
805/*
806 * Each value (operand) in the query is passed to pushval. pushval can
807 * transform the simple value to an arbitrarily complex expression using
808 * pushValue and pushOperator. It must push a single value with pushValue,
809 * a complete expression with all operands, or a stopword placeholder
810 * with pushStop, otherwise the prefix notation representation will be broken,
811 * having an operator with no operand.
812 *
813 * opaque is passed on to pushval as is, pushval can use it to store its
814 * private state.
815 */
816TSQuery
817parse_tsquery(char *buf,
818 PushFunction pushval,
819 Datum opaque,
820 int flags)
821{
822 struct TSQueryParserStateData state;
823 int i;
824 TSQuery query;
825 int commonlen;
826 QueryItem *ptr;
827 ListCell *cell;
828 bool needcleanup;
829 int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;
830
831 /* plain should not be used with web */
832 Assert((flags & (P_TSQ_PLAIN | P_TSQ_WEB)) != (P_TSQ_PLAIN | P_TSQ_WEB));
833
834 /* select suitable tokenizer */
835 if (flags & P_TSQ_PLAIN)
836 state.gettoken = gettoken_query_plain;
837 else if (flags & P_TSQ_WEB)
838 {
839 state.gettoken = gettoken_query_websearch;
840 tsv_flags |= P_TSV_IS_WEB;
841 }
842 else
843 state.gettoken = gettoken_query_standard;
844
845 /* init state */
846 state.buffer = buf;
847 state.buf = buf;
848 state.count = 0;
849 state.in_quotes = false;
850 state.state = WAITFIRSTOPERAND;
851 state.polstr = NIL;
852
853 /* init value parser's state */
854 state.valstate = init_tsvector_parser(state.buffer, tsv_flags);
855
856 /* init list of operand */
857 state.sumlen = 0;
858 state.lenop = 64;
859 state.curop = state.op = (char *) palloc(state.lenop);
860 *(state.curop) = '\0';
861
862 /* parse query & make polish notation (postfix, but in reverse order) */
863 makepol(&state, pushval, opaque);
864
865 close_tsvector_parser(state.valstate);
866
867 if (list_length(state.polstr) == 0)
868 {
869 ereport(NOTICE,
870 (errmsg("text-search query doesn't contain lexemes: \"%s\"",
871 state.buffer)));
872 query = (TSQuery) palloc(HDRSIZETQ);
873 SET_VARSIZE(query, HDRSIZETQ);
874 query->size = 0;
875 return query;
876 }
877
878 if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen))
879 ereport(ERROR,
880 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
881 errmsg("tsquery is too large")));
882 commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
883
884 /* Pack the QueryItems in the final TSQuery struct to return to caller */
885 query = (TSQuery) palloc0(commonlen);
886 SET_VARSIZE(query, commonlen);
887 query->size = list_length(state.polstr);
888 ptr = GETQUERY(query);
889
890 /* Copy QueryItems to TSQuery */
891 i = 0;
892 foreach(cell, state.polstr)
893 {
894 QueryItem *item = (QueryItem *) lfirst(cell);
895
896 switch (item->type)
897 {
898 case QI_VAL:
899 memcpy(&ptr[i], item, sizeof(QueryOperand));
900 break;
901 case QI_VALSTOP:
902 ptr[i].type = QI_VALSTOP;
903 break;
904 case QI_OPR:
905 memcpy(&ptr[i], item, sizeof(QueryOperator));
906 break;
907 default:
908 elog(ERROR, "unrecognized QueryItem type: %d", item->type);
909 }
910 i++;
911 }
912
913 /* Copy all the operand strings to TSQuery */
914 memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
915 pfree(state.op);
916
917 /*
918 * Set left operand pointers for every operator. While we're at it,
919 * detect whether there are any QI_VALSTOP nodes.
920 */
921 findoprnd(ptr, query->size, &needcleanup);
922
923 /*
924 * If there are QI_VALSTOP nodes, delete them and simplify the tree.
925 */
926 if (needcleanup)
927 query = cleanup_tsquery_stopwords(query);
928
929 return query;
930}
931
932static void
933pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
934 int16 weight, bool prefix)
935{
936 pushValue(state, strval, lenval, weight, prefix);
937}
938
939/*
940 * in without morphology
941 */
942Datum
943tsqueryin(PG_FUNCTION_ARGS)
944{
945 char *in = PG_GETARG_CSTRING(0);
946
947 PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), 0));
948}
949
950/*
951 * out function
952 */
953typedef struct
954{
955 QueryItem *curpol;
956 char *buf;
957 char *cur;
958 char *op;
959 int buflen;
960} INFIX;
961
962/* Makes sure inf->buf is large enough for adding 'addsize' bytes */
963#define RESIZEBUF(inf, addsize) \
964while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
965{ \
966 int len = (inf)->cur - (inf)->buf; \
967 (inf)->buflen *= 2; \
968 (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
969 (inf)->cur = (inf)->buf + len; \
970}
971
972/*
973 * recursively traverse the tree and
974 * print it in infix (human-readable) form
975 */
976static void
977infix(INFIX *in, int parentPriority, bool rightPhraseOp)
978{
979 /* since this function recurses, it could be driven to stack overflow. */
980 check_stack_depth();
981
982 if (in->curpol->type == QI_VAL)
983 {
984 QueryOperand *curpol = &in->curpol->qoperand;
985 char *op = in->op + curpol->distance;
986 int clen;
987
988 RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
989 *(in->cur) = '\'';
990 in->cur++;
991 while (*op)
992 {
993 if (t_iseq(op, '\''))
994 {
995 *(in->cur) = '\'';
996 in->cur++;
997 }
998 else if (t_iseq(op, '\\'))
999 {
1000 *(in->cur) = '\\';
1001 in->cur++;
1002 }
1003 COPYCHAR(in->cur, op);
1004
1005 clen = pg_mblen(op);
1006 op += clen;
1007 in->cur += clen;
1008 }
1009 *(in->cur) = '\'';
1010 in->cur++;
1011 if (curpol->weight || curpol->prefix)
1012 {
1013 *(in->cur) = ':';
1014 in->cur++;
1015 if (curpol->prefix)
1016 {
1017 *(in->cur) = '*';
1018 in->cur++;
1019 }
1020 if (curpol->weight & (1 << 3))
1021 {
1022 *(in->cur) = 'A';
1023 in->cur++;
1024 }
1025 if (curpol->weight & (1 << 2))
1026 {
1027 *(in->cur) = 'B';
1028 in->cur++;
1029 }
1030 if (curpol->weight & (1 << 1))
1031 {
1032 *(in->cur) = 'C';
1033 in->cur++;
1034 }
1035 if (curpol->weight & 1)
1036 {
1037 *(in->cur) = 'D';
1038 in->cur++;
1039 }
1040 }
1041 *(in->cur) = '\0';
1042 in->curpol++;
1043 }
1044 else if (in->curpol->qoperator.oper == OP_NOT)
1045 {
1046 int priority = QO_PRIORITY(in->curpol);
1047
1048 if (priority < parentPriority)
1049 {
1050 RESIZEBUF(in, 2);
1051 sprintf(in->cur, "( ");
1052 in->cur = strchr(in->cur, '\0');
1053 }
1054 RESIZEBUF(in, 1);
1055 *(in->cur) = '!';
1056 in->cur++;
1057 *(in->cur) = '\0';
1058 in->curpol++;
1059
1060 infix(in, priority, false);
1061 if (priority < parentPriority)
1062 {
1063 RESIZEBUF(in, 2);
1064 sprintf(in->cur, " )");
1065 in->cur = strchr(in->cur, '\0');
1066 }
1067 }
1068 else
1069 {
1070 int8 op = in->curpol->qoperator.oper;
1071 int priority = QO_PRIORITY(in->curpol);
1072 int16 distance = in->curpol->qoperator.distance;
1073 INFIX nrm;
1074 bool needParenthesis = false;
1075
1076 in->curpol++;
1077 if (priority < parentPriority ||
1078 /* phrase operator depends on order */
1079 (op == OP_PHRASE && rightPhraseOp))
1080 {
1081 needParenthesis = true;
1082 RESIZEBUF(in, 2);
1083 sprintf(in->cur, "( ");
1084 in->cur = strchr(in->cur, '\0');
1085 }
1086
1087 nrm.curpol = in->curpol;
1088 nrm.op = in->op;
1089 nrm.buflen = 16;
1090 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
1091
1092 /* get right operand */
1093 infix(&nrm, priority, (op == OP_PHRASE));
1094
1095 /* get & print left operand */
1096 in->curpol = nrm.curpol;
1097 infix(in, priority, false);
1098
1099 /* print operator & right operand */
1100 RESIZEBUF(in, 3 + (2 + 10 /* distance */ ) + (nrm.cur - nrm.buf));
1101 switch (op)
1102 {
1103 case OP_OR:
1104 sprintf(in->cur, " | %s", nrm.buf);
1105 break;
1106 case OP_AND:
1107 sprintf(in->cur, " & %s", nrm.buf);
1108 break;
1109 case OP_PHRASE:
1110 if (distance != 1)
1111 sprintf(in->cur, " <%d> %s", distance, nrm.buf);
1112 else
1113 sprintf(in->cur, " <-> %s", nrm.buf);
1114 break;
1115 default:
1116 /* OP_NOT is handled in above if-branch */
1117 elog(ERROR, "unrecognized operator type: %d", op);
1118 }
1119 in->cur = strchr(in->cur, '\0');
1120 pfree(nrm.buf);
1121
1122 if (needParenthesis)
1123 {
1124 RESIZEBUF(in, 2);
1125 sprintf(in->cur, " )");
1126 in->cur = strchr(in->cur, '\0');
1127 }
1128 }
1129}
1130
1131Datum
1132tsqueryout(PG_FUNCTION_ARGS)
1133{
1134 TSQuery query = PG_GETARG_TSQUERY(0);
1135 INFIX nrm;
1136
1137 if (query->size == 0)
1138 {
1139 char *b = palloc(1);
1140
1141 *b = '\0';
1142 PG_RETURN_POINTER(b);
1143 }
1144 nrm.curpol = GETQUERY(query);
1145 nrm.buflen = 32;
1146 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
1147 *(nrm.cur) = '\0';
1148 nrm.op = GETOPERAND(query);
1149 infix(&nrm, -1 /* lowest priority */ , false);
1150
1151 PG_FREE_IF_COPY(query, 0);
1152 PG_RETURN_CSTRING(nrm.buf);
1153}
1154
1155/*
1156 * Binary Input / Output functions. The binary format is as follows:
1157 *
1158 * uint32 number of operators/operands in the query
1159 *
1160 * Followed by the operators and operands, in prefix notation. For each
1161 * operand:
1162 *
1163 * uint8 type, QI_VAL
1164 * uint8 weight
1165 * operand text in client encoding, null-terminated
1166 * uint8 prefix
1167 *
1168 * For each operator:
1169 * uint8 type, QI_OPR
1170 * uint8 operator, one of OP_AND, OP_PHRASE OP_OR, OP_NOT.
1171 * uint16 distance (only for OP_PHRASE)
1172 */
1173Datum
1174tsquerysend(PG_FUNCTION_ARGS)
1175{
1176 TSQuery query = PG_GETARG_TSQUERY(0);
1177 StringInfoData buf;
1178 int i;
1179 QueryItem *item = GETQUERY(query);
1180
1181 pq_begintypsend(&buf);
1182
1183 pq_sendint32(&buf, query->size);
1184 for (i = 0; i < query->size; i++)
1185 {
1186 pq_sendint8(&buf, item->type);
1187
1188 switch (item->type)
1189 {
1190 case QI_VAL:
1191 pq_sendint8(&buf, item->qoperand.weight);
1192 pq_sendint8(&buf, item->qoperand.prefix);
1193 pq_sendstring(&buf, GETOPERAND(query) + item->qoperand.distance);
1194 break;
1195 case QI_OPR:
1196 pq_sendint8(&buf, item->qoperator.oper);
1197 if (item->qoperator.oper == OP_PHRASE)
1198 pq_sendint16(&buf, item->qoperator.distance);
1199 break;
1200 default:
1201 elog(ERROR, "unrecognized tsquery node type: %d", item->type);
1202 }
1203 item++;
1204 }
1205
1206 PG_FREE_IF_COPY(query, 0);
1207
1208 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
1209}
1210
1211Datum
1212tsqueryrecv(PG_FUNCTION_ARGS)
1213{
1214 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
1215 TSQuery query;
1216 int i,
1217 len;
1218 QueryItem *item;
1219 int datalen;
1220 char *ptr;
1221 uint32 size;
1222 const char **operands;
1223 bool needcleanup;
1224
1225 size = pq_getmsgint(buf, sizeof(uint32));
1226 if (size > (MaxAllocSize / sizeof(QueryItem)))
1227 elog(ERROR, "invalid size of tsquery");
1228
1229 /* Allocate space to temporarily hold operand strings */
1230 operands = palloc(size * sizeof(char *));
1231
1232 /* Allocate space for all the QueryItems. */
1233 len = HDRSIZETQ + sizeof(QueryItem) * size;
1234 query = (TSQuery) palloc0(len);
1235 query->size = size;
1236 item = GETQUERY(query);
1237
1238 datalen = 0;
1239 for (i = 0; i < size; i++)
1240 {
1241 item->type = (int8) pq_getmsgint(buf, sizeof(int8));
1242
1243 if (item->type == QI_VAL)
1244 {
1245 size_t val_len; /* length after recoding to server
1246 * encoding */
1247 uint8 weight;
1248 uint8 prefix;
1249 const char *val;
1250 pg_crc32 valcrc;
1251
1252 weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
1253 prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
1254 val = pq_getmsgstring(buf);
1255 val_len = strlen(val);
1256
1257 /* Sanity checks */
1258
1259 if (weight > 0xF)
1260 elog(ERROR, "invalid tsquery: invalid weight bitmap");
1261
1262 if (val_len > MAXSTRLEN)
1263 elog(ERROR, "invalid tsquery: operand too long");
1264
1265 if (datalen > MAXSTRPOS)
1266 elog(ERROR, "invalid tsquery: total operand length exceeded");
1267
1268 /* Looks valid. */
1269
1270 INIT_LEGACY_CRC32(valcrc);
1271 COMP_LEGACY_CRC32(valcrc, val, val_len);
1272 FIN_LEGACY_CRC32(valcrc);
1273
1274 item->qoperand.weight = weight;
1275 item->qoperand.prefix = (prefix) ? true : false;
1276 item->qoperand.valcrc = (int32) valcrc;
1277 item->qoperand.length = val_len;
1278 item->qoperand.distance = datalen;
1279
1280 /*
1281 * Operand strings are copied to the final struct after this loop;
1282 * here we just collect them to an array
1283 */
1284 operands[i] = val;
1285
1286 datalen += val_len + 1; /* + 1 for the '\0' terminator */
1287 }
1288 else if (item->type == QI_OPR)
1289 {
1290 int8 oper;
1291
1292 oper = (int8) pq_getmsgint(buf, sizeof(int8));
1293 if (oper != OP_NOT && oper != OP_OR && oper != OP_AND && oper != OP_PHRASE)
1294 elog(ERROR, "invalid tsquery: unrecognized operator type %d",
1295 (int) oper);
1296 if (i == size - 1)
1297 elog(ERROR, "invalid pointer to right operand");
1298
1299 item->qoperator.oper = oper;
1300 if (oper == OP_PHRASE)
1301 item->qoperator.distance = (int16) pq_getmsgint(buf, sizeof(int16));
1302 }
1303 else
1304 elog(ERROR, "unrecognized tsquery node type: %d", item->type);
1305
1306 item++;
1307 }
1308
1309 /* Enlarge buffer to make room for the operand values. */
1310 query = (TSQuery) repalloc(query, len + datalen);
1311 item = GETQUERY(query);
1312 ptr = GETOPERAND(query);
1313
1314 /*
1315 * Fill in the left-pointers. Checks that the tree is well-formed as a
1316 * side-effect.
1317 */
1318 findoprnd(item, size, &needcleanup);
1319
1320 /* Can't have found any QI_VALSTOP nodes */
1321 Assert(!needcleanup);
1322
1323 /* Copy operands to output struct */
1324 for (i = 0; i < size; i++)
1325 {
1326 if (item->type == QI_VAL)
1327 {
1328 memcpy(ptr, operands[i], item->qoperand.length + 1);
1329 ptr += item->qoperand.length + 1;
1330 }
1331 item++;
1332 }
1333
1334 pfree(operands);
1335
1336 Assert(ptr - GETOPERAND(query) == datalen);
1337
1338 SET_VARSIZE(query, len + datalen);
1339
1340 PG_RETURN_TSQUERY(query);
1341}
1342
1343/*
1344 * debug function, used only for view query
1345 * which will be executed in non-leaf pages in index
1346 */
1347Datum
1348tsquerytree(PG_FUNCTION_ARGS)
1349{
1350 TSQuery query = PG_GETARG_TSQUERY(0);
1351 INFIX nrm;
1352 text *res;
1353 QueryItem *q;
1354 int len;
1355
1356 if (query->size == 0)
1357 {
1358 res = (text *) palloc(VARHDRSZ);
1359 SET_VARSIZE(res, VARHDRSZ);
1360 PG_RETURN_POINTER(res);
1361 }
1362
1363 q = clean_NOT(GETQUERY(query), &len);
1364
1365 if (!q)
1366 {
1367 res = cstring_to_text("T");
1368 }
1369 else
1370 {
1371 nrm.curpol = q;
1372 nrm.buflen = 32;
1373 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
1374 *(nrm.cur) = '\0';
1375 nrm.op = GETOPERAND(query);
1376 infix(&nrm, -1, false);
1377 res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
1378 pfree(q);
1379 }
1380
1381 PG_FREE_IF_COPY(query, 0);
1382
1383 PG_RETURN_TEXT_P(res);
1384}
1385