1/*-------------------------------------------------------------------------
2 *
3 * ts_utils.h
4 * helper utilities for tsearch
5 *
6 * Copyright (c) 1998-2019, PostgreSQL Global Development Group
7 *
8 * src/include/tsearch/ts_utils.h
9 *
10 *-------------------------------------------------------------------------
11 */
12#ifndef _PG_TS_UTILS_H_
13#define _PG_TS_UTILS_H_
14
15#include "nodes/pg_list.h"
16#include "tsearch/ts_public.h"
17#include "tsearch/ts_type.h"
18
19/*
20 * Common parse definitions for tsvector and tsquery
21 */
22
23/* tsvector parser support. */
24
/*
 * Handle type for the tsvector parser.  The structure it points to is kept
 * opaque here (it lives in tsvector_parser.c), so code including this header
 * can only pass the pointer around.
 */
typedef struct TSVectorParseStateData *TSVectorParseState;
27
/*
 * tsvector parser flag bits.  Each flag occupies its own bit, so the values
 * can be OR'ed together into a single int.
 */
#define P_TSV_OPR_IS_DELIM	(1 << 0)	/* bit 0, value 0x01 */
#define P_TSV_IS_TSQUERY	(1 << 1)	/* bit 1, value 0x02 */
#define P_TSV_IS_WEB		(1 << 2)	/* bit 2, value 0x04 */
31
32extern TSVectorParseState init_tsvector_parser(char *input, int flags);
33extern void reset_tsvector_parser(TSVectorParseState state, char *input);
34extern bool gettoken_tsvector(TSVectorParseState state,
35 char **token, int *len,
36 WordEntryPos **pos, int *poslen,
37 char **endptr);
38extern void close_tsvector_parser(TSVectorParseState state);
39
/*
 * ISOPERATOR(x)
 *
 * Evaluates to nonzero when pg_mblen(x) reports a length of 1 and the byte
 * at x is one of '!', '&', '|', '(', ')' or '<'.  The '<' is included
 * because the phrase operator begins with it.
 *
 * The argument is expanded several times, so it must be free of side
 * effects.
 */
#define ISOPERATOR(x) \
	(pg_mblen(x) == 1 && \
	 (*(x) == '!' || *(x) == '&' || *(x) == '|' || \
	  *(x) == '(' || *(x) == ')' || *(x) == '<'))
49
50/* parse_tsquery */
51
/*
 * Handle type for the tsquery parser.  The structure it points to is private
 * to backend/utils/adt/tsquery.c, so it is only ever seen here as an
 * incomplete type.
 */
typedef struct TSQueryParserStateData *TSQueryParserState;
54
55typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
56 char *token, int tokenlen,
57 int16 tokenweights, /* bitmap as described in
58 * QueryOperand struct */
59 bool prefix);
60
/*
 * tsquery parser flag bits.  The two values are distinct bits, so they can
 * be combined in one int.
 */
#define P_TSQ_PLAIN		(1 << 0)	/* bit 0, value 0x01 */
#define P_TSQ_WEB		(1 << 1)	/* bit 1, value 0x02 */
63
64extern TSQuery parse_tsquery(char *buf,
65 PushFunction pushval,
66 Datum opaque,
67 int flags);
68
69/* Functions for use by PushFunction implementations */
70extern void pushValue(TSQueryParserState state,
71 char *strval, int lenval, int16 weight, bool prefix);
72extern void pushStop(TSQueryParserState state);
73extern void pushOperator(TSQueryParserState state, int8 oper, int16 distance);
74
75/*
76 * parse plain text and lexize words
77 */
78typedef struct
79{
80 uint16 len;
81 uint16 nvariant;
82 union
83 {
84 uint16 pos;
85
86 /*
87 * When apos array is used, apos[0] is the number of elements in the
88 * array (excluding apos[0]), and alen is the allocated size of the
89 * array.
90 */
91 uint16 *apos;
92 } pos;
93 uint16 flags; /* currently, only TSL_PREFIX */
94 char *word;
95 uint32 alen;
96} ParsedWord;
97
98typedef struct
99{
100 ParsedWord *words;
101 int32 lenwords;
102 int32 curwords;
103 int32 pos;
104} ParsedText;
105
106extern void parsetext(Oid cfgId, ParsedText *prs, char *buf, int32 buflen);
107
108/*
109 * headline framework, flow in common to generate:
110 * 1 parse text with hlparsetext
111 * 2 parser-specific function to find part
112 * 3 generateHeadline to generate result text
113 */
114
115extern void hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query,
116 char *buf, int32 buflen);
117extern text *generateHeadline(HeadlineParsedText *prs);
118
119/*
120 * TSQuery execution support
121 *
122 * TS_execute() executes a tsquery against data that can be represented in
123 * various forms. The TSExecuteCallback callback function is called to check
124 * whether a given primitive tsquery value is matched in the data.
125 */
126
127/*
128 * struct ExecPhraseData is passed to a TSExecuteCallback function if we need
129 * lexeme position data (because of a phrase-match operator in the tsquery).
130 * The callback should fill in position data when it returns true (success).
131 * If it cannot return position data, it may leave "data" unchanged, but
132 * then the caller of TS_execute() must pass the TS_EXEC_PHRASE_NO_POS flag
133 * and must arrange for a later recheck with position data available.
134 *
135 * The reported lexeme positions must be sorted and unique. Callers must only
136 * consult the position bits of the pos array, ie, WEP_GETPOS(data->pos[i]).
137 * This allows the returned "pos" to point directly to the WordEntryPos
138 * portion of a tsvector value. If "allocated" is true then the pos array
139 * is palloc'd workspace and caller may free it when done.
140 *
141 * "negate" means that the pos array contains positions where the query does
142 * not match, rather than positions where it does. "width" is positive when
143 * the match is wider than one lexeme. Neither of these fields normally need
144 * to be touched by TSExecuteCallback functions; they are used for
145 * phrase-search processing within TS_execute.
146 *
147 * All fields of the ExecPhraseData struct are initially zeroed by caller.
148 */
149typedef struct ExecPhraseData
150{
151 int npos; /* number of positions reported */
152 bool allocated; /* pos points to palloc'd data? */
153 bool negate; /* positions are where query is NOT matched */
154 WordEntryPos *pos; /* ordered, non-duplicate lexeme positions */
155 int width; /* width of match in lexemes, less 1 */
156} ExecPhraseData;
157
158/*
159 * Signature for TSQuery lexeme check functions
160 *
161 * arg: opaque value passed through from caller of TS_execute
162 * val: lexeme to test for presence of
163 * data: to be filled with lexeme positions; NULL if position data not needed
164 *
165 * Return true if lexeme is present in data, else false. If data is not
166 * NULL, it should be filled with lexeme positions, but function can leave
167 * it as zeroes if position data is not available.
168 */
169typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val,
170 ExecPhraseData *data);
171
/*
 * Flag bits accepted by TS_execute
 */

/* No flags set. */
#define TS_EXEC_EMPTY			(0x00)

/*
 * TS_EXEC_CALC_NOT: when this bit is absent, every NOT expression is simply
 * taken to be true.  That is useful where NOT cannot be computed accurately
 * (GiST) or where it does not matter (ranking).  As far as TS_execute is
 * concerned, !CALC_NOT means the TSExecuteCallback function may report
 * false positives about a lexeme's presence.
 */
#define TS_EXEC_CALC_NOT		(0x01)

/*
 * TS_EXEC_PHRASE_NO_POS: when this bit is present, OP_PHRASE may be executed
 * lossily if no position information is available; a true result then only
 * says that the phrase might be present.  When the bit is absent, OP_PHRASE
 * always yields false if lexeme position information is unavailable.
 */
#define TS_EXEC_PHRASE_NO_POS	(0x02)
191
192extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags,
193 TSExecuteCallback chkcond);
194extern bool tsquery_requires_match(QueryItem *curitem);
195
196/*
197 * to_ts* - text transformation to tsvector, tsquery
198 */
199extern TSVector make_tsvector(ParsedText *prs);
200extern int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix);
201
/*
 * Strategy numbers that indexes may use:
 *
 *	TSearchStrategyNumber			(tsvector|text) @@ tsquery
 *	TSearchWithClassStrategyNumber	tsvector @@@ tsquery
 */
#define TSearchStrategyNumber			1
#define TSearchWithClassStrategyNumber	2
209
210/*
211 * TSQuery Utilities
212 */
213extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len);
214extern TSQuery cleanup_tsquery_stopwords(TSQuery in);
215
216typedef struct QTNode
217{
218 QueryItem *valnode;
219 uint32 flags;
220 int32 nchild;
221 char *word;
222 uint32 sign;
223 struct QTNode **child;
224} QTNode;
225
/*
 * Bit values stored in QTNode.flags.  Each one is a separate bit, so they
 * can be combined.
 */
#define QTN_NEEDFREE	0x01
#define QTN_NOCHANGE	0x02
#define QTN_WORDFREE	0x04
230
231typedef uint64 TSQuerySign;
232
233#define TSQS_SIGLEN (sizeof(TSQuerySign)*BITS_PER_BYTE)
234
235#define TSQuerySignGetDatum(X) Int64GetDatum((int64) (X))
236#define DatumGetTSQuerySign(X) ((TSQuerySign) DatumGetInt64(X))
237#define PG_RETURN_TSQUERYSIGN(X) return TSQuerySignGetDatum(X)
238#define PG_GETARG_TSQUERYSIGN(n) DatumGetTSQuerySign(PG_GETARG_DATUM(n))
239
240
241extern QTNode *QT2QTN(QueryItem *in, char *operand);
242extern TSQuery QTN2QT(QTNode *in);
243extern void QTNFree(QTNode *in);
244extern void QTNSort(QTNode *in);
245extern void QTNTernary(QTNode *in);
246extern void QTNBinary(QTNode *in);
247extern int QTNodeCompare(QTNode *an, QTNode *bn);
248extern QTNode *QTNCopy(QTNode *in);
249extern void QTNClearFlags(QTNode *in, uint32 flags);
250extern bool QTNEq(QTNode *a, QTNode *b);
251extern TSQuerySign makeTSQuerySign(TSQuery a);
252extern QTNode *findsubquery(QTNode *root, QTNode *ex, QTNode *subs,
253 bool *isfind);
254
255#endif /* _PG_TS_UTILS_H_ */
256