1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * ts_type.h |
4 | * Definitions for the tsvector and tsquery types |
5 | * |
6 | * Copyright (c) 1998-2019, PostgreSQL Global Development Group |
7 | * |
8 | * src/include/tsearch/ts_type.h |
9 | * |
10 | *------------------------------------------------------------------------- |
11 | */ |
12 | #ifndef _PG_TSTYPE_H_ |
13 | #define _PG_TSTYPE_H_ |
14 | |
15 | #include "fmgr.h" |
16 | #include "utils/memutils.h" |
17 | |
18 | |
19 | /* |
20 | * TSVector type. |
21 | * |
22 | * Structure of tsvector datatype: |
23 | * 1) standard varlena header |
24 | * 2) int32 size - number of lexemes (WordEntry array entries) |
25 | * 3) Array of WordEntry - one per lexeme; must be sorted according to |
26 | * tsCompareString() (ie, memcmp of lexeme strings). |
27 | * WordEntry->pos gives the number of bytes from end of WordEntry |
28 | * array to start of lexeme's string, which is of length len. |
29 | * 4) Per-lexeme data storage: |
30 | * lexeme string (not null-terminated) |
31 | * if haspos is true: |
32 | * padding byte if necessary to make the position data 2-byte aligned |
33 | * uint16 number of positions that follow |
34 | * WordEntryPos[] positions |
35 | * |
36 | * The positions for each lexeme must be sorted. |
37 | * |
38 | * Note, tsvectorsend/recv believe that sizeof(WordEntry) == 4 |
39 | */ |
40 | |
41 | typedef struct |
42 | { |
43 | uint32 |
44 | haspos:1, |
45 | len:11, /* MAX 2Kb */ |
46 | pos:20; /* MAX 1Mb */ |
47 | } WordEntry; |
48 | |
/*
 * Largest values representable in WordEntry.len (11 bits) and
 * WordEntry.pos (20 bits), respectively.
 */
#define MAXSTRLEN ( (1<<11) - 1)
#define MAXSTRPOS ( (1<<20) - 1)
51 | |
/*
 * Comparison callback taking two untyped pointers and returning int, i.e.
 * the signature used by qsort()-style routines.  Defined elsewhere.
 * NOTE(review): presumably both arguments point to WordEntryPos values and
 * the ordering is by position -- confirm against the definition.
 */
extern int compareWordEntryPos(const void *a, const void *b);
53 | |
/*
 * WordEntryPos packs a 2-bit weight (upper bits) and a 14-bit position
 * (lower bits) into one uint16.  Use the WEP_* macros to read or modify
 * the two parts rather than manipulating the bits directly.
 *
 * Equivalent to
 * typedef struct {
 *		uint16
 *			weight:2,
 *			pos:14;
 * }
 */

typedef uint16 WordEntryPos;
64 | |
/*
 * Position data stored after a lexeme whose WordEntry has haspos set:
 * a uint16 count followed by that many WordEntryPos values.
 */
typedef struct
{
	uint16		npos;			/* number of positions that follow */
	WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER];	/* the positions themselves */
} WordEntryPosVector;
70 | |
/*
 * WordEntryPosVector with exactly 1 entry.
 *
 * Same leading layout as WordEntryPosVector, but pos[] is declared with one
 * element, so the struct has a fixed size.
 */
typedef struct
{
	uint16		npos;			/* number of positions */
	WordEntryPos pos[1];		/* the single position */
} WordEntryPosVector1;
77 | |
78 | |
/*
 * Accessors for the two parts of a WordEntryPos: the weight occupies the
 * top 2 bits and the position the low 14 bits.
 */
#define WEP_GETWEIGHT(x)	( (x) >> 14 )
#define WEP_GETPOS(x)		( (x) & 0x3fff )

/*
 * Setters.  Both assign to x, so x must be an lvalue; x is evaluated more
 * than once and therefore must not have side effects.
 *
 * WEP_SETWEIGHT masks v to 2 bits before shifting, so that an out-of-range
 * or negative weight can neither make the shift overflow nor set bits above
 * bit 15.  This mirrors the masking WEP_SETPOS applies to its value.
 */
#define WEP_SETWEIGHT(x,v)	( (x) = ( ( (v) & 0x3 ) << 14 ) | ( (x) & 0x3fff ) )
#define WEP_SETPOS(x,v)		( (x) = ( (x) & 0xc000 ) | ( (v) & 0x3fff ) )
84 | |
/*
 * MAXENTRYPOS is one more than the largest position that fits in the
 * 14-bit position part of a WordEntryPos.
 *
 * NOTE(review): MAXNUMPOS is presumably the cap on the number of positions
 * kept per lexeme; nothing in this header enforces it -- confirm in callers.
 */
#define MAXENTRYPOS		(1 << 14)
#define MAXNUMPOS		(256)

/*
 * Clamp x to the largest storable position.  x is evaluated twice when it
 * is in range, so it must not have side effects.
 */
#define LIMITPOS(x)		( ( (x) < MAXENTRYPOS ) ? (x) : (MAXENTRYPOS - 1) )
88 | |
/*
 * This struct represents a complete tsvector datum.
 *
 * Only the fixed header and the WordEntry array are declared here; the
 * per-lexeme storage (strings and optional position data) follows the
 * array in the same varlena.  TSVector is the pointer type used to access it.
 */
typedef struct
{
	int32		vl_len_;		/* varlena header (do not touch directly!) */
	int32		size;			/* number of lexemes (WordEntry array entries) */
	WordEntry	entries[FLEXIBLE_ARRAY_MEMBER];	/* one per lexeme */
	/* lexemes follow the entries[] array */
} TSVectorData;

typedef TSVectorData *TSVector;
99 | |
/* Size in bytes of the part of a tsvector that precedes entries[] */
#define DATAHDRSIZE		(offsetof(TSVectorData, entries))

/*
 * Total size in bytes of a tsvector with nentries WordEntry items and
 * lenstr bytes of per-lexeme storage.
 */
#define CALCDATASIZE(nentries, lenstr) \
	(DATAHDRSIZE + sizeof(WordEntry) * (nentries) + (lenstr))
102 | |
/* pointer to start of a tsvector's WordEntry array */
#define ARRPTR(x)	((x)->entries)

/*
 * pointer to start of a tsvector's lexeme storage, i.e. the first byte past
 * the last WordEntry of the array
 */
#define STRPTR(x)	((char *) ((x)->entries + (x)->size))
108 | |
/*
 * Access to the position data of WordEntry e inside tsvector x.  The data
 * is stored after the lexeme's string, at the next 2-byte-aligned offset.
 *
 * _POSVECPTR and POSDATAPTR do not look at e->haspos; use them only for
 * entries that actually carry position data.
 */
#define _POSVECPTR(x, e) \
	((WordEntryPosVector *) (STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))

/* number of positions stored for e, or 0 if it has no position data */
#define POSDATALEN(x,e) \
	( !(e)->haspos ? 0 : (_POSVECPTR(x,e)->npos) )

/* pointer to the first WordEntryPos stored for e */
#define POSDATAPTR(x,e)		(_POSVECPTR(x,e)->pos)
112 | |
/*
 * fmgr interface macros
 *
 * DatumGetTSVector and PG_GETARG_TSVECTOR pass the datum through
 * PG_DETOAST_DATUM; the Copy/_COPY variants use PG_DETOAST_DATUM_COPY
 * instead.  PG_RETURN_TSVECTOR expands to a return statement.
 */

#define DatumGetTSVector(X) ((TSVector) PG_DETOAST_DATUM(X))
#define DatumGetTSVectorCopy(X) ((TSVector) PG_DETOAST_DATUM_COPY(X))
#define TSVectorGetDatum(X) PointerGetDatum(X)
#define PG_GETARG_TSVECTOR(n) DatumGetTSVector(PG_GETARG_DATUM(n))
#define PG_GETARG_TSVECTOR_COPY(n) DatumGetTSVectorCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_TSVECTOR(x) return TSVectorGetDatum(x)
123 | |
124 | |
125 | /* |
126 | * TSQuery |
127 | * |
128 | * |
129 | */ |
130 | |
/*
 * Discriminator of a tsquery node; it is the first member of QueryOperand,
 * QueryOperator and the QueryItem union.
 */
typedef int8 QueryItemType;

/* Valid values for QueryItemType: */
#define QI_VAL 1
#define QI_OPR 2
#define QI_VALSTOP 3			/* This is only used in an intermediate stack
								 * representation in parse_tsquery. It's not a
								 * legal type elsewhere. */
139 | |
140 | /* |
141 | * QueryItem is one node in tsquery - operator or operand. |
142 | */ |
143 | typedef struct |
144 | { |
145 | QueryItemType type; /* operand or kind of operator (ts_tokentype) */ |
146 | uint8 weight; /* weights of operand to search. It's a |
147 | * bitmask of allowed weights. if it =0 then |
148 | * any weight are allowed. Weights and bit |
149 | * map: A: 1<<3 B: 1<<2 C: 1<<1 D: 1<<0 */ |
150 | bool prefix; /* true if it's a prefix search */ |
151 | int32 valcrc; /* XXX: pg_crc32 would be a more appropriate |
152 | * data type, but we use comparisons to signed |
153 | * integers in the code. They would need to be |
154 | * changed as well. */ |
155 | |
156 | /* pointer to text value of operand, must correlate with WordEntry */ |
157 | uint32 |
158 | length:12, |
159 | distance:20; |
160 | } QueryOperand; |
161 | |
162 | |
/*
 * Legal values for QueryOperator.oper.
 */
#define OP_NOT 1
#define OP_AND 2
#define OP_OR 3
#define OP_PHRASE 4				/* highest code, tsquery_cleanup.c */
#define OP_COUNT 4				/* number of operator codes; this is the
								 * length of tsearch_op_priority[] */
171 | |
/* priority of each operator; the entry for code c is at index c - 1 */
extern const int tsearch_op_priority[OP_COUNT];

/* get operation priority by its code (x must be one of the OP_* codes) */
#define OP_PRIORITY(x) ( tsearch_op_priority[(x) - 1] )
/* get priority of the QueryOperator that x points to */
#define QO_PRIORITY(x) OP_PRIORITY(((QueryOperator *) (x))->oper)
178 | |
/*
 * QueryOperator is the operator flavor of a tsquery node (a QueryItem is
 * either an operator or an operand).
 */
typedef struct
{
	QueryItemType type;
	int8		oper;			/* one of the OP_* codes */
	int16		distance;		/* distance between args for OP_PHRASE */
	uint32		left;			/* pointer to left operand. Right operand is
								 * item + 1, left operand is placed
								 * item+item->left */
} QueryOperator;
188 | |
/*
 * QueryItem is one node in tsquery - operator or operand.
 *
 * Both variants start with a QueryItemType member, which the "type" member
 * of the union overlays.
 *
 * Note: TSQuery is 4-bytes aligned, so make sure there's no fields
 * inside QueryItem requiring 8-byte alignment, like int64.
 */
typedef union
{
	QueryItemType type;
	QueryOperator qoperator;
	QueryOperand qoperand;
} QueryItem;
199 | |
/*
 * This struct represents a complete tsquery datum.
 *
 * Storage:
 *	(len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings)
 *
 * Everything after the two header fields lives in data[].  TSQuery is the
 * pointer type used to access the datum.
 */

typedef struct
{
	int32		vl_len_;		/* varlena header (do not touch directly!) */
	int32		size;			/* number of QueryItems */
	char		data[FLEXIBLE_ARRAY_MEMBER];	/* data starts here */
} TSQueryData;

typedef TSQueryData *TSQuery;
213 | |
/* Size in bytes of the TSQueryData fields that precede the QueryItems */
#define HDRSIZETQ	( VARHDRSZ + sizeof(int32) )

/*
 * Computes the size of header and all QueryItems. size is the number of
 * QueryItems, and lenofoperand is the total length of all operands
 */
#define COMPUTESIZE(size, lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )

/*
 * True if a tsquery with "size" QueryItems and "lenofoperand" operand bytes
 * would not fit in MaxAllocSize.
 *
 * lenofoperand is checked on its own first.  The budget computed in the
 * second test is unsigned (sizeof yields size_t), so an operand length
 * larger than the available space would wrap around and make an oversized
 * query look acceptable.  Note that lenofoperand is evaluated twice.
 */
#define TSQUERY_TOO_BIG(size, lenofoperand) \
	((lenofoperand) > MaxAllocSize - HDRSIZETQ || \
	 (size) > (MaxAllocSize - HDRSIZETQ - (lenofoperand)) / sizeof(QueryItem))
222 | |
/* Returns a pointer to the first QueryItem in a TSQuery */
#define GETQUERY(x) \
	((QueryItem *) ((char *) (x) + HDRSIZETQ))

/*
 * Returns a pointer to the beginning of operands in a TSQuery, i.e. the
 * first byte after the array of QueryItems
 */
#define GETOPERAND(x) \
	( (char *) GETQUERY(x) + sizeof(QueryItem) * ((TSQuery) (x))->size )
228 | |
/*
 * fmgr interface macros
 *
 * Note, TSQuery type marked as plain storage, so it can't be toasted; that
 * is why DatumGetTSQuery is a bare DatumGetPointer.  The Copy variants
 * still go through PG_DETOAST_DATUM_COPY, which is used for simplicity.
 * PG_RETURN_TSQUERY expands to a return statement.
 */

#define DatumGetTSQuery(X) ((TSQuery) DatumGetPointer(X))
#define DatumGetTSQueryCopy(X) ((TSQuery) PG_DETOAST_DATUM_COPY(X))
#define TSQueryGetDatum(X) PointerGetDatum(X)
#define PG_GETARG_TSQUERY(n) DatumGetTSQuery(PG_GETARG_DATUM(n))
#define PG_GETARG_TSQUERY_COPY(n) DatumGetTSQueryCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_TSQUERY(x) return TSQueryGetDatum(x)
241 | |
242 | #endif /* _PG_TSTYPE_H_ */ |
243 | |