/*-------------------------------------------------------------------------
 *
 * ts_type.h
 *	  Definitions for the tsvector and tsquery types
 *
 * Copyright (c) 1998-2019, PostgreSQL Global Development Group
 *
 * src/include/tsearch/ts_type.h
 *
 *-------------------------------------------------------------------------
 */
12#ifndef _PG_TSTYPE_H_
13#define _PG_TSTYPE_H_
14
15#include "fmgr.h"
16#include "utils/memutils.h"
17
18
19/*
20 * TSVector type.
21 *
22 * Structure of tsvector datatype:
23 * 1) standard varlena header
24 * 2) int32 size - number of lexemes (WordEntry array entries)
25 * 3) Array of WordEntry - one per lexeme; must be sorted according to
26 * tsCompareString() (ie, memcmp of lexeme strings).
27 * WordEntry->pos gives the number of bytes from end of WordEntry
28 * array to start of lexeme's string, which is of length len.
29 * 4) Per-lexeme data storage:
30 * lexeme string (not null-terminated)
31 * if haspos is true:
32 * padding byte if necessary to make the position data 2-byte aligned
33 * uint16 number of positions that follow
34 * WordEntryPos[] positions
35 *
36 * The positions for each lexeme must be sorted.
37 *
38 * Note, tsvectorsend/recv believe that sizeof(WordEntry) == 4
39 */
40
41typedef struct
42{
43 uint32
44 haspos:1,
45 len:11, /* MAX 2Kb */
46 pos:20; /* MAX 1Mb */
47} WordEntry;
48
/* largest value representable in WordEntry.len (11 bits) */
#define MAXSTRLEN ( (1<<11) - 1)
/* largest value representable in WordEntry.pos (20 bits) */
#define MAXSTRPOS ( (1<<20) - 1)
51
/*
 * Comparator taking two untyped pointers and returning int, i.e. the
 * signature qsort()/bsearch() expect.
 * NOTE(review): presumably orders WordEntryPos values; the definition is not
 * in this header -- confirm against the implementing .c file.
 */
extern int	compareWordEntryPos(const void *a, const void *b);
53
54/*
55 * Equivalent to
56 * typedef struct {
57 * uint16
58 * weight:2,
59 * pos:14;
60 * }
61 */
62
63typedef uint16 WordEntryPos;
64
65typedef struct
66{
67 uint16 npos;
68 WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER];
69} WordEntryPosVector;
70
71/* WordEntryPosVector with exactly 1 entry */
72typedef struct
73{
74 uint16 npos;
75 WordEntryPos pos[1];
76} WordEntryPosVector1;
77
78
/*
 * Accessors for the packed WordEntryPos layout: weight in bits 14-15,
 * position in bits 0-13.
 *
 * The weight is masked to 2 bits on both read and write, so an out-of-range
 * weight, or an operand wider than 16 bits, cannot leak into or out of bits
 * above 15.  For WordEntryPos operands the results are unchanged.
 *
 * The SET macros assign to x and evaluate it more than once; do not pass an
 * expression with side effects.
 */
#define WEP_GETWEIGHT(x)	( ( (x) >> 14 ) & 0x3 )
#define WEP_GETPOS(x)		( (x) & 0x3fff )

#define WEP_SETWEIGHT(x,v)	( (x) = ( ( (v) & 0x3 ) << 14 ) | ( (x) & 0x3fff ) )
#define WEP_SETPOS(x,v)		( (x) = ( (x) & 0xc000 ) | ( (v) & 0x3fff ) )
84
/* one past the largest position that fits in the 14-bit position field */
#define MAXENTRYPOS (1<<14)
#define MAXNUMPOS	(256)
/*
 * Clamp x to MAXENTRYPOS-1; values below MAXENTRYPOS pass through
 * unchanged.  x is evaluated twice, so it must be free of side effects.
 */
#define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
88
89/* This struct represents a complete tsvector datum */
90typedef struct
91{
92 int32 vl_len_; /* varlena header (do not touch directly!) */
93 int32 size;
94 WordEntry entries[FLEXIBLE_ARRAY_MEMBER];
95 /* lexemes follow the entries[] array */
96} TSVectorData;
97
98typedef TSVectorData *TSVector;
99
/* bytes in a tsvector that precede the entries[] array */
#define DATAHDRSIZE (offsetof(TSVectorData, entries))
/*
 * Total size in bytes: header, nentries WordEntry slots, and lenstr bytes
 * of lexeme storage.
 */
#define CALCDATASIZE(nentries, lenstr) (DATAHDRSIZE + (nentries) * sizeof(WordEntry) + (lenstr) )

/* pointer to start of a tsvector's WordEntry array */
#define ARRPTR(x)	( (x)->entries )

/* pointer to start of a tsvector's lexeme storage */
#define STRPTR(x)	( (char *) &(x)->entries[(x)->size] )

/*
 * Position data for WordEntry e of tsvector x.  It sits at the first
 * 2-byte-aligned offset after the end of the lexeme string (pos + len).
 * POSDATALEN yields 0 when haspos is not set; POSDATAPTR does no such check,
 * so use it only for entries that have position data.
 */
#define _POSVECPTR(x, e)	((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))
#define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 )
#define POSDATAPTR(x,e) (_POSVECPTR(x,e)->pos)
112
/*
 * fmgr interface macros
 *
 * DatumGetTSVector goes through PG_DETOAST_DATUM; the Copy variants go
 * through PG_DETOAST_DATUM_COPY.  PG_RETURN_TSVECTOR expands to a complete
 * return statement.
 */

#define DatumGetTSVector(X)			((TSVector) PG_DETOAST_DATUM(X))
#define DatumGetTSVectorCopy(X)		((TSVector) PG_DETOAST_DATUM_COPY(X))
#define TSVectorGetDatum(X)			PointerGetDatum(X)
#define PG_GETARG_TSVECTOR(n)		DatumGetTSVector(PG_GETARG_DATUM(n))
#define PG_GETARG_TSVECTOR_COPY(n)	DatumGetTSVectorCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_TSVECTOR(x)		return TSVectorGetDatum(x)
123
124
125/*
126 * TSQuery
127 *
128 *
129 */
130
131typedef int8 QueryItemType;
132
/* Valid values for QueryItemType: */
#define QI_VAL 1
#define QI_OPR 2
#define QI_VALSTOP 3			/* This is only used in an intermediate stack
								 * representation in parse_tsquery. It's not a
								 * legal type elsewhere. */
139
140/*
141 * QueryItem is one node in tsquery - operator or operand.
142 */
143typedef struct
144{
145 QueryItemType type; /* operand or kind of operator (ts_tokentype) */
146 uint8 weight; /* weights of operand to search. It's a
147 * bitmask of allowed weights. if it =0 then
148 * any weight are allowed. Weights and bit
149 * map: A: 1<<3 B: 1<<2 C: 1<<1 D: 1<<0 */
150 bool prefix; /* true if it's a prefix search */
151 int32 valcrc; /* XXX: pg_crc32 would be a more appropriate
152 * data type, but we use comparisons to signed
153 * integers in the code. They would need to be
154 * changed as well. */
155
156 /* pointer to text value of operand, must correlate with WordEntry */
157 uint32
158 length:12,
159 distance:20;
160} QueryOperand;
161
162
/*
 * Legal values for QueryOperator.oper.
 *
 * Codes start at 1; OP_COUNT is the number of codes and must equal the
 * highest one.
 */
#define OP_NOT			1
#define OP_AND			2
#define OP_OR			3
#define OP_PHRASE		4		/* highest code, tsquery_cleanup.c */
#define OP_COUNT		4

/* priority table with one slot per operator code, indexed by code - 1 */
extern const int tsearch_op_priority[OP_COUNT];

/* get operation priority by its code */
#define OP_PRIORITY(x)	( tsearch_op_priority[(x) - 1] )
/* get QueryOperator priority; x is cast to QueryOperator * */
#define QO_PRIORITY(x)	OP_PRIORITY(((QueryOperator *) (x))->oper)
178
179typedef struct
180{
181 QueryItemType type;
182 int8 oper; /* see above */
183 int16 distance; /* distance between agrs for OP_PHRASE */
184 uint32 left; /* pointer to left operand. Right operand is
185 * item + 1, left operand is placed
186 * item+item->left */
187} QueryOperator;
188
189/*
190 * Note: TSQuery is 4-bytes aligned, so make sure there's no fields
191 * inside QueryItem requiring 8-byte alignment, like int64.
192 */
193typedef union
194{
195 QueryItemType type;
196 QueryOperator qoperator;
197 QueryOperand qoperand;
198} QueryItem;
199
200/*
201 * Storage:
202 * (len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings)
203 */
204
205typedef struct
206{
207 int32 vl_len_; /* varlena header (do not touch directly!) */
208 int32 size; /* number of QueryItems */
209 char data[FLEXIBLE_ARRAY_MEMBER]; /* data starts here */
210} TSQueryData;
211
212typedef TSQueryData *TSQuery;
213
/* bytes preceding the QueryItem array: varlena header plus the size field */
#define HDRSIZETQ	( VARHDRSZ + sizeof(int32) )

/*
 * Computes the size of header and all QueryItems. size is the number of
 * QueryItems, and lenofoperand is the total length of all operands
 */
#define COMPUTESIZE(size, lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )

/*
 * True if a query with this many items and this much operand text cannot
 * fit within MaxAllocSize.
 *
 * The arithmetic is unsigned (HDRSIZETQ contains a sizeof), so the operand
 * length is checked on its own first: otherwise
 * MaxAllocSize - HDRSIZETQ - lenofoperand would wrap around to a huge value
 * and an oversized query would be reported as fitting.  lenofoperand is
 * evaluated twice, so it must be free of side effects.
 */
#define TSQUERY_TOO_BIG(size, lenofoperand) \
	((lenofoperand) > MaxAllocSize - HDRSIZETQ || \
	 (size) > (MaxAllocSize - HDRSIZETQ - (lenofoperand)) / sizeof(QueryItem))

/* Returns a pointer to the first QueryItem in a TSQuery */
#define GETQUERY(x)  ((QueryItem*)( (char*)(x)+HDRSIZETQ ))

/* Returns a pointer to the beginning of operands in a TSQuery */
#define GETOPERAND(x)	( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
228
/*
 * fmgr interface macros
 * Note, TSQuery type marked as plain storage, so it can't be toasted;
 * DatumGetTSQuery is therefore a plain pointer cast.  The Copy variant
 * still uses PG_DETOAST_DATUM_COPY, for simplicity.
 * PG_RETURN_TSQUERY expands to a complete return statement.
 */

#define DatumGetTSQuery(X)			((TSQuery) DatumGetPointer(X))
#define DatumGetTSQueryCopy(X)		((TSQuery) PG_DETOAST_DATUM_COPY(X))
#define TSQueryGetDatum(X)			PointerGetDatum(X)
#define PG_GETARG_TSQUERY(n)		DatumGetTSQuery(PG_GETARG_DATUM(n))
#define PG_GETARG_TSQUERY_COPY(n)	DatumGetTSQueryCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_TSQUERY(x)		return TSQueryGetDatum(x)
241
242#endif /* _PG_TSTYPE_H_ */
243