| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * ts_type.h |
| 4 | * Definitions for the tsvector and tsquery types |
| 5 | * |
| 6 | * Copyright (c) 1998-2019, PostgreSQL Global Development Group |
| 7 | * |
| 8 | * src/include/tsearch/ts_type.h |
| 9 | * |
| 10 | *------------------------------------------------------------------------- |
| 11 | */ |
| 12 | #ifndef _PG_TSTYPE_H_ |
| 13 | #define _PG_TSTYPE_H_ |
| 14 | |
| 15 | #include "fmgr.h" |
| 16 | #include "utils/memutils.h" |
| 17 | |
| 18 | |
| 19 | /* |
| 20 | * TSVector type. |
| 21 | * |
| 22 | * Structure of tsvector datatype: |
| 23 | * 1) standard varlena header |
| 24 | * 2) int32 size - number of lexemes (WordEntry array entries) |
| 25 | * 3) Array of WordEntry - one per lexeme; must be sorted according to |
| 26 | * tsCompareString() (ie, memcmp of lexeme strings). |
| 27 | * WordEntry->pos gives the number of bytes from end of WordEntry |
| 28 | * array to start of lexeme's string, which is of length len. |
| 29 | * 4) Per-lexeme data storage: |
| 30 | * lexeme string (not null-terminated) |
| 31 | * if haspos is true: |
| 32 | * padding byte if necessary to make the position data 2-byte aligned |
| 33 | * uint16 number of positions that follow |
| 34 | * WordEntryPos[] positions |
| 35 | * |
| 36 | * The positions for each lexeme must be sorted. |
| 37 | * |
| 38 | * Note, tsvectorsend/recv believe that sizeof(WordEntry) == 4 |
| 39 | */ |
| 40 | |
| 41 | typedef struct |
| 42 | { |
| 43 | uint32 |
| 44 | haspos:1, |
| 45 | len:11, /* MAX 2Kb */ |
| 46 | pos:20; /* MAX 1Mb */ |
| 47 | } WordEntry; |
| 48 | |
/* Largest values that fit in the 11-bit len and 20-bit pos fields of WordEntry */
#define MAXSTRLEN	(0x7FF)		/* (1 << 11) - 1 */
#define MAXSTRPOS	(0xFFFFF)	/* (1 << 20) - 1 */
| 51 | |
/*
 * Comparison callback with the (const void *, const void *) -> int shape
 * used by qsort()/bsearch().  Defined elsewhere; presumably orders two
 * WordEntryPos values -- confirm against the definition.
 */
extern int	compareWordEntryPos(const void *a, const void *b);
| 53 | |
/*
 * WordEntryPos - one lexeme position together with its weight.
 *
 * Equivalent to
 * typedef struct {
 *		uint16
 *			weight:2,
 *			pos:14;
 * }
 *
 * It is kept as a plain uint16 and taken apart with the WEP_* macros: the
 * weight lives in the two high-order bits, the position in the 14
 * low-order bits.
 */

typedef uint16 WordEntryPos;
| 64 | |
/*
 * Position data stored after a lexeme whose WordEntry has haspos set:
 * a count followed by that many positions.
 */
typedef struct
{
	uint16		npos;			/* number of positions that follow */
	WordEntryPos pos[FLEXIBLE_ARRAY_MEMBER];	/* the positions; must be
												 * sorted */
} WordEntryPosVector;
| 70 | |
/*
 * WordEntryPosVector with exactly 1 entry.
 *
 * pos is a fixed one-element array rather than a flexible array member, so
 * this type has a definite size that includes the single position.
 */
typedef struct
{
	uint16		npos;			/* position count, as in WordEntryPosVector */
	WordEntryPos pos[1];		/* the one position */
} WordEntryPosVector1;
| 77 | |
| 78 | |
/*
 * Accessors for the weight/position packing of a WordEntryPos:
 * bits 15..14 hold the weight, bits 13..0 hold the position.
 */
#define WEP_WEIGHT_SHIFT	14
#define WEP_POS_MASK		0x3fff
#define WEP_WEIGHT_MASK		0xc000

/* Read the weight or the position out of x */
#define WEP_GETWEIGHT(x)	((x) >> WEP_WEIGHT_SHIFT)
#define WEP_GETPOS(x)		((x) & WEP_POS_MASK)

/*
 * Store a new weight or position into the lvalue x, keeping the other part.
 * x is evaluated twice.  WEP_SETWEIGHT does not mask v; any bits of v above
 * the low two are shifted past bit 15 and are only dropped if x is 16 bits
 * wide.
 */
#define WEP_SETWEIGHT(x,v)	((x) = ((v) << WEP_WEIGHT_SHIFT) | ((x) & WEP_POS_MASK))
#define WEP_SETPOS(x,v)		((x) = ((x) & WEP_WEIGHT_MASK) | ((v) & WEP_POS_MASK))
| 84 | |
/* One more than the largest position that fits in the 14 position bits */
#define MAXENTRYPOS (0x4000)	/* 1 << 14 */
/* Presumably the cap on positions kept per lexeme; enforced outside this header */
#define MAXNUMPOS	(256)
/* Clamp x to at most MAXENTRYPOS - 1; x is evaluated twice */
#define LIMITPOS(x) (((x) < MAXENTRYPOS) ? (x) : (MAXENTRYPOS - 1))
| 88 | |
/*
 * This struct represents a complete tsvector datum: varlena header, lexeme
 * count, the WordEntry array, and then the per-lexeme storage.
 */
typedef struct
{
	int32		vl_len_;		/* varlena header (do not touch directly!) */
	int32		size;			/* number of lexemes (entries[] elements) */
	WordEntry	entries[FLEXIBLE_ARRAY_MEMBER];		/* one per lexeme */
	/* lexemes follow the entries[] array */
} TSVectorData;

/* tsvector values are handled through a pointer to the datum */
typedef TSVectorData *TSVector;
| 99 | |
/* Bytes that precede entries[] in a TSVectorData */
#define DATAHDRSIZE (offsetof(TSVectorData, entries))

/* Header plus nentries WordEntry slots plus lenstr bytes of lexeme storage */
#define CALCDATASIZE(nentries, lenstr) \
	(DATAHDRSIZE + sizeof(WordEntry) * (nentries) + (lenstr))
| 102 | |
/* First element of a tsvector's WordEntry array */
#define ARRPTR(x)	((x)->entries)

/*
 * First byte past the WordEntry array, i.e. the start of the lexeme
 * storage.  x is evaluated twice.
 */
#define STRPTR(x)	((char *) ((x)->entries + (x)->size))
| 108 | |
/*
 * Access to the position data of one lexeme.  x is the tsvector, e points
 * to one of its WordEntry elements.
 *
 * _POSVECPTR: address of the WordEntryPosVector that follows e's lexeme.
 *		The offset is the end of the lexeme (pos + len) passed through
 *		SHORTALIGN, which is defined elsewhere and presumably rounds up to
 *		the 2-byte boundary required for position data.  It does not test
 *		haspos.
 * POSDATALEN: number of positions stored for e, or 0 when haspos is unset.
 * POSDATAPTR: the positions array itself.  It does not test haspos, so it
 *		is only meaningful when e->haspos is set.
 */
#define _POSVECPTR(x, e)	((WordEntryPosVector *)(STRPTR(x) + SHORTALIGN((e)->pos + (e)->len)))
#define POSDATALEN(x,e) ( ( (e)->haspos ) ? (_POSVECPTR(x,e)->npos) : 0 )
#define POSDATAPTR(x,e) (_POSVECPTR(x,e)->pos)
| 112 | |
/*
 * fmgr interface macros for tsvector
 */

/* Datum -> TSVector, going through PG_DETOAST_DATUM / PG_DETOAST_DATUM_COPY */
#define DatumGetTSVector(X)			((TSVector) PG_DETOAST_DATUM(X))
#define DatumGetTSVectorCopy(X)		((TSVector) PG_DETOAST_DATUM_COPY(X))

/* TSVector -> Datum */
#define TSVectorGetDatum(X)			PointerGetDatum(X)

/* Fetch function argument n as a TSVector (plain or copied) */
#define PG_GETARG_TSVECTOR(n)		((TSVector) PG_DETOAST_DATUM(PG_GETARG_DATUM(n)))
#define PG_GETARG_TSVECTOR_COPY(n)	((TSVector) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(n)))

/* Return a TSVector from an fmgr function; expands to a return statement */
#define PG_RETURN_TSVECTOR(x)		return PointerGetDatum(x)
| 123 | |
| 124 | |
/*
 * TSQuery type.
 *
 * A tsquery is stored as a header, an array of QueryItem nodes (operators
 * and operands), and then the operand strings.
 */
| 130 | |
/*
 * Tag kept as the first member of every tsquery item variant; it holds one
 * of the QI_* values.
 */
typedef int8 QueryItemType;
| 132 | |
/*
 * Valid values for QueryItemType.  Going by the names, QI_VAL tags an
 * operand (value) item and QI_OPR an operator item.
 */
#define QI_VAL 1
#define QI_OPR 2
#define QI_VALSTOP 3			/* This is only used in an intermediate stack
								 * representation in parse_tsquery. It's not a
								 * legal type elsewhere. */
| 139 | |
| 140 | /* |
| 141 | * QueryItem is one node in tsquery - operator or operand. |
| 142 | */ |
| 143 | typedef struct |
| 144 | { |
| 145 | QueryItemType type; /* operand or kind of operator (ts_tokentype) */ |
| 146 | uint8 weight; /* weights of operand to search. It's a |
| 147 | * bitmask of allowed weights. if it =0 then |
| 148 | * any weight are allowed. Weights and bit |
| 149 | * map: A: 1<<3 B: 1<<2 C: 1<<1 D: 1<<0 */ |
| 150 | bool prefix; /* true if it's a prefix search */ |
| 151 | int32 valcrc; /* XXX: pg_crc32 would be a more appropriate |
| 152 | * data type, but we use comparisons to signed |
| 153 | * integers in the code. They would need to be |
| 154 | * changed as well. */ |
| 155 | |
| 156 | /* pointer to text value of operand, must correlate with WordEntry */ |
| 157 | uint32 |
| 158 | length:12, |
| 159 | distance:20; |
| 160 | } QueryOperand; |
| 161 | |
| 162 | |
/*
 * Legal values for QueryOperator.oper.
 *
 * The codes run from 1 to OP_COUNT without gaps, so (code - 1) can be used
 * as an array index (see OP_PRIORITY).
 */
#define OP_NOT			1
#define OP_AND			2
#define OP_OR			3
#define OP_PHRASE		4		/* highest code, tsquery_cleanup.c */
#define OP_COUNT		4		/* number of operator codes */
| 171 | |
/*
 * Priority of each operator, indexed by (operator code - 1).  The table is
 * defined elsewhere.
 */
extern const int tsearch_op_priority[OP_COUNT];

/* get operation priority by its code (x must be one of the OP_* values) */
#define OP_PRIORITY(x)	( tsearch_op_priority[(x) - 1] )
/* get QueryOperator priority; x is cast to QueryOperator * before use */
#define QO_PRIORITY(x)	OP_PRIORITY(((QueryOperator *) (x))->oper)
| 178 | |
/*
 * QueryOperator is the operator variant of a tsquery node.
 */
typedef struct
{
	QueryItemType type;			/* item tag (a QI_* value) */
	int8		oper;			/* one of the OP_* codes */
	int16		distance;		/* distance between args for OP_PHRASE */
	uint32		left;			/* pointer to left operand. Right operand is
								 * item + 1, left operand is placed
								 * item+item->left */
} QueryOperator;
| 188 | |
/*
 * QueryItem is one node of a tsquery: either an operator or an operand.
 * Both variants begin with a QueryItemType member, so the bare "type"
 * member can be read to find out which variant is stored.
 *
 * Note: TSQuery is 4-bytes aligned, so make sure there's no fields
 * inside QueryItem requiring 8-byte alignment, like int64.
 */
typedef union
{
	QueryItemType type;			/* tag shared by both variants */
	QueryOperator qoperator;	/* operator view of the node */
	QueryOperand qoperand;		/* operand view of the node */
} QueryItem;
| 199 | |
/*
 * This struct represents a complete tsquery datum.
 *
 * Storage:
 *	(len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings)
 */

typedef struct
{
	int32		vl_len_;		/* varlena header (do not touch directly!) */
	int32		size;			/* number of QueryItems */
	char		data[FLEXIBLE_ARRAY_MEMBER];	/* data starts here: the
												 * QueryItem array, then the
												 * operand strings */
} TSQueryData;

/* tsquery values are handled through a pointer to the datum */
typedef TSQueryData *TSQuery;
| 213 | |
/* Size of the fixed tsquery header: the varlena header plus the size field */
#define HDRSIZETQ	( VARHDRSZ + sizeof(int32) )

/*
 * Computes the total size of a tsquery: header, all QueryItems and all
 * operands.  size is the number of QueryItems, and lenofoperand is the
 * total length of all operands.
 */
#define COMPUTESIZE(size, lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )

/*
 * True when a tsquery with the given item count and operand length would
 * not fit in MaxAllocSize.  The test divides instead of multiplying, so
 * size * sizeof(QueryItem) is never computed.
 *
 * NOTE(review): MaxAllocSize is defined elsewhere; if it has an unsigned
 * type, a lenofoperand larger than MaxAllocSize - HDRSIZETQ makes the
 * subtraction wrap around and the test report "fits" -- confirm that
 * callers bound lenofoperand.
 */
#define TSQUERY_TOO_BIG(size, lenofoperand) \
	((size) > (MaxAllocSize - HDRSIZETQ - (lenofoperand)) / sizeof(QueryItem))
| 222 | |
/*
 * Returns a pointer to the first QueryItem in a TSQuery, which sits
 * HDRSIZETQ bytes after the start of the datum.
 */
#define GETQUERY(x)  ((QueryItem*)( (char*)(x)+HDRSIZETQ ))

/*
 * Returns a pointer to the beginning of operands in a TSQuery, i.e. the
 * first byte after the QueryItem array.  x is evaluated twice.
 */
#define GETOPERAND(x)	( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
| 228 | |
/*
 * fmgr interface macros for tsquery
 *
 * Note, TSQuery type marked as plain storage, so it can't be toasted
 * but PG_DETOAST_DATUM_COPY is used for simplicity
 */

/* Datum -> TSQuery: the plain form is a pointer cast, the copy form detoasts */
#define DatumGetTSQuery(X)			((TSQuery) DatumGetPointer(X))
#define DatumGetTSQueryCopy(X)		((TSQuery) PG_DETOAST_DATUM_COPY(X))

/* TSQuery -> Datum */
#define TSQueryGetDatum(X)			PointerGetDatum(X)

/* Fetch function argument n as a TSQuery (plain or copied) */
#define PG_GETARG_TSQUERY(n)		((TSQuery) DatumGetPointer(PG_GETARG_DATUM(n)))
#define PG_GETARG_TSQUERY_COPY(n)	((TSQuery) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(n)))

/* Return a TSQuery from an fmgr function; expands to a return statement */
#define PG_RETURN_TSQUERY(x)		return PointerGetDatum(x)
| 241 | |
| 242 | #endif /* _PG_TSTYPE_H_ */ |
| 243 | |