1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * spell.h |
4 | * |
5 | * Declarations for ISpell dictionary |
6 | * |
7 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
8 | * |
9 | * src/include/tsearch/dicts/spell.h |
10 | * |
11 | *------------------------------------------------------------------------- |
12 | */ |
13 | |
14 | #ifndef __SPELL_H__ |
15 | #define __SPELL_H__ |
16 | |
17 | #include "regex/regex.h" |
18 | #include "tsearch/dicts/regis.h" |
19 | #include "tsearch/ts_public.h" |
20 | |
21 | /* |
22 | * SPNode and SPNodeData are used to represent prefix tree (Trie) to store |
23 | * a words list. |
24 | */ |
25 | struct SPNode; |
26 | |
27 | typedef struct |
28 | { |
29 | uint32 val:8, |
30 | isword:1, |
31 | /* Stores compound flags listed below */ |
32 | compoundflag:4, |
33 | /* Reference to an entry of the AffixData field */ |
34 | affix:19; |
35 | struct SPNode *node; |
36 | } SPNodeData; |
37 | |
/*
 * Compound flags of a dictionary entry.  The FF_ names correspond to
 * Hunspell options in the affix file, see http://hunspell.sourceforge.net/
 *
 * Each flag occupies one bit; FF_COMPOUNDFLAGMASK covers all four of them.
 */
#define FF_COMPOUNDONLY		(1 << 0)
#define FF_COMPOUNDBEGIN	(1 << 1)
#define FF_COMPOUNDMIDDLE	(1 << 2)
#define FF_COMPOUNDLAST		(1 << 3)
/* Any position within a compound: begin, middle or last */
#define FF_COMPOUNDFLAG		(FF_COMPOUNDBEGIN | \
							 FF_COMPOUNDMIDDLE | \
							 FF_COMPOUNDLAST)
#define FF_COMPOUNDFLAGMASK	0x0f
49 | |
50 | typedef struct SPNode |
51 | { |
52 | uint32 length; |
53 | SPNodeData data[FLEXIBLE_ARRAY_MEMBER]; |
54 | } SPNode; |
55 | |
56 | #define SPNHDRSZ (offsetof(SPNode,data)) |
57 | |
58 | /* |
59 | * Represents an entry in a words list. |
60 | */ |
61 | typedef struct spell_struct |
62 | { |
63 | union |
64 | { |
65 | /* |
66 | * flag is filled in by NIImportDictionary(). After |
67 | * NISortDictionary(), d is used instead of flag. |
68 | */ |
69 | char *flag; |
70 | /* d is used in mkSPNode() */ |
71 | struct |
72 | { |
73 | /* Reference to an entry of the AffixData field */ |
74 | int affix; |
75 | /* Length of the word */ |
76 | int len; |
77 | } d; |
78 | } p; |
79 | char word[FLEXIBLE_ARRAY_MEMBER]; |
80 | } SPELL; |
81 | |
82 | #define SPELLHDRSZ (offsetof(SPELL, word)) |
83 | |
84 | /* |
85 | * Represents an entry in an affix list. |
86 | */ |
87 | typedef struct aff_struct |
88 | { |
89 | char *flag; |
90 | /* FF_SUFFIX or FF_PREFIX */ |
91 | uint32 type:1, |
92 | flagflags:7, |
93 | issimple:1, |
94 | isregis:1, |
95 | replen:14; |
96 | char *find; |
97 | char *repl; |
98 | union |
99 | { |
100 | regex_t regex; |
101 | Regis regis; |
102 | } reg; |
103 | } AFFIX; |
104 | |
/*
 * Affixes use dictionary flags too.  These bits continue where the
 * FF_COMPOUND* bits (0x01 .. 0x08) leave off.
 */
#define FF_COMPOUNDPERMITFLAG	(1 << 4)
#define FF_COMPOUNDFORBIDFLAG	(1 << 5)
#define FF_CROSSPRODUCT			(1 << 6)

/*
 * Affix types.  Don't change the relative order of these values:
 * initialization sorts by them and expects prefixes to come first after
 * sorting.
 */
#define FF_PREFIX	0
#define FF_SUFFIX	1
118 | |
119 | /* |
120 | * AffixNode and AffixNodeData are used to represent prefix tree (Trie) to store |
121 | * an affix list. |
122 | */ |
123 | struct AffixNode; |
124 | |
125 | typedef struct |
126 | { |
127 | uint32 val:8, |
128 | naff:24; |
129 | AFFIX **aff; |
130 | struct AffixNode *node; |
131 | } AffixNodeData; |
132 | |
133 | typedef struct AffixNode |
134 | { |
135 | uint32 isvoid:1, |
136 | length:31; |
137 | AffixNodeData data[FLEXIBLE_ARRAY_MEMBER]; |
138 | } AffixNode; |
139 | |
140 | #define ANHRDSZ (offsetof(AffixNode, data)) |
141 | |
/*
 * Element type of IspellDict's CompoundAffix array.
 */
typedef struct CMPDAffix
{
	char	   *affix;			/* the affix string */
	int			len;			/* NOTE(review): presumably the length of
								 * affix -- confirm */
	bool		issuffix;		/* NOTE(review): presumably true for a suffix,
								 * false for a prefix -- confirm */
} CMPDAffix;
148 | |
/*
 * How affix flags are encoded in a Hunspell dictionary.
 */
typedef enum
{
	FM_CHAR = 0,				/* one character (like ispell) */
	FM_LONG = 1,				/* two characters */
	FM_NUM = 2					/* number, >= 0 and < 65536 */
} FlagMode;
158 | |
159 | /* |
160 | * Structure to store Hunspell options. Flag representation depends on flag |
161 | * type. These flags are about support of compound words. |
162 | */ |
163 | typedef struct CompoundAffixFlag |
164 | { |
165 | union |
166 | { |
167 | /* Flag name if flagMode is FM_CHAR or FM_LONG */ |
168 | char *s; |
169 | /* Flag name if flagMode is FM_NUM */ |
170 | uint32 i; |
171 | } flag; |
172 | /* we don't have a bsearch_arg version, so, copy FlagMode */ |
173 | FlagMode flagMode; |
174 | uint32 value; |
175 | } CompoundAffixFlag; |
176 | |
/*
 * 65536, i.e. 2^16.  NOTE(review): presumably the exclusive upper bound for
 * FM_NUM flag values ("number, >= 0 and < 65536") -- confirm against users.
 */
#define FLAGNUM_MAXSIZE		0x10000
178 | |
/*
 * Complete state of an Ispell dictionary: the affix list, the affix and word
 * tries, the affix-set table, compound-word settings, and the scratch data
 * that is only needed while the dictionary is being built.
 */
typedef struct
{
	/*
	 * NOTE(review): presumably the allocated length and the number of valid
	 * entries of Affix, following the m.../n... naming used for Spell and
	 * CompoundAffixFlags below -- confirm.
	 */
	int			maffixes;
	int			naffixes;
	AFFIX	   *Affix;

	/* Affix tries */
	AffixNode  *Suffix;
	AffixNode  *Prefix;

	/* Word trie */
	SPNode	   *Dictionary;
	/* Array of sets of affixes */
	char	  **AffixData;
	int			lenAffixData;
	int			nAffixData;
	bool		useFlagAliases;

	CMPDAffix  *CompoundAffix;

	bool		usecompound;
	FlagMode	flagMode;

	/*
	 * All following fields are actually needed only for initialization
	 */

	/* Array of Hunspell options in affix file */
	CompoundAffixFlag *CompoundAffixFlags;
	/* number of entries in CompoundAffixFlags array */
	int			nCompoundAffixFlag;
	/* allocated length of CompoundAffixFlags array */
	int			mCompoundAffixFlag;

	/*
	 * Remaining fields are only used during dictionary construction; they are
	 * set up by NIStartBuild and cleared by NIFinishBuild.
	 */
	MemoryContext buildCxt;		/* temp context for construction */

	/* Temporary array of all words in the dict file */
	SPELL	  **Spell;
	int			nspell;			/* number of valid entries in Spell array */
	int			mspell;			/* allocated length of Spell array */

	/* These are used to allocate "compact" data without palloc overhead */
	char	   *firstfree;		/* first free address (always maxaligned) */
	size_t		avail;			/* free space remaining at firstfree */
} IspellDict;
226 | |
/*
 * Word lookup.  NOTE(review): judging by the name, this normalizes "word"
 * against the dictionary and returns the resulting lexemes -- confirm the
 * exact contract (including the no-match result) in the implementation.
 */
extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word);

/*
 * Dictionary construction.
 *
 * NIStartBuild sets up the construction-only fields of IspellDict and
 * NIFinishBuild clears them again.  NIImportDictionary fills in the "flag"
 * member of each SPELL entry; after NISortDictionary the "d" member is used
 * instead.  NOTE(review): the functions are presumably meant to be called in
 * the order declared here -- confirm against the callers.
 */
extern void NIStartBuild(IspellDict *Conf);
extern void NIImportAffixes(IspellDict *Conf, const char *filename);
extern void NIImportDictionary(IspellDict *Conf, const char *filename);
extern void NISortDictionary(IspellDict *Conf);
extern void NISortAffixes(IspellDict *Conf);
extern void NIFinishBuild(IspellDict *Conf);
235 | |
236 | #endif |
237 | |