1/*-------------------------------------------------------------------------
2 *
3 * spell.h
4 *
5 * Declarations for ISpell dictionary
6 *
7 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
8 *
9 * src/include/tsearch/dicts/spell.h
10 *
11 *-------------------------------------------------------------------------
12 */
13
14#ifndef __SPELL_H__
15#define __SPELL_H__
16
17#include "regex/regex.h"
18#include "tsearch/dicts/regis.h"
19#include "tsearch/ts_public.h"
20
21/*
22 * SPNode and SPNodeData are used to represent prefix tree (Trie) to store
23 * a words list.
24 */
25struct SPNode;
26
27typedef struct
28{
29 uint32 val:8,
30 isword:1,
31 /* Stores compound flags listed below */
32 compoundflag:4,
33 /* Reference to an entry of the AffixData field */
34 affix:19;
35 struct SPNode *node;
36} SPNodeData;
37
/*
 * Compound-word flags.  The FF_ names are correlated with the Hunspell
 * options that may appear in an affix file; see
 * http://hunspell.sourceforge.net/
 */
#define FF_COMPOUNDONLY		0x01
#define FF_COMPOUNDBEGIN	0x02
#define FF_COMPOUNDMIDDLE	0x04
#define FF_COMPOUNDLAST		0x08

/* Begin/middle/last bits together; note that FF_COMPOUNDONLY is not included */
#define FF_COMPOUNDFLAG		(FF_COMPOUNDBEGIN | FF_COMPOUNDMIDDLE | FF_COMPOUNDLAST)

/* All four compound bits (0x0f) */
#define FF_COMPOUNDFLAGMASK (FF_COMPOUNDONLY | FF_COMPOUNDFLAG)
49
50typedef struct SPNode
51{
52 uint32 length;
53 SPNodeData data[FLEXIBLE_ARRAY_MEMBER];
54} SPNode;
55
56#define SPNHDRSZ (offsetof(SPNode,data))
57
58/*
59 * Represents an entry in a words list.
60 */
61typedef struct spell_struct
62{
63 union
64 {
65 /*
66 * flag is filled in by NIImportDictionary(). After
67 * NISortDictionary(), d is used instead of flag.
68 */
69 char *flag;
70 /* d is used in mkSPNode() */
71 struct
72 {
73 /* Reference to an entry of the AffixData field */
74 int affix;
75 /* Length of the word */
76 int len;
77 } d;
78 } p;
79 char word[FLEXIBLE_ARRAY_MEMBER];
80} SPELL;
81
82#define SPELLHDRSZ (offsetof(SPELL, word))
83
84/*
85 * Represents an entry in an affix list.
86 */
87typedef struct aff_struct
88{
89 char *flag;
90 /* FF_SUFFIX or FF_PREFIX */
91 uint32 type:1,
92 flagflags:7,
93 issimple:1,
94 isregis:1,
95 replen:14;
96 char *find;
97 char *repl;
98 union
99 {
100 regex_t regex;
101 Regis regis;
102 } reg;
103} AFFIX;
104
/*
 * Affix-level flags.  Affixes use the dictionary (FF_COMPOUND*) flags too,
 * so these values start at 0x10, above the bits covered by
 * FF_COMPOUNDFLAGMASK (0x0f).
 *
 * NOTE(review): presumably these are kept in AFFIX.flagflags, whose 7 bits
 * are wide enough for every value up to 0x40 -- confirm in spell.c.
 */
#define FF_COMPOUNDPERMITFLAG 0x10
#define FF_COMPOUNDFORBIDFLAG 0x20
#define FF_CROSSPRODUCT 0x40
111
/*
 * Affix kinds.  Do not change these values: initialization sorts by them
 * and expects prefixes to come first after sorting, so FF_PREFIX must
 * remain smaller than FF_SUFFIX.
 */
#define FF_PREFIX 0
#define FF_SUFFIX 1
118
119/*
120 * AffixNode and AffixNodeData are used to represent prefix tree (Trie) to store
121 * an affix list.
122 */
123struct AffixNode;
124
125typedef struct
126{
127 uint32 val:8,
128 naff:24;
129 AFFIX **aff;
130 struct AffixNode *node;
131} AffixNodeData;
132
133typedef struct AffixNode
134{
135 uint32 isvoid:1,
136 length:31;
137 AffixNodeData data[FLEXIBLE_ARRAY_MEMBER];
138} AffixNode;
139
140#define ANHRDSZ (offsetof(AffixNode, data))
141
/*
 * Element type of the IspellDict.CompoundAffix array.
 *
 * NOTE(review): this header does not document the struct; judging by the
 * names it describes an affix used when handling compound words -- confirm
 * in spell.c.
 */
typedef struct
{
	char	   *affix;
	int			len;			/* NOTE(review): presumably the length of
								 * affix -- confirm */
	bool		issuffix;
} CMPDAffix;
148
/*
 * How affix flags are encoded in a Hunspell dictionary.
 */
typedef enum
{
	FM_CHAR = 0,				/* one character (like ispell) */
	FM_LONG = 1,				/* two characters */
	FM_NUM = 2					/* number, >= 0 and < 65536 */
} FlagMode;
158
/*
 * Structure to store Hunspell options.  How the flag is represented depends
 * on the flag type (see FlagMode).  These flags are about support of
 * compound words.
 */
typedef struct CompoundAffixFlag
{
	union
	{
		/* Flag name if flagMode is FM_CHAR or FM_LONG */
		char	   *s;
		/* Flag name if flagMode is FM_NUM */
		uint32		i;
	}			flag;

	/*
	 * There is no bsearch_arg() variant, so every entry carries its own copy
	 * of the FlagMode; it tells which member of the union above is in use.
	 */
	FlagMode	flagMode;
	uint32		value;			/* NOTE(review): presumably a mask of FF_*
								 * bits -- confirm in spell.c */
} CompoundAffixFlag;
176
/* 65536, i.e. 1 << 16: the exclusive upper bound quoted for FM_NUM flags */
#define FLAGNUM_MAXSIZE		0x10000
178
/*
 * State of one ISpell dictionary: the affix array and the two affix tries,
 * the word trie, the AffixData array, compound-word settings, and (at the
 * end) fields that are only needed while the dictionary is being built.
 */
typedef struct
{
	/*
	 * NOTE(review): by analogy with mspell/nspell below, maffixes is
	 * presumably the allocated length and naffixes the number of valid
	 * entries of Affix -- confirm in spell.c.
	 */
	int			maffixes;
	int			naffixes;
	AFFIX	   *Affix;

	/* Affix tries (see AffixNode) */
	AffixNode  *Suffix;
	AffixNode  *Prefix;

	/* Word-list trie (see SPNode) */
	SPNode	   *Dictionary;
	/* Array of sets of affixes */
	char	  **AffixData;
	int			lenAffixData;
	int			nAffixData;
	bool		useFlagAliases;

	CMPDAffix  *CompoundAffix;

	bool		usecompound;
	FlagMode	flagMode;

	/*
	 * All following fields are actually needed only for initialization
	 */

	/* Array of Hunspell options in affix file */
	CompoundAffixFlag *CompoundAffixFlags;
	/* number of entries in CompoundAffixFlags array */
	int			nCompoundAffixFlag;
	/* allocated length of CompoundAffixFlags array */
	int			mCompoundAffixFlag;

	/*
	 * Remaining fields are only used during dictionary construction; they are
	 * set up by NIStartBuild and cleared by NIFinishBuild.
	 */
	MemoryContext buildCxt;		/* temp context for construction */

	/* Temporary array of all words in the dict file */
	SPELL	  **Spell;
	int			nspell;			/* number of valid entries in Spell array */
	int			mspell;			/* allocated length of Spell array */

	/* These are used to allocate "compact" data without palloc overhead */
	char	   *firstfree;		/* first free address (always maxaligned) */
	size_t		avail;			/* free space remaining at firstfree */
} IspellDict;
226
/*
 * Word lookup.
 * NOTE(review): judging by the name and return type, this normalizes "word"
 * using the dictionary in Conf and returns the resulting lexemes -- confirm
 * in spell.c.
 */
extern TSLexeme *NINormalizeWord(IspellDict *Conf, char *word);

/*
 * Dictionary construction.
 *
 * NIStartBuild sets up, and NIFinishBuild clears, the construction-only
 * fields at the end of IspellDict.  NIImportDictionary fills in SPELL.p.flag
 * for the words it reads; after NISortDictionary, SPELL.p.d is used instead.
 * The import functions take the name of the file to read.
 */
extern void NIStartBuild(IspellDict *Conf);
extern void NIImportAffixes(IspellDict *Conf, const char *filename);
extern void NIImportDictionary(IspellDict *Conf, const char *filename);
extern void NISortDictionary(IspellDict *Conf);
extern void NISortAffixes(IspellDict *Conf);
extern void NIFinishBuild(IspellDict *Conf);
235
236#endif
237