spell.c source code [PostgreSQL/src/backend/tsearch/spell.c]

1	/-------------------------------------------------------------------------*
2	*
3	* spell.c
4	* Normalizing word with ISpell
5	*
6	* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7	*
8	* Ispell dictionary
9	* -----------------
10	*
11	* Rules of dictionaries are defined in two files with .affix and .dict
12	* extensions. They are used by spell checker programs Ispell and Hunspell.
13	*
14	* An .affix file declares morphological rules to get a basic form of words.
15	* The format of an .affix file has different structure for Ispell and Hunspell
16	* dictionaries. The Hunspell format is more complicated. But when an .affix
17	* file is imported and compiled, it is stored in the same structure AffixNode.
18	*
19	* A .dict file stores a list of basic forms of words with references to
20	* affix rules. The format of a .dict file has the same structure for Ispell
21	* and Hunspell dictionaries.
22	*
23	* Compilation of a dictionary
24	* ---------------------------
25	*
 * A compiled dictionary is stored in the IspellDict structure.  Compilation
 * of a dictionary is divided into several steps:
28	* - NIImportDictionary() - stores each word of a .dict file in the
29	* temporary Spell field.
30	* - NIImportAffixes() - stores affix rules of an .affix file in the
31	* Affix field (not temporary) if an .affix file has the Ispell format.
32	* -> NIImportOOAffixes() - stores affix rules if an .affix file has the
33	* Hunspell format. The AffixData field is initialized if AF parameter
34	* is defined.
35	* - NISortDictionary() - builds a prefix tree (Trie) from the words list
36	* and stores it in the Dictionary field. The words list is got from the
37	* Spell field. The AffixData field is initialized if AF parameter is not
38	* defined.
39	* - NISortAffixes():
40	* - builds a list of compound affixes from the affix list and stores it
41	* in the CompoundAffix.
42	* - builds prefix trees (Trie) from the affix list for prefixes and suffixes
43	* and stores them in Suffix and Prefix fields.
44	* The affix list is got from the Affix field.
45	*
46	* Memory management
47	* -----------------
48	*
49	* The IspellDict structure has the Spell field which is used only in compile
50	* time. The Spell field stores a words list. It can take a lot of memory.
51	* Therefore when a dictionary is compiled this field is cleared by
52	* NIFinishBuild().
53	*
 * All resources which should be cleared by NIFinishBuild() are allocated
 * using tmpalloc() and tmpalloc0().
56	*
57	* IDENTIFICATION
58	* src/backend/tsearch/spell.c
59	*
60	*-------------------------------------------------------------------------
61	*/
62
63	#include "postgres.h"
64
65	#include "catalog/pg_collation.h"
66	#include "tsearch/dicts/spell.h"
67	#include "tsearch/ts_locale.h"
68	#include "utils/memutils.h"
69
70
/*
 * Initialization requires a lot of memory that's not needed
 * after the initialization is done.  During initialization,
 * CurrentMemoryContext is the long-lived memory context associated
 * with the dictionary cache entry.  We keep the short-lived stuff
 * in the Conf->buildCxt context.
 *
 * Both macros expand to a reference to a variable named "Conf", so they can
 * only be used where such a variable is in scope.
 */
#define tmpalloc(sz)  MemoryContextAlloc(Conf->buildCxt, (sz))
#define tmpalloc0(sz)  MemoryContextAllocZero(Conf->buildCxt, (sz))
80
81	/*
82	* Prepare for constructing an ISpell dictionary.
83	*
84	* The IspellDict struct is assumed to be zeroed when allocated.
85	*/
86	void
87	NIStartBuild(IspellDict *Conf)
88	{
89	/*
90	* The temp context is a child of CurTransactionContext, so that it will
91	* go away automatically on error.
92	*/
93	Conf->buildCxt = AllocSetContextCreate(CurTransactionContext,
94	"Ispell dictionary init context",
95	ALLOCSET_DEFAULT_SIZES);
96	}
97
98	/*
99	* Clean up when dictionary construction is complete.
100	*/
101	void
102	NIFinishBuild(IspellDict *Conf)
103	{
104	/ Release no-longer-needed temp memory /
105	MemoryContextDelete(Conf->buildCxt);
106	/ Just for cleanliness, zero the now-dangling pointers /
107	Conf->buildCxt = NULL;
108	Conf->Spell = NULL;
109	Conf->firstfree = NULL;
110	Conf->CompoundAffixFlags = NULL;
111	}
112
113
114	/*
115	* "Compact" palloc: allocate without extra palloc overhead.
116	*
117	* Since we have no need to free the ispell data items individually, there's
118	* not much value in the per-chunk overhead normally consumed by palloc.
119	* Getting rid of it is helpful since ispell can allocate a lot of small nodes.
120	*
121	* We currently pre-zero all data allocated this way, even though some of it
122	* doesn't need that. The cpalloc and cpalloc0 macros are just documentation
123	* to indicate which allocations actually require zeroing.
124	*/
125	#define COMPACT_ALLOC_CHUNK 8192 /* amount to get from palloc at once */
126	#define COMPACT_MAX_REQ 1024 /* must be < COMPACT_ALLOC_CHUNK */
127
128	static void *
129	compact_palloc0(IspellDict *Conf, size_t size)
130	{
131	void *result;
132
133	/ Should only be called during init /
134	Assert(Conf->buildCxt != NULL);
135
136	/ No point in this for large chunks /
137	if (size > COMPACT_MAX_REQ)
138	return palloc0(size);
139
140	/ Keep everything maxaligned /
141	size = MAXALIGN(size);
142
143	/ Need more space? /
144	if (size > Conf->avail)
145	{
146	Conf->firstfree = palloc0(COMPACT_ALLOC_CHUNK);
147	Conf->avail = COMPACT_ALLOC_CHUNK;
148	}
149
150	result = (void *) Conf->firstfree;
151	Conf->firstfree += size;
152	Conf->avail -= size;
153
154	return result;
155	}
156
157	#define cpalloc(size) compact_palloc0(Conf, size)
158	#define cpalloc0(size) compact_palloc0(Conf, size)
159
160	static char *
161	cpstrdup(IspellDict Conf, const* char *str)
162	{
163	char *res = cpalloc(strlen(str) + `1`);
164
165	strcpy(res, str);
166	return res;
167	}
168
169
170	/*
171	* Apply lowerstr(), producing a temporary result (in the buildCxt).
172	*/
173	static char *
174	lowerstr_ctx(IspellDict Conf, const* char *src)
175	{
176	MemoryContext saveCtx;
177	char *dst;
178
179	saveCtx = MemoryContextSwitchTo(Conf->buildCxt);
180	dst = lowerstr(src);
181	MemoryContextSwitchTo(saveCtx);
182
183	return dst;
184	}
185
#define MAX_NORM 1024
#define MAXNORMLEN 256

/* Compare the beginning of s with the whole of p (prefix match) */
#define STRNCMP(s,p)	strncmp( (s), (p), strlen(p) )
/*
 * Fetch byte N of the L-byte string W, counting from the start for prefixes
 * (T == FF_PREFIX) and from the end for anything else.
 */
#define GETWCHAR(W,L,N,T) ( ((const uint8*)(W))[ ((T)==FF_PREFIX) ? (N) : ( (L) - 1 - (N) ) ] )
/* Same, applied to the replacement string of affix A */
#define GETCHAR(A,N,T)	  GETWCHAR( (A)->repl, (A)->replen, N, T )

/* Shared empty string, used instead of allocating empty copies */
static char *VoidString = "";
194
195	static int
196	cmpspell(const void s1, const* void *s2)
197	{
198	return strcmp(((SPELL const ) s1)->word, ((SPELL *const *) s2)->word);
199	}
200
201	static int
202	cmpspellaffix(const void s1, const* void *s2)
203	{
204	return strcmp(((SPELL const *) s1)->p.flag,
205	((SPELL const *) s2)->p.flag);
206	}
207
208	static int
209	cmpcmdflag(const void f1, const* void *f2)
210	{
211	CompoundAffixFlag fv1 = (CompoundAffixFlag ) f1,
212	fv2 = (CompoundAffixFlag ) f2;
213
214	Assert(fv1->flagMode == fv2->flagMode);
215
216	if (fv1->flagMode == FM_NUM)
217	{
218	if (fv1->flag.i == fv2->flag.i)
219	return `0`;
220
221	return (fv1->flag.i > fv2->flag.i) ? `1` : -`1`;
222	}
223
224	return strcmp(fv1->flag.s, fv2->flag.s);
225	}
226
/*
 * Find the first character of str that matches c, stepping through the
 * string one (possibly multibyte) character at a time.
 *
 * Returns a pointer to the matching character, or NULL if there is none.
 */
static char *
findchar(char *str, int c)
{
	while (*str)
	{
		if (t_iseq(str, c))
			return str;
		str += pg_mblen(str);
	}

	return NULL;
}
239
/*
 * Like findchar(), but stops at the first character matching either c1 or
 * c2.
 *
 * Returns a pointer to the matching character, or NULL if there is none.
 */
static char *
findchar2(char *str, int c1, int c2)
{
	while (*str)
	{
		if (t_iseq(str, c1) || t_iseq(str, c2))
			return str;
		str += pg_mblen(str);
	}

	return NULL;
}
252
253
/*
 * Backward string compare for suffix tree operations.
 *
 * Compares s1 and s2 byte by byte starting from their last bytes.  If one
 * string runs out first while all compared bytes were equal, the shorter
 * string sorts first.
 *
 * Returns -1, 0 or 1.
 */
static int
strbcmp(const unsigned char *s1, const unsigned char *s2)
{
	/* Indexes of the last byte of each string; -1 for an empty string */
	int			l1 = strlen((const char *) s1) - 1,
				l2 = strlen((const char *) s2) - 1;

	while (l1 >= 0 && l2 >= 0)
	{
		if (s1[l1] < s2[l2])
			return -1;
		if (s1[l1] > s2[l2])
			return 1;
		l1--;
		l2--;
	}
	if (l1 < l2)
		return -1;
	if (l1 > l2)
		return 1;

	return 0;
}
277
/*
 * Backward string compare limited to at most "count" bytes.
 *
 * Works like strbcmp(), but reports equality as soon as "count" trailing
 * bytes have compared equal.
 *
 * Returns -1, 0 or 1.
 */
static int
strbncmp(const unsigned char *s1, const unsigned char *s2, size_t count)
{
	/* Indexes of the last byte of each string; -1 for an empty string */
	int			l1 = strlen((const char *) s1) - 1,
				l2 = strlen((const char *) s2) - 1,
				l = count;

	while (l1 >= 0 && l2 >= 0 && l > 0)
	{
		if (s1[l1] < s2[l2])
			return -1;
		if (s1[l1] > s2[l2])
			return 1;
		l1--;
		l2--;
		l--;
	}
	/* All requested bytes matched */
	if (l == 0)
		return 0;
	/* One string ran out first: the shorter one sorts first */
	if (l1 < l2)
		return -1;
	if (l1 > l2)
		return 1;
	return 0;
}
303
304	/*
305	* Compares affixes.
306	* First compares the type of an affix. Prefixes should go before affixes.
307	* If types are equal then compares replaceable string.
308	*/
309	static int
310	cmpaffix(const void s1, const* void *s2)
311	{
312	const AFFIX a1 = (const* AFFIX *) s1;
313	const AFFIX a2 = (const* AFFIX *) s2;
314
315	if (a1->type < a2->type)
316	return -`1`;
317	if (a1->type > a2->type)
318	return `1`;
319	if (a1->type == FF_PREFIX)
320	return strcmp(a1->repl, a2->repl);
321	else
322	return strbcmp((const unsigned char *) a1->repl,
323	(const unsigned char *) a2->repl);
324	}
325
326	/*
327	* Gets an affix flag from the set of affix flags (sflagset).
328	*
329	* Several flags can be stored in a single string. Flags can be represented by:
330	* - 1 character (FM_CHAR). A character may be Unicode.
331	* - 2 characters (FM_LONG). A character may be Unicode.
332	* - numbers from 1 to 65000 (FM_NUM).
333	*
334	* Depending on the flagMode an affix string can have the following format:
335	* - FM_CHAR: ABCD
336	* Here we have 4 flags: A, B, C and D
337	* - FM_LONG: ABCDE*
338	* Here we have 3 flags: AB, CD and E*
339	* - FM_NUM: 200,205,50
340	* Here we have 3 flags: 200, 205 and 50
341	*
342	* Conf: current dictionary.
343	* sflagset: the set of affix flags. Returns a reference to the start of a next
344	* affix flag.
345	* sflag: returns an affix flag from sflagset.
346	*/
347	static void
348	getNextFlagFromString(IspellDict Conf, char* *sflagset, char* *sflag)
349	{
350	int32 s;
351	char *next,
352	sbuf = sflagset;
353	int maxstep;
354	bool stop = false;
355	bool met_comma = false;
356
357	maxstep = (Conf->flagMode == FM_LONG) ? `2` : `1`;
358
359	while (**sflagset)
360	{
361	switch (Conf->flagMode)
362	{
363	case FM_LONG:
364	case FM_CHAR:
365	COPYCHAR(sflag, *sflagset);
366	sflag += pg_mblen(*sflagset);
367
368	/ Go to start of the next flag /
369	sflagset += pg_mblen(sflagset);
370
371	/ Check if we get all characters of flag /
372	maxstep--;
373	stop = (maxstep == `0`);
374	break;
375	case FM_NUM:
376	s = strtol(*sflagset, &next, `10`);
377	if (*sflagset == next \|\| errno == ERANGE)
378	ereport(ERROR,
379	(errcode(ERRCODE_CONFIG_FILE_ERROR),
380	errmsg("invalid affix flag \"%s\"", *sflagset)));
381	if (s < `0` \|\| s > FLAGNUM_MAXSIZE)
382	ereport(ERROR,
383	(errcode(ERRCODE_CONFIG_FILE_ERROR),
384	errmsg("affix flag \"%s\" is out of range",
385	*sflagset)));
386	sflag += sprintf(sflag, "%0d", s);
387
388	/ Go to start of the next flag /
389	*sflagset = next;
390	while (**sflagset)
391	{
392	if (t_isdigit(*sflagset))
393	{
394	if (!met_comma)
395	ereport(ERROR,
396	(errcode(ERRCODE_CONFIG_FILE_ERROR),
397	errmsg("invalid affix flag \"%s\"",
398	*sflagset)));
399	break;
400	}
401	else if (t_iseq(*sflagset, `','`))
402	{
403	if (met_comma)
404	ereport(ERROR,
405	(errcode(ERRCODE_CONFIG_FILE_ERROR),
406	errmsg("invalid affix flag \"%s\"",
407	*sflagset)));
408	met_comma = true;
409	}
410	else if (!t_isspace(*sflagset))
411	{
412	ereport(ERROR,
413	(errcode(ERRCODE_CONFIG_FILE_ERROR),
414	errmsg("invalid character in affix flag \"%s\"",
415	*sflagset)));
416	}
417
418	sflagset += pg_mblen(sflagset);
419	}
420	stop = true;
421	break;
422	default:
423	elog(ERROR, "unrecognized type of Conf->flagMode: %d",
424	Conf->flagMode);
425	}
426
427	if (stop)
428	break;
429	}
430
431	if (Conf->flagMode == FM_LONG && maxstep > `0`)
432	ereport(ERROR,
433	(errcode(ERRCODE_CONFIG_FILE_ERROR),
434	errmsg("invalid affix flag \"%s\" with \"long\" flag value",
435	sbuf)));
436
437	*sflag = `'\0'`;
438	}
439
440	/*
441	* Checks if the affix set Conf->AffixData[affix] contains affixflag.
442	* Conf->AffixData[affix] does not contain affixflag if this flag is not used
443	* actually by the .dict file.
444	*
445	* Conf: current dictionary.
446	* affix: index of the Conf->AffixData array.
447	* affixflag: the affix flag.
448	*
449	* Returns true if the string Conf->AffixData[affix] contains affixflag,
450	* otherwise returns false.
451	*/
452	static bool
453	IsAffixFlagInUse(IspellDict Conf, int* affix, const char *affixflag)
454	{
455	char *flagcur;
456	char flag[BUFSIZ];
457
458	if (*affixflag == `0`)
459	return true;
460
461	flagcur = Conf->AffixData[affix];
462
463	while (*flagcur)
464	{
465	getNextFlagFromString(Conf, &flagcur, flag);
466	/ Compare first affix flag in flagcur with affixflag /
467	if (strcmp(flag, affixflag) == `0`)
468	return true;
469	}
470
471	/ Could not find affixflag /
472	return false;
473	}
474
475	/*
476	* Adds the new word into the temporary array Spell.
477	*
478	* Conf: current dictionary.
479	* word: new word.
480	* flag: set of affix flags. Single flag can be get by getNextFlagFromString().
481	*/
482	static void
483	NIAddSpell(IspellDict Conf, const* char word, const* char *flag)
484	{
485	if (Conf->nspell >= Conf->mspell)
486	{
487	if (Conf->mspell)
488	{
489	Conf->mspell *= `2`;
490	Conf->Spell = (SPELL *) repalloc(Conf->Spell, Conf->mspell sizeof(SPELL *));
491	}
492	else
493	{
494	Conf->mspell = `1024` * `20`;
495	Conf->Spell = (SPELL *) tmpalloc(Conf->mspell sizeof(SPELL *));
496	}
497	}
498	Conf->Spell[Conf->nspell] = (SPELL *) tmpalloc(SPELLHDRSZ + strlen(word) + `1`);
499	strcpy(Conf->Spell[Conf->nspell]->word, word);
500	Conf->Spell[Conf->nspell]->p.flag = (*flag != `'\0'`)
501	? cpstrdup(Conf, flag) : VoidString;
502	Conf->nspell++;
503	}
504
505	/*
506	* Imports dictionary into the temporary array Spell.
507	*
508	* Note caller must already have applied get_tsearch_config_filename.
509	*
510	* Conf: current dictionary.
511	* filename: path to the .dict file.
512	*/
513	void
514	NIImportDictionary(IspellDict Conf, const* char *filename)
515	{
516	tsearch_readline_state trst;
517	char *line;
518
519	if (!tsearch_readline_begin(&trst, filename))
520	ereport(ERROR,
521	(errcode(ERRCODE_CONFIG_FILE_ERROR),
522	errmsg("could not open dictionary file \"%s\": %m",
523	filename)));
524
525	while ((line = tsearch_readline(&trst)) != NULL)
526	{
527	char *s,
528	*pstr;
529
530	/ Set of affix flags /
531	const char *flag;
532
533	/ Extract flag from the line /
534	flag = NULL;
535	if ((s = findchar(line, `'/'`)))
536	{
537	*s++ = `'\0'`;
538	flag = s;
539	while (*s)
540	{
541	/ we allow only single encoded flags for faster works /
542	if (pg_mblen(s) == `1` && t_isprint(s) && !t_isspace(s))
543	s++;
544	else
545	{
546	*s = `'\0'`;
547	break;
548	}
549	}
550	}
551	else
552	flag = "";
553
554	/ Remove trailing spaces /
555	s = line;
556	while (*s)
557	{
558	if (t_isspace(s))
559	{
560	*s = `'\0'`;
561	break;
562	}
563	s += pg_mblen(s);
564	}
565	pstr = lowerstr_ctx(Conf, line);
566
567	NIAddSpell(Conf, pstr, flag);
568	pfree(pstr);
569
570	pfree(line);
571	}
572	tsearch_readline_end(&trst);
573	}
574
575	/*
576	* Searches a basic form of word in the prefix tree. This word was generated
577	* using an affix rule. This rule may not be presented in an affix set of
578	* a basic form of word.
579	*
580	* For example, we have the entry in the .dict file:
581	* meter/GMD
582	*
583	* The affix rule with the flag S:
584	* SFX S y ies [^aeiou]y
585	* is not presented here.
586	*
587	* The affix rule with the flag M:
588	* SFX M 0 's .
589	* is presented here.
590	*
591	* Conf: current dictionary.
592	* word: basic form of word.
593	* affixflag: affix flag, by which a basic form of word was generated.
594	* flag: compound flag used to compare with StopMiddle->compoundflag.
595	*
596	* Returns 1 if the word was found in the prefix tree, else returns 0.
597	*/
598	static int
599	FindWord(IspellDict Conf, const* char word, const* char affixflag, int* flag)
600	{
601	SPNode *node = Conf->Dictionary;
602	SPNodeData *StopLow,
603	*StopHigh,
604	*StopMiddle;
605	const uint8 ptr = (const* uint8 *) word;
606
607	flag &= FF_COMPOUNDFLAGMASK;
608
609	while (node && *ptr)
610	{
611	StopLow = node->data;
612	StopHigh = node->data + node->length;
613	while (StopLow < StopHigh)
614	{
615	StopMiddle = StopLow + ((StopHigh - StopLow) >> `1`);
616	if (StopMiddle->val == *ptr)
617	{
618	if (*(ptr + `1`) == `'\0'` && StopMiddle->isword)
619	{
620	if (flag == `0`)
621	{
622	/*
623	* The word can be formed only with another word. And
624	* in the flag parameter there is not a sign that we
625	* search compound words.
626	*/
627	if (StopMiddle->compoundflag & FF_COMPOUNDONLY)
628	return `0`;
629	}
630	else if ((flag & StopMiddle->compoundflag) == `0`)
631	return `0`;
632
633	/*
634	* Check if this affix rule is presented in the affix set
635	* with index StopMiddle->affix.
636	*/
637	if (IsAffixFlagInUse(Conf, StopMiddle->affix, affixflag))
638	return `1`;
639	}
640	node = StopMiddle->node;
641	ptr++;
642	break;
643	}
644	else if (StopMiddle->val < *ptr)
645	StopLow = StopMiddle + `1`;
646	else
647	StopHigh = StopMiddle;
648	}
649	if (StopLow >= StopHigh)
650	break;
651	}
652	return `0`;
653	}
654
655	/*
656	* Adds a new affix rule to the Affix field.
657	*
658	* Conf: current dictionary.
659	* flag: affix flag ('\' in the below example).
660	* flagflags: set of flags from the flagval field for this affix rule. This set
661	* is listed after '/' character in the added string (repl).
662	*
663	* For example L flag in the hunspell_sample.affix:
664	* SFX \ 0 Y/L [^Y]
665	*
666	* mask: condition for search ('[^Y]' in the above example).
667	* find: stripping characters from beginning (at prefix) or end (at suffix)
668	* of the word ('0' in the above example, 0 means that there is not
669	* stripping character).
670	* repl: adding string after stripping ('Y' in the above example).
671	* type: FF_SUFFIX or FF_PREFIX.
672	*/
673	static void
674	NIAddAffix(IspellDict Conf, const* char flag, char* flagflags, const char *mask,
675	const char find, const* char repl, int* type)
676	{
677	AFFIX *Affix;
678
679	if (Conf->naffixes >= Conf->maffixes)
680	{
681	if (Conf->maffixes)
682	{
683	Conf->maffixes *= `2`;
684	Conf->Affix = (AFFIX ) repalloc((void* ) Conf->Affix, Conf->maffixes sizeof(AFFIX));
685	}
686	else
687	{
688	Conf->maffixes = `16`;
689	Conf->Affix = (AFFIX ) palloc(Conf->maffixes sizeof(AFFIX));
690	}
691	}
692
693	Affix = Conf->Affix + Conf->naffixes;
694
695	/ This affix rule can be applied for words with any ending /
696	if (strcmp(mask, ".") == `0` \|\| *mask == `'\0'`)
697	{
698	Affix->issimple = `1`;
699	Affix->isregis = `0`;
700	}
701	/ This affix rule will use regis to search word ending /
702	else if (RS_isRegis(mask))
703	{
704	Affix->issimple = `0`;
705	Affix->isregis = `1`;
706	RS_compile(&(Affix->reg.regis), (type == FF_SUFFIX),
707	*mask ? mask : VoidString);
708	}
709	/ This affix rule will use regex_t to search word ending /
710	else
711	{
712	int masklen;
713	int wmasklen;
714	int err;
715	pg_wchar *wmask;
716	char *tmask;
717
718	Affix->issimple = `0`;
719	Affix->isregis = `0`;
720	tmask = (char *) tmpalloc(strlen(mask) + `3`);
721	if (type == FF_SUFFIX)
722	sprintf(tmask, "%s$", mask);
723	else
724	sprintf(tmask, "^%s", mask);
725
726	masklen = strlen(tmask);
727	wmask = (pg_wchar ) tmpalloc((masklen + `1`) sizeof(pg_wchar));
728	wmasklen = pg_mb2wchar_with_len(tmask, wmask, masklen);
729
730	err = pg_regcomp(&(Affix->reg.regex), wmask, wmasklen,
731	REG_ADVANCED \| REG_NOSUB,
732	DEFAULT_COLLATION_OID);
733	if (err)
734	{
735	char errstr[`100`];
736
737	pg_regerror(err, &(Affix->reg.regex), errstr, sizeof(errstr));
738	ereport(ERROR,
739	(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
740	errmsg("invalid regular expression: %s", errstr)));
741	}
742	}
743
744	Affix->flagflags = flagflags;
745	if ((Affix->flagflags & FF_COMPOUNDONLY) \|\| (Affix->flagflags & FF_COMPOUNDPERMITFLAG))
746	{
747	if ((Affix->flagflags & FF_COMPOUNDFLAG) == `0`)
748	Affix->flagflags \|= FF_COMPOUNDFLAG;
749	}
750	Affix->flag = cpstrdup(Conf, flag);
751	Affix->type = type;
752
753	Affix->find = (find && *find) ? cpstrdup(Conf, find) : VoidString;
754	if ((Affix->replen = strlen(repl)) > `0`)
755	Affix->repl = cpstrdup(Conf, repl);
756	else
757	Affix->repl = VoidString;
758	Conf->naffixes++;
759	}
760
/* Parsing states for parse_affentry() and friends */
#define PAE_WAIT_MASK	0
#define PAE_INMASK		1
#define PAE_WAIT_FIND	2
#define PAE_INFIND		3
#define PAE_WAIT_REPL	4
#define PAE_INREPL		5
#define PAE_WAIT_TYPE	6
#define PAE_WAIT_FLAG	7
770
771	/*
772	* Parse next space-separated field of an .affix file line.
773	*
774	* *str is the input pointer (will be advanced past field)
775	* next is where to copy the field value to, with null termination
776	*
777	* The buffer at "next" must be of size BUFSIZ; we truncate the input to fit.
778	*
779	* Returns true if we found a field, false if not.
780	*/
781	static bool
782	get_nextfield(char *str, char* *next)
783	{
784	int state = PAE_WAIT_MASK;
785	int avail = BUFSIZ;
786
787	while (**str)
788	{
789	if (state == PAE_WAIT_MASK)
790	{
791	if (t_iseq(*str, `'#'`))
792	return false;
793	else if (!t_isspace(*str))
794	{
795	int clen = pg_mblen(*str);
796
797	if (clen < avail)
798	{
799	COPYCHAR(next, *str);
800	next += clen;
801	avail -= clen;
802	}
803	state = PAE_INMASK;
804	}
805	}
806	else / state == PAE_INMASK /
807	{
808	if (t_isspace(*str))
809	{
810	*next = `'\0'`;
811	return true;
812	}
813	else
814	{
815	int clen = pg_mblen(*str);
816
817	if (clen < avail)
818	{
819	COPYCHAR(next, *str);
820	next += clen;
821	avail -= clen;
822	}
823	}
824	}
825	str += pg_mblen(str);
826	}
827
828	*next = `'\0'`;
829
830	return (state == PAE_INMASK); / OK if we got a nonempty field /
831	}
832
833	/*
834	* Parses entry of an .affix file of MySpell or Hunspell format.
835	*
836	* An .affix file entry has the following format:
837	* - header
838	* <type> <flag> <cross_flag> <flag_count>
839	* - fields after header:
840	* <type> <flag> <find> <replace> <mask>
841	*
842	* str is the input line
843	* field values are returned to type etc, which must be buffers of size BUFSIZ.
844	*
845	* Returns number of fields found; any omitted fields are set to empty strings.
846	*/
847	static int
848	parse_ooaffentry(char str, char* type, char* flag, char* *find,
849	char repl, char* *mask)
850	{
851	int state = PAE_WAIT_TYPE;
852	int fields_read = `0`;
853	bool valid = false;
854
855	type = flag = find = repl = *mask = `'\0'`;
856
857	while (*str)
858	{
859	switch (state)
860	{
861	case PAE_WAIT_TYPE:
862	valid = get_nextfield(&str, type);
863	state = PAE_WAIT_FLAG;
864	break;
865	case PAE_WAIT_FLAG:
866	valid = get_nextfield(&str, flag);
867	state = PAE_WAIT_FIND;
868	break;
869	case PAE_WAIT_FIND:
870	valid = get_nextfield(&str, find);
871	state = PAE_WAIT_REPL;
872	break;
873	case PAE_WAIT_REPL:
874	valid = get_nextfield(&str, repl);
875	state = PAE_WAIT_MASK;
876	break;
877	case PAE_WAIT_MASK:
878	valid = get_nextfield(&str, mask);
879	state = -`1`; / force loop exit /
880	break;
881	default:
882	elog(ERROR, "unrecognized state in parse_ooaffentry: %d",
883	state);
884	break;
885	}
886	if (valid)
887	fields_read++;
888	else
889	break; / early EOL /
890	if (state < `0`)
891	break; / got all fields /
892	}
893
894	return fields_read;
895	}
896
897	/*
898	* Parses entry of an .affix file of Ispell format
899	*
900	* An .affix file entry has the following format:
901	* <mask> > [-<find>,]<replace>
902	*/
903	static bool
904	parse_affentry(char str, char* mask, char* find, char* *repl)
905	{
906	int state = PAE_WAIT_MASK;
907	char *pmask = mask,
908	*pfind = find,
909	*prepl = repl;
910
911	mask = find = *repl = `'\0'`;
912
913	while (*str)
914	{
915	if (state == PAE_WAIT_MASK)
916	{
917	if (t_iseq(str, `'#'`))
918	return false;
919	else if (!t_isspace(str))
920	{
921	COPYCHAR(pmask, str);
922	pmask += pg_mblen(str);
923	state = PAE_INMASK;
924	}
925	}
926	else if (state == PAE_INMASK)
927	{
928	if (t_iseq(str, `'>'`))
929	{
930	*pmask = `'\0'`;
931	state = PAE_WAIT_FIND;
932	}
933	else if (!t_isspace(str))
934	{
935	COPYCHAR(pmask, str);
936	pmask += pg_mblen(str);
937	}
938	}
939	else if (state == PAE_WAIT_FIND)
940	{
941	if (t_iseq(str, `'-'`))
942	{
943	state = PAE_INFIND;
944	}
945	else if (t_isalpha(str) \|\| t_iseq(str, `'\''`) / english 's / )
946	{
947	COPYCHAR(prepl, str);
948	prepl += pg_mblen(str);
949	state = PAE_INREPL;
950	}
951	else if (!t_isspace(str))
952	ereport(ERROR,
953	(errcode(ERRCODE_CONFIG_FILE_ERROR),
954	errmsg("syntax error")));
955	}
956	else if (state == PAE_INFIND)
957	{
958	if (t_iseq(str, `','`))
959	{
960	*pfind = `'\0'`;
961	state = PAE_WAIT_REPL;
962	}
963	else if (t_isalpha(str))
964	{
965	COPYCHAR(pfind, str);
966	pfind += pg_mblen(str);
967	}
968	else if (!t_isspace(str))
969	ereport(ERROR,
970	(errcode(ERRCODE_CONFIG_FILE_ERROR),
971	errmsg("syntax error")));
972	}
973	else if (state == PAE_WAIT_REPL)
974	{
975	if (t_iseq(str, `'-'`))
976	{
977	break; / void repl /
978	}
979	else if (t_isalpha(str))
980	{
981	COPYCHAR(prepl, str);
982	prepl += pg_mblen(str);
983	state = PAE_INREPL;
984	}
985	else if (!t_isspace(str))
986	ereport(ERROR,
987	(errcode(ERRCODE_CONFIG_FILE_ERROR),
988	errmsg("syntax error")));
989	}
990	else if (state == PAE_INREPL)
991	{
992	if (t_iseq(str, `'#'`))
993	{
994	*prepl = `'\0'`;
995	break;
996	}
997	else if (t_isalpha(str))
998	{
999	COPYCHAR(prepl, str);
1000	prepl += pg_mblen(str);
1001	}
1002	else if (!t_isspace(str))
1003	ereport(ERROR,
1004	(errcode(ERRCODE_CONFIG_FILE_ERROR),
1005	errmsg("syntax error")));
1006	}
1007	else
1008	elog(ERROR, "unrecognized state in parse_affentry: %d", state);
1009
1010	str += pg_mblen(str);
1011	}
1012
1013	pmask = pfind = *prepl = `'\0'`;
1014
1015	return (mask && (find \|\| *repl));
1016	}
1017
1018	/*
1019	* Sets a Hunspell options depending on flag type.
1020	*/
1021	static void
1022	setCompoundAffixFlagValue(IspellDict Conf, CompoundAffixFlag entry,
1023	char *s, uint32 val)
1024	{
1025	if (Conf->flagMode == FM_NUM)
1026	{
1027	char *next;
1028	int i;
1029
1030	i = strtol(s, &next, `10`);
1031	if (s == next \|\| errno == ERANGE)
1032	ereport(ERROR,
1033	(errcode(ERRCODE_CONFIG_FILE_ERROR),
1034	errmsg("invalid affix flag \"%s\"", s)));
1035	if (i < `0` \|\| i > FLAGNUM_MAXSIZE)
1036	ereport(ERROR,
1037	(errcode(ERRCODE_CONFIG_FILE_ERROR),
1038	errmsg("affix flag \"%s\" is out of range", s)));
1039
1040	entry->flag.i = i;
1041	}
1042	else
1043	entry->flag.s = cpstrdup(Conf, s);
1044
1045	entry->flagMode = Conf->flagMode;
1046	entry->value = val;
1047	}
1048
1049	/*
1050	* Sets up a correspondence for the affix parameter with the affix flag.
1051	*
1052	* Conf: current dictionary.
1053	* s: affix flag in string.
1054	* val: affix parameter.
1055	*/
1056	static void
1057	addCompoundAffixFlagValue(IspellDict Conf, char* *s, uint32 val)
1058	{
1059	CompoundAffixFlag *newValue;
1060	char sbuf[BUFSIZ];
1061	char *sflag;
1062	int clen;
1063
1064	while (*s && t_isspace(s))
1065	s += pg_mblen(s);
1066
1067	if (!*s)
1068	ereport(ERROR,
1069	(errcode(ERRCODE_CONFIG_FILE_ERROR),
1070	errmsg("syntax error")));
1071
1072	/ Get flag without \n /
1073	sflag = sbuf;
1074	while (s && !t_isspace(s) && s != `'\n'`)
1075	{
1076	clen = pg_mblen(s);
1077	COPYCHAR(sflag, s);
1078	sflag += clen;
1079	s += clen;
1080	}
1081	*sflag = `'\0'`;
1082
1083	/ Resize array or allocate memory for array CompoundAffixFlag /
1084	if (Conf->nCompoundAffixFlag >= Conf->mCompoundAffixFlag)
1085	{
1086	if (Conf->mCompoundAffixFlag)
1087	{
1088	Conf->mCompoundAffixFlag *= `2`;
1089	Conf->CompoundAffixFlags = (CompoundAffixFlag *)
1090	repalloc((void *) Conf->CompoundAffixFlags,
1091	Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
1092	}
1093	else
1094	{
1095	Conf->mCompoundAffixFlag = `10`;
1096	Conf->CompoundAffixFlags = (CompoundAffixFlag *)
1097	tmpalloc(Conf->mCompoundAffixFlag * sizeof(CompoundAffixFlag));
1098	}
1099	}
1100
1101	newValue = Conf->CompoundAffixFlags + Conf->nCompoundAffixFlag;
1102
1103	setCompoundAffixFlagValue(Conf, newValue, sbuf, val);
1104
1105	Conf->usecompound = true;
1106	Conf->nCompoundAffixFlag++;
1107	}
1108
1109	/*
1110	* Returns a set of affix parameters which correspondence to the set of affix
1111	* flags s.
1112	*/
1113	static int
1114	getCompoundAffixFlagValue(IspellDict Conf, char* *s)
1115	{
1116	uint32 flag = `0`;
1117	CompoundAffixFlag *found,
1118	key;
1119	char sflag[BUFSIZ];
1120	char *flagcur;
1121
1122	if (Conf->nCompoundAffixFlag == `0`)
1123	return `0`;
1124
1125	flagcur = s;
1126	while (*flagcur)
1127	{
1128	getNextFlagFromString(Conf, &flagcur, sflag);
1129	setCompoundAffixFlagValue(Conf, &key, sflag, `0`);
1130
1131	found = (CompoundAffixFlag *)
1132	bsearch(&key, (void *) Conf->CompoundAffixFlags,
1133	Conf->nCompoundAffixFlag, sizeof(CompoundAffixFlag),
1134	cmpcmdflag);
1135	if (found != NULL)
1136	flag \|= found->value;
1137	}
1138
1139	return flag;
1140	}
1141
1142	/*
1143	* Returns a flag set using the s parameter.
1144	*
1145	* If Conf->useFlagAliases is true then the s parameter is index of the
1146	* Conf->AffixData array and function returns its entry.
1147	* Else function returns the s parameter.
1148	*/
1149	static char *
1150	getAffixFlagSet(IspellDict Conf, char* *s)
1151	{
1152	if (Conf->useFlagAliases && *s != `'\0'`)
1153	{
1154	int curaffix;
1155	char *end;
1156
1157	curaffix = strtol(s, &end, `10`);
1158	if (s == end \|\| errno == ERANGE)
1159	ereport(ERROR,
1160	(errcode(ERRCODE_CONFIG_FILE_ERROR),
1161	errmsg("invalid affix alias \"%s\"", s)));
1162
1163	if (curaffix > `0` && curaffix <= Conf->nAffixData)
1164
1165	/*
1166	* Do not subtract 1 from curaffix because empty string was added
1167	* in NIImportOOAffixes
1168	*/
1169	return Conf->AffixData[curaffix];
1170	else
1171	return VoidString;
1172	}
1173	else
1174	return s;
1175	}
1176
1177	/*
1178	* Import an affix file that follows MySpell or Hunspell format.
1179	*
1180	* Conf: current dictionary.
1181	* filename: path to the .affix file.
1182	*/
1183	static void
1184	NIImportOOAffixes(IspellDict Conf, const* char *filename)
1185	{
1186	char type[BUFSIZ],
1187	*ptype = NULL;
1188	char sflag[BUFSIZ];
1189	char mask[BUFSIZ],
1190	*pmask;
1191	char find[BUFSIZ],
1192	*pfind;
1193	char repl[BUFSIZ],
1194	*prepl;
1195	bool isSuffix = false;
1196	int naffix = `0`,
1197	curaffix = `0`;
1198	int sflaglen = `0`;
1199	char flagflags = `0`;
1200	tsearch_readline_state trst;
1201	char *recoded;
1202
1203	/ read file to find any flag /
1204	Conf->usecompound = false;
1205	Conf->useFlagAliases = false;
1206	Conf->flagMode = FM_CHAR;
1207
1208	if (!tsearch_readline_begin(&trst, filename))
1209	ereport(ERROR,
1210	(errcode(ERRCODE_CONFIG_FILE_ERROR),
1211	errmsg("could not open affix file \"%s\": %m",
1212	filename)));
1213
1214	while ((recoded = tsearch_readline(&trst)) != NULL)
1215	{
1216	if (*recoded == `'\0'` \|\| t_isspace(recoded) \|\| t_iseq(recoded, `'#'`))
1217	{
1218	pfree(recoded);
1219	continue;
1220	}
1221
1222	if (STRNCMP(recoded, "COMPOUNDFLAG") == `0`)
1223	addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDFLAG"),
1224	FF_COMPOUNDFLAG);
1225	else if (STRNCMP(recoded, "COMPOUNDBEGIN") == `0`)
1226	addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDBEGIN"),
1227	FF_COMPOUNDBEGIN);
1228	else if (STRNCMP(recoded, "COMPOUNDLAST") == `0`)
1229	addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDLAST"),
1230	FF_COMPOUNDLAST);
1231	/ COMPOUNDLAST and COMPOUNDEND are synonyms /
1232	else if (STRNCMP(recoded, "COMPOUNDEND") == `0`)
1233	addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDEND"),
1234	FF_COMPOUNDLAST);
1235	else if (STRNCMP(recoded, "COMPOUNDMIDDLE") == `0`)
1236	addCompoundAffixFlagValue(Conf, recoded + strlen("COMPOUNDMIDDLE"),
1237	FF_COMPOUNDMIDDLE);
1238	else if (STRNCMP(recoded, "ONLYINCOMPOUND") == `0`)
1239	addCompoundAffixFlagValue(Conf, recoded + strlen("ONLYINCOMPOUND"),
1240	FF_COMPOUNDONLY);
1241	else if (STRNCMP(recoded, "COMPOUNDPERMITFLAG") == `0`)
1242	addCompoundAffixFlagValue(Conf,
1243	recoded + strlen("COMPOUNDPERMITFLAG"),
1244	FF_COMPOUNDPERMITFLAG);
1245	else if (STRNCMP(recoded, "COMPOUNDFORBIDFLAG") == `0`)
1246	addCompoundAffixFlagValue(Conf,
1247	recoded + strlen("COMPOUNDFORBIDFLAG"),
1248	FF_COMPOUNDFORBIDFLAG);
1249	else if (STRNCMP(recoded, "FLAG") == `0`)
1250	{
1251	char *s = recoded + strlen("FLAG");
1252
1253	while (*s && t_isspace(s))
1254	s += pg_mblen(s);
1255
1256	if (*s)
1257	{
1258	if (STRNCMP(s, "long") == `0`)
1259	Conf->flagMode = FM_LONG;
1260	else if (STRNCMP(s, "num") == `0`)
1261	Conf->flagMode = FM_NUM;
1262	else if (STRNCMP(s, "default") != `0`)
1263	ereport(ERROR,
1264	(errcode(ERRCODE_CONFIG_FILE_ERROR),
1265	errmsg("Ispell dictionary supports only "
1266	"\"default\", \"long\", "
1267	"and \"num\" flag values")));
1268	}
1269	}
1270
1271	pfree(recoded);
1272	}
1273	tsearch_readline_end(&trst);
1274
1275	if (Conf->nCompoundAffixFlag > `1`)
1276	qsort((void *) Conf->CompoundAffixFlags, Conf->nCompoundAffixFlag,
1277	sizeof(CompoundAffixFlag), cmpcmdflag);
1278
1279	if (!tsearch_readline_begin(&trst, filename))
1280	ereport(ERROR,
1281	(errcode(ERRCODE_CONFIG_FILE_ERROR),
1282	errmsg("could not open affix file \"%s\": %m",
1283	filename)));
1284
1285	while ((recoded = tsearch_readline(&trst)) != NULL)
1286	{
1287	int fields_read;
1288
1289	if (*recoded == `'\0'` \|\| t_isspace(recoded) \|\| t_iseq(recoded, `'#'`))
1290	goto nextline;
1291
1292	fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask);
1293
1294	if (ptype)
1295	pfree(ptype);
1296	ptype = lowerstr_ctx(Conf, type);
1297
1298	/ First try to parse AF parameter (alias compression) /
1299	if (STRNCMP(ptype, "af") == `0`)
1300	{
1301	/ First line is the number of aliases /
1302	if (!Conf->useFlagAliases)
1303	{
1304	Conf->useFlagAliases = true;
1305	naffix = atoi(sflag);
1306	if (naffix <= `0`)
1307	ereport(ERROR,
1308	(errcode(ERRCODE_CONFIG_FILE_ERROR),
1309	errmsg("invalid number of flag vector aliases")));
1310
1311	/ Also reserve place for empty flag set /
1312	naffix++;
1313
1314	Conf->AffixData = (char *) palloc0(naffix sizeof(char *));
1315	Conf->lenAffixData = Conf->nAffixData = naffix;
1316
1317	/ Add empty flag set into AffixData /
1318	Conf->AffixData[curaffix] = VoidString;
1319	curaffix++;
1320	}
1321	/ Other lines are aliases /
1322	else
1323	{
1324	if (curaffix < naffix)
1325	{
1326	Conf->AffixData[curaffix] = cpstrdup(Conf, sflag);
1327	curaffix++;
1328	}
1329	else
1330	ereport(ERROR,
1331	(errcode(ERRCODE_CONFIG_FILE_ERROR),
1332	errmsg("number of aliases exceeds specified number %d",
1333	naffix - `1`)));
1334	}
1335	goto nextline;
1336	}
1337	/ Else try to parse prefixes and suffixes /
1338	if (fields_read < `4` \|\|
1339	(STRNCMP(ptype, "sfx") != `0` && STRNCMP(ptype, "pfx") != `0`))
1340	goto nextline;
1341
1342	sflaglen = strlen(sflag);
1343	if (sflaglen == `0`
1344	\|\| (sflaglen > `1` && Conf->flagMode == FM_CHAR)
1345	\|\| (sflaglen > `2` && Conf->flagMode == FM_LONG))
1346	goto nextline;
1347
1348	/--------*
1349	* Affix header. For example:
1350	* SFX \ N 1
1351	*--------
1352	*/
1353	if (fields_read == `4`)
1354	{
1355	isSuffix = (STRNCMP(ptype, "sfx") == `0`);
1356	if (t_iseq(find, `'y'`) \|\| t_iseq(find, `'Y'`))
1357	flagflags = FF_CROSSPRODUCT;
1358	else
1359	flagflags = `0`;
1360	}
1361	/--------*
1362	* Affix fields. For example:
1363	* SFX \ 0 Y/L [^Y]
1364	*--------
1365	*/
1366	else
1367	{
1368	char *ptr;
1369	int aflg = `0`;
1370
1371	/ Get flags after '/' (flags are case sensitive) /
1372	if ((ptr = strchr(repl, `'/'`)) != NULL)
1373	aflg \|= getCompoundAffixFlagValue(Conf,
1374	getAffixFlagSet(Conf,
1375	ptr + `1`));
1376	/ Get lowercased version of string before '/' /
1377	prepl = lowerstr_ctx(Conf, repl);
1378	if ((ptr = strchr(prepl, `'/'`)) != NULL)
1379	*ptr = `'\0'`;
1380	pfind = lowerstr_ctx(Conf, find);
1381	pmask = lowerstr_ctx(Conf, mask);
1382	if (t_iseq(find, `'0'`))
1383	*pfind = `'\0'`;
1384	if (t_iseq(repl, `'0'`))
1385	*prepl = `'\0'`;
1386
1387	NIAddAffix(Conf, sflag, flagflags \| aflg, pmask, pfind, prepl,
1388	isSuffix ? FF_SUFFIX : FF_PREFIX);
1389	pfree(prepl);
1390	pfree(pfind);
1391	pfree(pmask);
1392	}
1393
1394	nextline:
1395	pfree(recoded);
1396	}
1397
1398	tsearch_readline_end(&trst);
1399	if (ptype)
1400	pfree(ptype);
1401	}
1402
1403	/*
1404	* import affixes
1405	*
1406	* Note caller must already have applied get_tsearch_config_filename
1407	*
1408	* This function is responsible for parsing ispell ("old format") affix files.
1409	* If we realize that the file contains new-format commands, we pass off the
1410	* work to NIImportOOAffixes(), which will re-read the whole file.
1411	*/
1412	void
1413	NIImportAffixes(IspellDict Conf, const* char *filename)
1414	{
1415	char *pstr = NULL;
1416	char flag[BUFSIZ];
1417	char mask[BUFSIZ];
1418	char find[BUFSIZ];
1419	char repl[BUFSIZ];
1420	char *s;
1421	bool suffixes = false;
1422	bool prefixes = false;
1423	char flagflags = `0`;
1424	tsearch_readline_state trst;
1425	bool oldformat = false;
1426	char *recoded = NULL;
1427
1428	if (!tsearch_readline_begin(&trst, filename))
1429	ereport(ERROR,
1430	(errcode(ERRCODE_CONFIG_FILE_ERROR),
1431	errmsg("could not open affix file \"%s\": %m",
1432	filename)));
1433
1434	Conf->usecompound = false;
1435	Conf->useFlagAliases = false;
1436	Conf->flagMode = FM_CHAR;
1437
1438	while ((recoded = tsearch_readline(&trst)) != NULL)
1439	{
1440	pstr = lowerstr(recoded);
1441
1442	/ Skip comments and empty lines /
1443	if (pstr == `'#'` \|\| pstr == `'\n'`)
1444	goto nextline;
1445
1446	if (STRNCMP(pstr, "compoundwords") == `0`)
1447	{
1448	/ Find case-insensitive L flag in non-lowercased string /
1449	s = findchar2(recoded, `'l'`, `'L'`);
1450	if (s)
1451	{
1452	while (*s && !t_isspace(s))
1453	s += pg_mblen(s);
1454	while (*s && t_isspace(s))
1455	s += pg_mblen(s);
1456
1457	if (*s && pg_mblen(s) == `1`)
1458	{
1459	addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG);
1460	Conf->usecompound = true;
1461	}
1462	oldformat = true;
1463	goto nextline;
1464	}
1465	}
1466	if (STRNCMP(pstr, "suffixes") == `0`)
1467	{
1468	suffixes = true;
1469	prefixes = false;
1470	oldformat = true;
1471	goto nextline;
1472	}
1473	if (STRNCMP(pstr, "prefixes") == `0`)
1474	{
1475	suffixes = false;
1476	prefixes = true;
1477	oldformat = true;
1478	goto nextline;
1479	}
1480	if (STRNCMP(pstr, "flag") == `0`)
1481	{
1482	s = recoded + `4`; / we need non-lowercased string /
1483	flagflags = `0`;
1484
1485	while (*s && t_isspace(s))
1486	s += pg_mblen(s);
1487
1488	if (s == `''`)
1489	{
1490	flagflags \|= FF_CROSSPRODUCT;
1491	s++;
1492	}
1493	else if (*s == `'~'`)
1494	{
1495	flagflags \|= FF_COMPOUNDONLY;
1496	s++;
1497	}
1498
1499	if (*s == `'\\'`)
1500	s++;
1501
1502	/*
1503	* An old-format flag is a single ASCII character; we expect it to
1504	* be followed by EOL, whitespace, or ':'. Otherwise this is a
1505	* new-format flag command.
1506	*/
1507	if (*s && pg_mblen(s) == `1`)
1508	{
1509	COPYCHAR(flag, s);
1510	flag[`1`] = `'\0'`;
1511
1512	s++;
1513	if (s == `'\0'` \|\| s == `'#'` \|\| s == `'\n'` \|\| s == `':'` \|\|
1514	t_isspace(s))
1515	{
1516	oldformat = true;
1517	goto nextline;
1518	}
1519	}
1520	goto isnewformat;
1521	}
1522	if (STRNCMP(recoded, "COMPOUNDFLAG") == `0` \|\|
1523	STRNCMP(recoded, "COMPOUNDMIN") == `0` \|\|
1524	STRNCMP(recoded, "PFX") == `0` \|\|
1525	STRNCMP(recoded, "SFX") == `0`)
1526	goto isnewformat;
1527
1528	if ((!suffixes) && (!prefixes))
1529	goto nextline;
1530
1531	if (!parse_affentry(pstr, mask, find, repl))
1532	goto nextline;
1533
1534	NIAddAffix(Conf, flag, flagflags, mask, find, repl, suffixes ? FF_SUFFIX : FF_PREFIX);
1535
1536	nextline:
1537	pfree(recoded);
1538	pfree(pstr);
1539	}
1540	tsearch_readline_end(&trst);
1541	return;
1542
1543	isnewformat:
1544	if (oldformat)
1545	ereport(ERROR,
1546	(errcode(ERRCODE_CONFIG_FILE_ERROR),
1547	errmsg("affix file contains both old-style and new-style commands")));
1548	tsearch_readline_end(&trst);
1549
1550	NIImportOOAffixes(Conf, filename);
1551	}
1552
1553	/*
1554	* Merges two affix flag sets and stores a new affix flag set into
1555	* Conf->AffixData.
1556	*
1557	* Returns index of a new affix flag set.
1558	*/
1559	static int
1560	MergeAffix(IspellDict Conf, int* a1, int a2)
1561	{
1562	char **ptr;
1563
1564	/ Do not merge affix flags if one of affix flags is empty /
1565	if (*Conf->AffixData[a1] == `'\0'`)
1566	return a2;
1567	else if (*Conf->AffixData[a2] == `'\0'`)
1568	return a1;
1569
1570	while (Conf->nAffixData + `1` >= Conf->lenAffixData)
1571	{
1572	Conf->lenAffixData *= `2`;
1573	Conf->AffixData = (char **) repalloc(Conf->AffixData,
1574	sizeof(char ) Conf->lenAffixData);
1575	}
1576
1577	ptr = Conf->AffixData + Conf->nAffixData;
1578	if (Conf->flagMode == FM_NUM)
1579	{
1580	*ptr = cpalloc(strlen(Conf->AffixData[a1]) +
1581	strlen(Conf->AffixData[a2]) +
1582	`1` / comma / + `1` / \0 / );
1583	sprintf(*ptr, "%s,%s", Conf->AffixData[a1], Conf->AffixData[a2]);
1584	}
1585	else
1586	{
1587	*ptr = cpalloc(strlen(Conf->AffixData[a1]) +
1588	strlen(Conf->AffixData[a2]) +
1589	`1` / \0 / );
1590	sprintf(*ptr, "%s%s", Conf->AffixData[a1], Conf->AffixData[a2]);
1591	}
1592	ptr++;
1593	*ptr = NULL;
1594	Conf->nAffixData++;
1595
1596	return Conf->nAffixData - `1`;
1597	}
1598
1599	/*
1600	* Returns a set of affix parameters which correspondence to the set of affix
1601	* flags with the given index.
1602	*/
1603	static uint32
1604	makeCompoundFlags(IspellDict Conf, int* affix)
1605	{
1606	char *str = Conf->AffixData[affix];
1607
1608	return (getCompoundAffixFlagValue(Conf, str) & FF_COMPOUNDFLAGMASK);
1609	}
1610
1611	/*
1612	* Makes a prefix tree for the given level.
1613	*
1614	* Conf: current dictionary.
1615	* low: lower index of the Conf->Spell array.
1616	* high: upper index of the Conf->Spell array.
1617	* level: current prefix tree level.
1618	*/
1619	static SPNode *
1620	mkSPNode(IspellDict Conf, int* low, int high, int level)
1621	{
1622	int i;
1623	int nchar = `0`;
1624	char lastchar = `'\0'`;
1625	SPNode *rs;
1626	SPNodeData *data;
1627	int lownew = low;
1628
1629	for (i = low; i < high; i++)
1630	if (Conf->Spell[i]->p.d.len > level && lastchar != Conf->Spell[i]->word[level])
1631	{
1632	nchar++;
1633	lastchar = Conf->Spell[i]->word[level];
1634	}
1635
1636	if (!nchar)
1637	return NULL;
1638
1639	rs = (SPNode ) cpalloc0(SPNHDRSZ + nchar sizeof(SPNodeData));
1640	rs->length = nchar;
1641	data = rs->data;
1642
1643	lastchar = `'\0'`;
1644	for (i = low; i < high; i++)
1645	if (Conf->Spell[i]->p.d.len > level)
1646	{
1647	if (lastchar != Conf->Spell[i]->word[level])
1648	{
1649	if (lastchar)
1650	{
1651	/ Next level of the prefix tree /
1652	data->node = mkSPNode(Conf, lownew, i, level + `1`);
1653	lownew = i;
1654	data++;
1655	}
1656	lastchar = Conf->Spell[i]->word[level];
1657	}
1658	data->val = ((uint8 *) (Conf->Spell[i]->word))[level];
1659	if (Conf->Spell[i]->p.d.len == level + `1`)
1660	{
1661	bool clearCompoundOnly = false;
1662
1663	if (data->isword && data->affix != Conf->Spell[i]->p.d.affix)
1664	{
1665	/*
1666	* MergeAffix called a few times. If one of word is
1667	* allowed to be in compound word and another isn't, then
1668	* clear FF_COMPOUNDONLY flag.
1669	*/
1670
1671	clearCompoundOnly = (FF_COMPOUNDONLY & data->compoundflag
1672	& makeCompoundFlags(Conf, Conf->Spell[i]->p.d.affix))
1673	? false : true;
1674	data->affix = MergeAffix(Conf, data->affix, Conf->Spell[i]->p.d.affix);
1675	}
1676	else
1677	data->affix = Conf->Spell[i]->p.d.affix;
1678	data->isword = `1`;
1679
1680	data->compoundflag = makeCompoundFlags(Conf, data->affix);
1681
1682	if ((data->compoundflag & FF_COMPOUNDONLY) &&
1683	(data->compoundflag & FF_COMPOUNDFLAG) == `0`)
1684	data->compoundflag \|= FF_COMPOUNDFLAG;
1685
1686	if (clearCompoundOnly)
1687	data->compoundflag &= ~FF_COMPOUNDONLY;
1688	}
1689	}
1690
1691	/ Next level of the prefix tree /
1692	data->node = mkSPNode(Conf, lownew, high, level + `1`);
1693
1694	return rs;
1695	}
1696
1697	/*
1698	* Builds the Conf->Dictionary tree and AffixData from the imported dictionary
1699	* and affixes.
1700	*/
1701	void
1702	NISortDictionary(IspellDict *Conf)
1703	{
1704	int i;
1705	int naffix = `0`;
1706	int curaffix;
1707
1708	/ compress affixes /
1709
1710	/*
1711	* If we use flag aliases then we need to use Conf->AffixData filled in
1712	* the NIImportOOAffixes().
1713	*/
1714	if (Conf->useFlagAliases)
1715	{
1716	for (i = `0`; i < Conf->nspell; i++)
1717	{
1718	char *end;
1719
1720	if (*Conf->Spell[i]->p.flag != `'\0'`)
1721	{
1722	curaffix = strtol(Conf->Spell[i]->p.flag, &end, `10`);
1723	if (Conf->Spell[i]->p.flag == end \|\| errno == ERANGE)
1724	ereport(ERROR,
1725	(errcode(ERRCODE_CONFIG_FILE_ERROR),
1726	errmsg("invalid affix alias \"%s\"",
1727	Conf->Spell[i]->p.flag)));
1728	}
1729	else
1730	{
1731	/*
1732	* If Conf->Spell[i]->p.flag is empty, then get empty value of
1733	* Conf->AffixData (0 index).
1734	*/
1735	curaffix = `0`;
1736	}
1737
1738	Conf->Spell[i]->p.d.affix = curaffix;
1739	Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
1740	}
1741	}
1742	/ Otherwise fill Conf->AffixData here /
1743	else
1744	{
1745	/ Count the number of different flags used in the dictionary /
1746	qsort((void ) Conf->Spell, Conf->nspell, sizeof(SPELL ),
1747	cmpspellaffix);
1748
1749	naffix = `0`;
1750	for (i = `0`; i < Conf->nspell; i++)
1751	{
1752	if (i == `0` \|\|
1753	strcmp(Conf->Spell[i]->p.flag, Conf->Spell[i - `1`]->p.flag) != `0`)
1754	naffix++;
1755	}
1756
1757	/*
1758	* Fill in Conf->AffixData with the affixes that were used in the
1759	* dictionary. Replace textual flag-field of Conf->Spell entries with
1760	* indexes into Conf->AffixData array.
1761	*/
1762	Conf->AffixData = (char *) palloc0(naffix sizeof(char *));
1763
1764	curaffix = -`1`;
1765	for (i = `0`; i < Conf->nspell; i++)
1766	{
1767	if (i == `0` \|\|
1768	strcmp(Conf->Spell[i]->p.flag, Conf->AffixData[curaffix]) != `0`)
1769	{
1770	curaffix++;
1771	Assert(curaffix < naffix);
1772	Conf->AffixData[curaffix] = cpstrdup(Conf,
1773	Conf->Spell[i]->p.flag);
1774	}
1775
1776	Conf->Spell[i]->p.d.affix = curaffix;
1777	Conf->Spell[i]->p.d.len = strlen(Conf->Spell[i]->word);
1778	}
1779
1780	Conf->lenAffixData = Conf->nAffixData = naffix;
1781	}
1782
1783	/ Start build a prefix tree /
1784	qsort((void ) Conf->Spell, Conf->nspell, sizeof(SPELL ), cmpspell);
1785	Conf->Dictionary = mkSPNode(Conf, `0`, Conf->nspell, `0`);
1786	}
1787
1788	/*
1789	* Makes a prefix tree for the given level using the repl string of an affix
1790	* rule. Affixes with empty replace string do not include in the prefix tree.
1791	* This affixes are included by mkVoidAffix().
1792	*
1793	* Conf: current dictionary.
1794	* low: lower index of the Conf->Affix array.
1795	* high: upper index of the Conf->Affix array.
1796	* level: current prefix tree level.
1797	* type: FF_SUFFIX or FF_PREFIX.
1798	*/
1799	static AffixNode *
1800	mkANode(IspellDict Conf, int* low, int high, int level, int type)
1801	{
1802	int i;
1803	int nchar = `0`;
1804	uint8 lastchar = `'\0'`;
1805	AffixNode *rs;
1806	AffixNodeData *data;
1807	int lownew = low;
1808	int naff;
1809	AFFIX **aff;
1810
1811	for (i = low; i < high; i++)
1812	if (Conf->Affix[i].replen > level && lastchar != GETCHAR(Conf->Affix + i, level, type))
1813	{
1814	nchar++;
1815	lastchar = GETCHAR(Conf->Affix + i, level, type);
1816	}
1817
1818	if (!nchar)
1819	return NULL;
1820
1821	aff = (AFFIX ) tmpalloc(sizeof*(AFFIX ) * (high - low + `1`));
1822	naff = `0`;
1823
1824	rs = (AffixNode ) cpalloc0(ANHRDSZ + nchar sizeof(AffixNodeData));
1825	rs->length = nchar;
1826	data = rs->data;
1827
1828	lastchar = `'\0'`;
1829	for (i = low; i < high; i++)
1830	if (Conf->Affix[i].replen > level)
1831	{
1832	if (lastchar != GETCHAR(Conf->Affix + i, level, type))
1833	{
1834	if (lastchar)
1835	{
1836	/ Next level of the prefix tree /
1837	data->node = mkANode(Conf, lownew, i, level + `1`, type);
1838	if (naff)
1839	{
1840	data->naff = naff;
1841	data->aff = (AFFIX ) cpalloc(sizeof*(AFFIX ) * naff);
1842	memcpy(data->aff, aff, sizeof(AFFIX ) naff);
1843	naff = `0`;
1844	}
1845	data++;
1846	lownew = i;
1847	}
1848	lastchar = GETCHAR(Conf->Affix + i, level, type);
1849	}
1850	data->val = GETCHAR(Conf->Affix + i, level, type);
1851	if (Conf->Affix[i].replen == level + `1`)
1852	{ / affix stopped /
1853	aff[naff++] = Conf->Affix + i;
1854	}
1855	}
1856
1857	/ Next level of the prefix tree /
1858	data->node = mkANode(Conf, lownew, high, level + `1`, type);
1859	if (naff)
1860	{
1861	data->naff = naff;
1862	data->aff = (AFFIX ) cpalloc(sizeof*(AFFIX ) * naff);
1863	memcpy(data->aff, aff, sizeof(AFFIX ) naff);
1864	naff = `0`;
1865	}
1866
1867	pfree(aff);
1868
1869	return rs;
1870	}
1871
1872	/*
1873	* Makes the root void node in the prefix tree. The root void node is created
1874	* for affixes which have empty replace string ("repl" field).
1875	*/
1876	static void
1877	mkVoidAffix(IspellDict Conf, bool issuffix, int* startsuffix)
1878	{
1879	int i,
1880	cnt = `0`;
1881	int start = (issuffix) ? startsuffix : `0`;
1882	int end = (issuffix) ? Conf->naffixes : startsuffix;
1883	AffixNode Affix = (AffixNode ) palloc0(ANHRDSZ + sizeof(AffixNodeData));
1884
1885	Affix->length = `1`;
1886	Affix->isvoid = `1`;
1887
1888	if (issuffix)
1889	{
1890	Affix->data->node = Conf->Suffix;
1891	Conf->Suffix = Affix;
1892	}
1893	else
1894	{
1895	Affix->data->node = Conf->Prefix;
1896	Conf->Prefix = Affix;
1897	}
1898
1899	/ Count affixes with empty replace string /
1900	for (i = start; i < end; i++)
1901	if (Conf->Affix[i].replen == `0`)
1902	cnt++;
1903
1904	/ There is not affixes with empty replace string /
1905	if (cnt == `0`)
1906	return;
1907
1908	Affix->data->aff = (AFFIX ) cpalloc(sizeof*(AFFIX ) * cnt);
1909	Affix->data->naff = (uint32) cnt;
1910
1911	cnt = `0`;
1912	for (i = start; i < end; i++)
1913	if (Conf->Affix[i].replen == `0`)
1914	{
1915	Affix->data->aff[cnt] = Conf->Affix + i;
1916	cnt++;
1917	}
1918	}
1919
1920	/*
1921	* Checks if the affixflag is used by dictionary. Conf->AffixData does not
1922	* contain affixflag if this flag is not used actually by the .dict file.
1923	*
1924	* Conf: current dictionary.
1925	* affixflag: affix flag.
1926	*
1927	* Returns true if the Conf->AffixData array contains affixflag, otherwise
1928	* returns false.
1929	*/
1930	static bool
1931	isAffixInUse(IspellDict Conf, char* *affixflag)
1932	{
1933	int i;
1934
1935	for (i = `0`; i < Conf->nAffixData; i++)
1936	if (IsAffixFlagInUse(Conf, i, affixflag))
1937	return true;
1938
1939	return false;
1940	}
1941
1942	/*
1943	* Builds Conf->Prefix and Conf->Suffix trees from the imported affixes.
1944	*/
1945	void
1946	NISortAffixes(IspellDict *Conf)
1947	{
1948	AFFIX *Affix;
1949	size_t i;
1950	CMPDAffix *ptr;
1951	int firstsuffix = Conf->naffixes;
1952
1953	if (Conf->naffixes == `0`)
1954	return;
1955
1956	/ Store compound affixes in the Conf->CompoundAffix array /
1957	if (Conf->naffixes > `1`)
1958	qsort((void ) Conf->Affix, Conf->naffixes, sizeof*(AFFIX), cmpaffix);
1959	Conf->CompoundAffix = ptr = (CMPDAffix ) palloc(sizeof(CMPDAffix) Conf->naffixes);
1960	ptr->affix = NULL;
1961
1962	for (i = `0`; i < Conf->naffixes; i++)
1963	{
1964	Affix = &(((AFFIX *) Conf->Affix)[i]);
1965	if (Affix->type == FF_SUFFIX && i < firstsuffix)
1966	firstsuffix = i;
1967
1968	if ((Affix->flagflags & FF_COMPOUNDFLAG) && Affix->replen > `0` &&
1969	isAffixInUse(Conf, Affix->flag))
1970	{
1971	bool issuffix = (Affix->type == FF_SUFFIX);
1972
1973	if (ptr == Conf->CompoundAffix \|\|
1974	issuffix != (ptr - `1`)->issuffix \|\|
1975	strbncmp((const unsigned char *) (ptr - `1`)->affix,
1976	(const unsigned char *) Affix->repl,
1977	(ptr - `1`)->len))
1978	{
1979	/ leave only unique and minimals suffixes /
1980	ptr->affix = Affix->repl;
1981	ptr->len = Affix->replen;
1982	ptr->issuffix = issuffix;
1983	ptr++;
1984	}
1985	}
1986	}
1987	ptr->affix = NULL;
1988	Conf->CompoundAffix = (CMPDAffix ) repalloc(Conf->CompoundAffix, sizeof(CMPDAffix) (ptr - Conf->CompoundAffix + `1`));
1989
1990	/ Start build a prefix tree /
1991	Conf->Prefix = mkANode(Conf, `0`, firstsuffix, `0`, FF_PREFIX);
1992	Conf->Suffix = mkANode(Conf, firstsuffix, Conf->naffixes, `0`, FF_SUFFIX);
1993	mkVoidAffix(Conf, true, firstsuffix);
1994	mkVoidAffix(Conf, false, firstsuffix);
1995	}
1996
1997	static AffixNodeData *
1998	FindAffixes(AffixNode node, const* char word, int* wrdlen, int level, int* type)
1999	{
2000	AffixNodeData *StopLow,
2001	*StopHigh,
2002	*StopMiddle;
2003	uint8 symbol;
2004
2005	if (node->isvoid)
2006	{ / search void affixes /
2007	if (node->data->naff)
2008	return node->data;
2009	node = node->data->node;
2010	}
2011
2012	while (node && *level < wrdlen)
2013	{
2014	StopLow = node->data;
2015	StopHigh = node->data + node->length;
2016	while (StopLow < StopHigh)
2017	{
2018	StopMiddle = StopLow + ((StopHigh - StopLow) >> `1`);
2019	symbol = GETWCHAR(word, wrdlen, *level, type);
2020
2021	if (StopMiddle->val == symbol)
2022	{
2023	(*level)++;
2024	if (StopMiddle->naff)
2025	return StopMiddle;
2026	node = StopMiddle->node;
2027	break;
2028	}
2029	else if (StopMiddle->val < symbol)
2030	StopLow = StopMiddle + `1`;
2031	else
2032	StopHigh = StopMiddle;
2033	}
2034	if (StopLow >= StopHigh)
2035	break;
2036	}
2037	return NULL;
2038	}
2039
2040	static char *
2041	CheckAffix(const char word, size_t len, AFFIX Affix, int flagflags, char newword, int* *baselen)
2042	{
2043	/*
2044	* Check compound allow flags
2045	*/
2046
2047	if (flagflags == `0`)
2048	{
2049	if (Affix->flagflags & FF_COMPOUNDONLY)
2050	return NULL;
2051	}
2052	else if (flagflags & FF_COMPOUNDBEGIN)
2053	{
2054	if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
2055	return NULL;
2056	if ((Affix->flagflags & FF_COMPOUNDBEGIN) == `0`)
2057	if (Affix->type == FF_SUFFIX)
2058	return NULL;
2059	}
2060	else if (flagflags & FF_COMPOUNDMIDDLE)
2061	{
2062	if ((Affix->flagflags & FF_COMPOUNDMIDDLE) == `0` \|\|
2063	(Affix->flagflags & FF_COMPOUNDFORBIDFLAG))
2064	return NULL;
2065	}
2066	else if (flagflags & FF_COMPOUNDLAST)
2067	{
2068	if (Affix->flagflags & FF_COMPOUNDFORBIDFLAG)
2069	return NULL;
2070	if ((Affix->flagflags & FF_COMPOUNDLAST) == `0`)
2071	if (Affix->type == FF_PREFIX)
2072	return NULL;
2073	}
2074
2075	/*
2076	* make replace pattern of affix
2077	*/
2078	if (Affix->type == FF_SUFFIX)
2079	{
2080	strcpy(newword, word);
2081	strcpy(newword + len - Affix->replen, Affix->find);
2082	if (baselen) / store length of non-changed part of word /
2083	*baselen = len - Affix->replen;
2084	}
2085	else
2086	{
2087	/*
2088	* if prefix is an all non-changed part's length then all word
2089	* contains only prefix and suffix, so out
2090	*/
2091	if (baselen && *baselen + strlen(Affix->find) <= Affix->replen)
2092	return NULL;
2093	strcpy(newword, Affix->find);
2094	strcat(newword, word + Affix->replen);
2095	}
2096
2097	/*
2098	* check resulting word
2099	*/
2100	if (Affix->issimple)
2101	return newword;
2102	else if (Affix->isregis)
2103	{
2104	if (RS_execute(&(Affix->reg.regis), newword))
2105	return newword;
2106	}
2107	else
2108	{
2109	int err;
2110	pg_wchar *data;
2111	size_t data_len;
2112	int newword_len;
2113
2114	/ Convert data string to wide characters /
2115	newword_len = strlen(newword);
2116	data = (pg_wchar ) palloc((newword_len + `1`) sizeof(pg_wchar));
2117	data_len = pg_mb2wchar_with_len(newword, data, newword_len);
2118
2119	if (!(err = pg_regexec(&(Affix->reg.regex), data, data_len, `0`, NULL, `0`, NULL, `0`)))
2120	{
2121	pfree(data);
2122	return newword;
2123	}
2124	pfree(data);
2125	}
2126
2127	return NULL;
2128	}
2129
2130	static int
2131	addToResult(char *forms, char* *cur, char* *word)
2132	{
2133	if (cur - forms >= MAX_NORM - `1`)
2134	return `0`;
2135	if (forms == cur \|\| strcmp(word, *(cur - `1`)) != `0`)
2136	{
2137	*cur = pstrdup(word);
2138	*(cur + `1`) = NULL;
2139	return `1`;
2140	}
2141
2142	return `0`;
2143	}
2144
2145	static char **
2146	NormalizeSubWord(IspellDict Conf, char* word, int* flag)
2147	{
2148	AffixNodeData *suffix = NULL,
2149	*prefix = NULL;
2150	int slevel = `0`,
2151	plevel = `0`;
2152	int wrdlen = strlen(word),
2153	swrdlen;
2154	char **forms;
2155	char **cur;
2156	char newword[`2` * MAXNORMLEN] = "";
2157	char pnewword[`2` * MAXNORMLEN] = "";
2158	AffixNode *snode = Conf->Suffix,
2159	*pnode;
2160	int i,
2161	j;
2162
2163	if (wrdlen > MAXNORMLEN)
2164	return NULL;
2165	cur = forms = (char *) palloc(MAX_NORM sizeof(char *));
2166	*cur = NULL;
2167
2168
2169	/ Check that the word itself is normal form /
2170	if (FindWord(Conf, word, VoidString, flag))
2171	{
2172	*cur = pstrdup(word);
2173	cur++;
2174	*cur = NULL;
2175	}
2176
2177	/ Find all other NORMAL forms of the 'word' (check only prefix) /
2178	pnode = Conf->Prefix;
2179	plevel = `0`;
2180	while (pnode)
2181	{
2182	prefix = FindAffixes(pnode, word, wrdlen, &plevel, FF_PREFIX);
2183	if (!prefix)
2184	break;
2185	for (j = `0`; j < prefix->naff; j++)
2186	{
2187	if (CheckAffix(word, wrdlen, prefix->aff[j], flag, newword, NULL))
2188	{
2189	/ prefix success /
2190	if (FindWord(Conf, newword, prefix->aff[j]->flag, flag))
2191	cur += addToResult(forms, cur, newword);
2192	}
2193	}
2194	pnode = prefix->node;
2195	}
2196
2197	/*
2198	* Find all other NORMAL forms of the 'word' (check suffix and then
2199	* prefix)
2200	*/
2201	while (snode)
2202	{
2203	int baselen = `0`;
2204
2205	/ find possible suffix /
2206	suffix = FindAffixes(snode, word, wrdlen, &slevel, FF_SUFFIX);
2207	if (!suffix)
2208	break;
2209	/ foreach suffix check affix /
2210	for (i = `0`; i < suffix->naff; i++)
2211	{
2212	if (CheckAffix(word, wrdlen, suffix->aff[i], flag, newword, &baselen))
2213	{
2214	/ suffix success /
2215	if (FindWord(Conf, newword, suffix->aff[i]->flag, flag))
2216	cur += addToResult(forms, cur, newword);
2217
2218	/ now we will look changed word with prefixes /
2219	pnode = Conf->Prefix;
2220	plevel = `0`;
2221	swrdlen = strlen(newword);
2222	while (pnode)
2223	{
2224	prefix = FindAffixes(pnode, newword, swrdlen, &plevel, FF_PREFIX);
2225	if (!prefix)
2226	break;
2227	for (j = `0`; j < prefix->naff; j++)
2228	{
2229	if (CheckAffix(newword, swrdlen, prefix->aff[j], flag, pnewword, &baselen))
2230	{
2231	/ prefix success /
2232	char *ff = (prefix->aff[j]->flagflags & suffix->aff[i]->flagflags & FF_CROSSPRODUCT) ?
2233	VoidString : prefix->aff[j]->flag;
2234
2235	if (FindWord(Conf, pnewword, ff, flag))
2236	cur += addToResult(forms, cur, pnewword);
2237	}
2238	}
2239	pnode = prefix->node;
2240	}
2241	}
2242	}
2243
2244	snode = suffix->node;
2245	}
2246
2247	if (cur == forms)
2248	{
2249	pfree(forms);
2250	return NULL;
2251	}
2252	return forms;
2253	}
2254
/*
 * One variant of splitting a compound word into stems.  Variants are chained
 * into a singly linked list through "next".
 */
typedef struct SplitVar
{
	int			nstem;			/* number of used entries in stem */
	int			lenstem;		/* allocated size of stem, in entries */
	char	  **stem;			/* array of stem strings */
	struct SplitVar *next;		/* next variant, or NULL */
} SplitVar;
2262
2263	static int
2264	CheckCompoundAffixes(CMPDAffix *ptr, char* word, int* len, bool CheckInPlace)
2265	{
2266	bool issuffix;
2267
2268	/ in case CompoundAffix is null: /
2269	if (*ptr == NULL)
2270	return -`1`;
2271
2272	if (CheckInPlace)
2273	{
2274	while ((*ptr)->affix)
2275	{
2276	if (len > (ptr)->len && strncmp((ptr)->affix, word, (*ptr)->len) == `0`)
2277	{
2278	len = (*ptr)->len;
2279	issuffix = (*ptr)->issuffix;
2280	(*ptr)++;
2281	return (issuffix) ? len : `0`;
2282	}
2283	(*ptr)++;
2284	}
2285	}
2286	else
2287	{
2288	char *affbegin;
2289
2290	while ((*ptr)->affix)
2291	{
2292	if (len > (ptr)->len && (affbegin = strstr(word, (ptr)->affix)) != NULL)
2293	{
2294	len = (*ptr)->len + (affbegin - word);
2295	issuffix = (*ptr)->issuffix;
2296	(*ptr)++;
2297	return (issuffix) ? len : `0`;
2298	}
2299	(*ptr)++;
2300	}
2301	}
2302	return -`1`;
2303	}
2304
2305	static SplitVar *
2306	CopyVar(SplitVar s, int* makedup)
2307	{
2308	SplitVar v = (SplitVar ) palloc(sizeof(SplitVar));
2309
2310	v->next = NULL;
2311	if (s)
2312	{
2313	int i;
2314
2315	v->lenstem = s->lenstem;
2316	v->stem = (char ) palloc(sizeof*(char* ) v->lenstem);
2317	v->nstem = s->nstem;
2318	for (i = `0`; i < s->nstem; i++)
2319	v->stem[i] = (makedup) ? pstrdup(s->stem[i]) : s->stem[i];
2320	}
2321	else
2322	{
2323	v->lenstem = `16`;
2324	v->stem = (char ) palloc(sizeof*(char* ) v->lenstem);
2325	v->nstem = `0`;
2326	}
2327	return v;
2328	}
2329
2330	static void
2331	AddStem(SplitVar v, char* *word)
2332	{
2333	if (v->nstem >= v->lenstem)
2334	{
2335	v->lenstem *= `2`;
2336	v->stem = (char ) repalloc(v->stem, sizeof*(char* ) v->lenstem);
2337	}
2338
2339	v->stem[v->nstem] = word;
2340	v->nstem++;
2341	}
2342
2343	static SplitVar *
2344	SplitToVariants(IspellDict Conf, SPNode snode, SplitVar orig, char* word, int* wordlen, int startpos, int minpos)
2345	{
2346	SplitVar *var = NULL;
2347	SPNodeData *StopLow,
2348	*StopHigh,
2349	*StopMiddle = NULL;
2350	SPNode *node = (snode) ? snode : Conf->Dictionary;
2351	int level = (snode) ? minpos : startpos; / recursive*
2352	* minpos==level */
2353	int lenaff;
2354	CMPDAffix *caff;
2355	char *notprobed;
2356	int compoundflag = `0`;
2357
2358	notprobed = (char *) palloc(wordlen);
2359	memset(notprobed, `1`, wordlen);
2360	var = CopyVar(orig, `1`);
2361
2362	while (level < wordlen)
2363	{
2364	/ find word with epenthetic or/and compound affix /
2365	caff = Conf->CompoundAffix;
2366	while (level > startpos && (lenaff = CheckCompoundAffixes(&caff, word + level, wordlen - level, (node) ? true : false)) >= `0`)
2367	{
2368	/*
2369	* there is one of compound affixes, so check word for existings
2370	*/
2371	char buf[MAXNORMLEN];
2372	char **subres;
2373
2374	lenaff = level - startpos + lenaff;
2375
2376	if (!notprobed[startpos + lenaff - `1`])
2377	continue;
2378
2379	if (level + lenaff - `1` <= minpos)
2380	continue;
2381
2382	if (lenaff >= MAXNORMLEN)
2383	continue; / skip too big value /
2384	if (lenaff > `0`)
2385	memcpy(buf, word + startpos, lenaff);
2386	buf[lenaff] = `'\0'`;
2387
2388	if (level == `0`)
2389	compoundflag = FF_COMPOUNDBEGIN;
2390	else if (level == wordlen - `1`)
2391	compoundflag = FF_COMPOUNDLAST;
2392	else
2393	compoundflag = FF_COMPOUNDMIDDLE;
2394	subres = NormalizeSubWord(Conf, buf, compoundflag);
2395	if (subres)
2396	{
2397	/ Yes, it was a word from dictionary /
2398	SplitVar *new = CopyVar(var, `0`);
2399	SplitVar *ptr = var;
2400	char **sptr = subres;
2401
2402	notprobed[startpos + lenaff - `1`] = `0`;
2403
2404	while (*sptr)
2405	{
2406	AddStem(new, *sptr);
2407	sptr++;
2408	}
2409	pfree(subres);
2410
2411	while (ptr->next)
2412	ptr = ptr->next;
2413	ptr->next = SplitToVariants(Conf, NULL, new, word, wordlen, startpos + lenaff, startpos + lenaff);
2414
2415	pfree(new->stem);
2416	pfree(new);
2417	}
2418	}
2419
2420	if (!node)
2421	break;
2422
2423	StopLow = node->data;
2424	StopHigh = node->data + node->length;
2425	while (StopLow < StopHigh)
2426	{
2427	StopMiddle = StopLow + ((StopHigh - StopLow) >> `1`);
2428	if (StopMiddle->val == ((uint8 *) (word))[level])
2429	break;
2430	else if (StopMiddle->val < ((uint8 *) (word))[level])
2431	StopLow = StopMiddle + `1`;
2432	else
2433	StopHigh = StopMiddle;
2434	}
2435
2436	if (StopLow < StopHigh)
2437	{
2438	if (startpos == `0`)
2439	compoundflag = FF_COMPOUNDBEGIN;
2440	else if (level == wordlen - `1`)
2441	compoundflag = FF_COMPOUNDLAST;
2442	else
2443	compoundflag = FF_COMPOUNDMIDDLE;
2444
2445	/ find infinitive /
2446	if (StopMiddle->isword &&
2447	(StopMiddle->compoundflag & compoundflag) &&
2448	notprobed[level])
2449	{
2450	/ ok, we found full compoundallowed word /
2451	if (level > minpos)
2452	{
2453	/ and its length more than minimal /
2454	if (wordlen == level + `1`)
2455	{
2456	/ well, it was last word /
2457	AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
2458	pfree(notprobed);
2459	return var;
2460	}
2461	else
2462	{
2463	/ then we will search more big word at the same point /
2464	SplitVar *ptr = var;
2465
2466	while (ptr->next)
2467	ptr = ptr->next;
2468	ptr->next = SplitToVariants(Conf, node, var, word, wordlen, startpos, level);
2469	/ we can find next word /
2470	level++;
2471	AddStem(var, pnstrdup(word + startpos, level - startpos));
2472	node = Conf->Dictionary;
2473	startpos = level;
2474	continue;
2475	}
2476	}
2477	}
2478	node = StopMiddle->node;
2479	}
2480	else
2481	node = NULL;
2482	level++;
2483	}
2484
2485	AddStem(var, pnstrdup(word + startpos, wordlen - startpos));
2486	pfree(notprobed);
2487	return var;
2488	}
2489
2490	static void
2491	addNorm(TSLexeme lres, TSLexeme lcur, char word, int* flags, uint16 NVariant)
2492	{
2493	if (*lres == NULL)
2494	lcur = lres = (TSLexeme ) palloc(MAX_NORM sizeof(TSLexeme));
2495
2496	if (lcur - lres < MAX_NORM - `1`)
2497	{
2498	(*lcur)->lexeme = word;
2499	(*lcur)->flags = flags;
2500	(*lcur)->nvariant = NVariant;
2501	(*lcur)++;
2502	(*lcur)->lexeme = NULL;
2503	}
2504	}
2505
2506	TSLexeme *
2507	NINormalizeWord(IspellDict Conf, char* *word)
2508	{
2509	char **res;
2510	TSLexeme *lcur = NULL,
2511	*lres = NULL;
2512	uint16 NVariant = `1`;
2513
2514	res = NormalizeSubWord(Conf, word, `0`);
2515
2516	if (res)
2517	{
2518	char **ptr = res;
2519
2520	while (*ptr && (lcur - lres) < MAX_NORM)
2521	{
2522	addNorm(&lres, &lcur, *ptr, `0`, NVariant++);
2523	ptr++;
2524	}
2525	pfree(res);
2526	}
2527
2528	if (Conf->usecompound)
2529	{
2530	int wordlen = strlen(word);
2531	SplitVar *ptr,
2532	*var = SplitToVariants(Conf, NULL, NULL, word, wordlen, `0`, -`1`);
2533	int i;
2534
2535	while (var)
2536	{
2537	if (var->nstem > `1`)
2538	{
2539	char **subres = NormalizeSubWord(Conf, var->stem[var->nstem - `1`], FF_COMPOUNDLAST);
2540
2541	if (subres)
2542	{
2543	char **subptr = subres;
2544
2545	while (*subptr)
2546	{
2547	for (i = `0`; i < var->nstem - `1`; i++)
2548	{
2549	addNorm(&lres, &lcur, (subptr == subres) ? var->stem[i] : pstrdup(var->stem[i]), `0`, NVariant);
2550	}
2551
2552	addNorm(&lres, &lcur, *subptr, `0`, NVariant);
2553	subptr++;
2554	NVariant++;
2555	}
2556
2557	pfree(subres);
2558	var->stem[`0`] = NULL;
2559	pfree(var->stem[var->nstem - `1`]);
2560	}
2561	}
2562
2563	for (i = `0`; i < var->nstem && var->stem[i]; i++)
2564	pfree(var->stem[i]);
2565	ptr = var->next;
2566	pfree(var->stem);
2567	pfree(var);
2568	var = ptr;
2569	}
2570	}
2571
2572	return lres;
2573	}
2574

Browse the source code of PostgreSQL/src/backend/tsearch/spell.c