dict_thesaurus.c source code [PostgreSQL/src/backend/tsearch/dict_thesaurus.c]

/*-------------------------------------------------------------------------
 *
 * dict_thesaurus.c
 *		Thesaurus dictionary: phrase to phrase substitution
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/tsearch/dict_thesaurus.c
 *
 *-------------------------------------------------------------------------
 */
14	#include "postgres.h"
15
16	#include "catalog/namespace.h"
17	#include "commands/defrem.h"
18	#include "tsearch/ts_cache.h"
19	#include "tsearch/ts_locale.h"
20	#include "tsearch/ts_utils.h"
21	#include "utils/builtins.h"
22	#include "utils/regproc.h"
23
24
/*
 * Temporarily, we use TSLexeme.flags for internal use: DT_USEASIS marks a
 * substitute word that must be kept exactly as written in the thesaurus file
 * instead of being normalized by the subdictionary.  The flag is set in
 * addWrd() and cleared again in compileTheSubstitute().
 */
#define DT_USEASIS 0x1000
29
30	typedef struct LexemeInfo
31	{
32	uint32 idsubst; / entry's number in DictThesaurus->subst /
33	uint16 posinsubst; / pos info in entry /
34	uint16 tnvariant; / total num lexemes in one variant /
35	struct LexemeInfo *nextentry;
36	struct LexemeInfo *nextvariant;
37	} LexemeInfo;
38
39	typedef struct
40	{
41	char *lexeme;
42	LexemeInfo *entries;
43	} TheLexeme;
44
45	typedef struct
46	{
47	uint16 lastlexeme; / number lexemes to substitute /
48	uint16 reslen;
49	TSLexeme res; /* prepared substituted result /
50	} TheSubstitute;
51
52	typedef struct
53	{
54	/ subdictionary to normalize lexemes /
55	Oid subdictOid;
56	TSDictionaryCacheEntry *subdict;
57
58	/ Array to search lexeme by exact match /
59	TheLexeme *wrds;
60	int nwrds; / current number of words /
61	int ntwrds; / allocated array length /
62
63	/*
64	* Storage of substituted result, n-th element is for n-th expression
65	*/
66	TheSubstitute *subst;
67	int nsubst;
68	} DictThesaurus;
69
70
71	static void
72	newLexeme(DictThesaurus d, char* b, char* *e, uint32 idsubst, uint16 posinsubst)
73	{
74	TheLexeme *ptr;
75
76	if (d->nwrds >= d->ntwrds)
77	{
78	if (d->ntwrds == `0`)
79	{
80	d->ntwrds = `16`;
81	d->wrds = (TheLexeme ) palloc(sizeof(TheLexeme) d->ntwrds);
82	}
83	else
84	{
85	d->ntwrds *= `2`;
86	d->wrds = (TheLexeme ) repalloc(d->wrds, sizeof(TheLexeme) d->ntwrds);
87	}
88	}
89
90	ptr = d->wrds + d->nwrds;
91	d->nwrds++;
92
93	ptr->lexeme = palloc(e - b + `1`);
94
95	memcpy(ptr->lexeme, b, e - b);
96	ptr->lexeme[e - b] = `'\0'`;
97
98	ptr->entries = (LexemeInfo ) palloc(sizeof*(LexemeInfo));
99
100	ptr->entries->nextentry = NULL;
101	ptr->entries->idsubst = idsubst;
102	ptr->entries->posinsubst = posinsubst;
103	}
104
105	static void
106	addWrd(DictThesaurus d, char* b, char* *e, uint32 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
107	{
108	static int nres = `0`;
109	static int ntres = `0`;
110	TheSubstitute *ptr;
111
112	if (nwrd == `0`)
113	{
114	nres = ntres = `0`;
115
116	if (idsubst >= d->nsubst)
117	{
118	if (d->nsubst == `0`)
119	{
120	d->nsubst = `16`;
121	d->subst = (TheSubstitute ) palloc(sizeof(TheSubstitute) d->nsubst);
122	}
123	else
124	{
125	d->nsubst *= `2`;
126	d->subst = (TheSubstitute ) repalloc(d->subst, sizeof(TheSubstitute) d->nsubst);
127	}
128	}
129	}
130
131	ptr = d->subst + idsubst;
132
133	ptr->lastlexeme = posinsubst - `1`;
134
135	if (nres + `1` >= ntres)
136	{
137	if (ntres == `0`)
138	{
139	ntres = `2`;
140	ptr->res = (TSLexeme ) palloc(sizeof(TSLexeme) ntres);
141	}
142	else
143	{
144	ntres *= `2`;
145	ptr->res = (TSLexeme ) repalloc(ptr->res, sizeof(TSLexeme) ntres);
146	}
147	}
148
149	ptr->res[nres].lexeme = palloc(e - b + `1`);
150	memcpy(ptr->res[nres].lexeme, b, e - b);
151	ptr->res[nres].lexeme[e - b] = `'\0'`;
152
153	ptr->res[nres].nvariant = nwrd;
154	if (useasis)
155	ptr->res[nres].flags = DT_USEASIS;
156	else
157	ptr->res[nres].flags = `0`;
158
159	ptr->res[++nres].lexeme = NULL;
160	}
161
/*
 * Parser states used by thesaurusRead while scanning one line of the
 * thesaurus file.
 */
enum
{
	TR_WAITLEX = 1,				/* before the ':', between sample words */
	TR_INLEX = 2,				/* inside a sample word */
	TR_WAITSUBS = 3,			/* after the ':', between substitute words */
	TR_INSUBS = 4				/* inside a substitute word */
};
166
167	static void
168	thesaurusRead(const char filename, DictThesaurus d)
169	{
170	tsearch_readline_state trst;
171	uint32 idsubst = `0`;
172	bool useasis = false;
173	char *line;
174
175	filename = get_tsearch_config_filename(filename, "ths");
176	if (!tsearch_readline_begin(&trst, filename))
177	ereport(ERROR,
178	(errcode(ERRCODE_CONFIG_FILE_ERROR),
179	errmsg("could not open thesaurus file \"%s\": %m",
180	filename)));
181
182	while ((line = tsearch_readline(&trst)) != NULL)
183	{
184	char *ptr;
185	int state = TR_WAITLEX;
186	char *beginwrd = NULL;
187	uint32 posinsubst = `0`;
188	uint32 nwrd = `0`;
189
190	ptr = line;
191
192	/ is it a comment? /
193	while (*ptr && t_isspace(ptr))
194	ptr += pg_mblen(ptr);
195
196	if (t_iseq(ptr, `'#'`) \|\| *ptr == `'\0'` \|\|
197	t_iseq(ptr, `'\n'`) \|\| t_iseq(ptr, `'\r'`))
198	{
199	pfree(line);
200	continue;
201	}
202
203	while (*ptr)
204	{
205	if (state == TR_WAITLEX)
206	{
207	if (t_iseq(ptr, `':'`))
208	{
209	if (posinsubst == `0`)
210	ereport(ERROR,
211	(errcode(ERRCODE_CONFIG_FILE_ERROR),
212	errmsg("unexpected delimiter")));
213	state = TR_WAITSUBS;
214	}
215	else if (!t_isspace(ptr))
216	{
217	beginwrd = ptr;
218	state = TR_INLEX;
219	}
220	}
221	else if (state == TR_INLEX)
222	{
223	if (t_iseq(ptr, `':'`))
224	{
225	newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
226	state = TR_WAITSUBS;
227	}
228	else if (t_isspace(ptr))
229	{
230	newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
231	state = TR_WAITLEX;
232	}
233	}
234	else if (state == TR_WAITSUBS)
235	{
236	if (t_iseq(ptr, `'*'`))
237	{
238	useasis = true;
239	state = TR_INSUBS;
240	beginwrd = ptr + pg_mblen(ptr);
241	}
242	else if (t_iseq(ptr, `'\\'`))
243	{
244	useasis = false;
245	state = TR_INSUBS;
246	beginwrd = ptr + pg_mblen(ptr);
247	}
248	else if (!t_isspace(ptr))
249	{
250	useasis = false;
251	beginwrd = ptr;
252	state = TR_INSUBS;
253	}
254	}
255	else if (state == TR_INSUBS)
256	{
257	if (t_isspace(ptr))
258	{
259	if (ptr == beginwrd)
260	ereport(ERROR,
261	(errcode(ERRCODE_CONFIG_FILE_ERROR),
262	errmsg("unexpected end of line or lexeme")));
263	addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
264	state = TR_WAITSUBS;
265	}
266	}
267	else
268	elog(ERROR, "unrecognized thesaurus state: %d", state);
269
270	ptr += pg_mblen(ptr);
271	}
272
273	if (state == TR_INSUBS)
274	{
275	if (ptr == beginwrd)
276	ereport(ERROR,
277	(errcode(ERRCODE_CONFIG_FILE_ERROR),
278	errmsg("unexpected end of line or lexeme")));
279	addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
280	}
281
282	idsubst++;
283
284	if (!(nwrd && posinsubst))
285	ereport(ERROR,
286	(errcode(ERRCODE_CONFIG_FILE_ERROR),
287	errmsg("unexpected end of line")));
288
289	/*
290	* Note: currently, tsearch_readline can't return lines exceeding 4KB,
291	* so overflow of the word counts is impossible. But that may not
292	* always be true, so let's check.
293	*/
294	if (nwrd != (uint16) nwrd \|\| posinsubst != (uint16) posinsubst)
295	ereport(ERROR,
296	(errcode(ERRCODE_CONFIG_FILE_ERROR),
297	errmsg("too many lexemes in thesaurus entry")));
298
299	pfree(line);
300	}
301
302	d->nsubst = idsubst;
303
304	tsearch_readline_end(&trst);
305	}
306
307	static TheLexeme *
308	addCompiledLexeme(TheLexeme newwrds, int* nnw, int* tnm, TSLexeme lexeme, LexemeInfo *src, uint16 tnvariant)
309	{
310	if (nnw >= tnm)
311	{
312	tnm = `2`;
313	newwrds = (TheLexeme ) repalloc(newwrds, sizeof(TheLexeme) *tnm);
314	}
315
316	newwrds[nnw].entries = (LexemeInfo ) palloc(sizeof(LexemeInfo));
317
318	if (lexeme && lexeme->lexeme)
319	{
320	newwrds[*nnw].lexeme = pstrdup(lexeme->lexeme);
321	newwrds[*nnw].entries->tnvariant = tnvariant;
322	}
323	else
324	{
325	newwrds[*nnw].lexeme = NULL;
326	newwrds[*nnw].entries->tnvariant = `1`;
327	}
328
329	newwrds[*nnw].entries->idsubst = src->idsubst;
330	newwrds[*nnw].entries->posinsubst = src->posinsubst;
331
332	newwrds[*nnw].entries->nextentry = NULL;
333
334	(*nnw)++;
335	return newwrds;
336	}
337
338	static int
339	cmpLexemeInfo(LexemeInfo a, LexemeInfo b)
340	{
341	if (a == NULL \|\| b == NULL)
342	return `0`;
343
344	if (a->idsubst == b->idsubst)
345	{
346	if (a->posinsubst == b->posinsubst)
347	{
348	if (a->tnvariant == b->tnvariant)
349	return `0`;
350
351	return (a->tnvariant > b->tnvariant) ? `1` : -`1`;
352	}
353
354	return (a->posinsubst > b->posinsubst) ? `1` : -`1`;
355	}
356
357	return (a->idsubst > b->idsubst) ? `1` : -`1`;
358	}
359
360	static int
361	cmpLexeme(const TheLexeme a, const* TheLexeme *b)
362	{
363	if (a->lexeme == NULL)
364	{
365	if (b->lexeme == NULL)
366	return `0`;
367	else
368	return `1`;
369	}
370	else if (b->lexeme == NULL)
371	return -`1`;
372
373	return strcmp(a->lexeme, b->lexeme);
374	}
375
376	static int
377	cmpLexemeQ(const void a, const* void *b)
378	{
379	return cmpLexeme((const TheLexeme ) a, (const* TheLexeme *) b);
380	}
381
382	static int
383	cmpTheLexeme(const void a, const* void *b)
384	{
385	const TheLexeme la = (const* TheLexeme *) a;
386	const TheLexeme lb = (const* TheLexeme *) b;
387	int res;
388
389	if ((res = cmpLexeme(la, lb)) != `0`)
390	return res;
391
392	return -cmpLexemeInfo(la->entries, lb->entries);
393	}
394
395	static void
396	compileTheLexeme(DictThesaurus *d)
397	{
398	int i,
399	nnw = `0`,
400	tnm = `16`;
401	TheLexeme newwrds = (TheLexeme ) palloc(sizeof(TheLexeme) * tnm),
402	*ptrwrds;
403
404	for (i = `0`; i < d->nwrds; i++)
405	{
406	TSLexeme *ptr;
407
408	if (strcmp(d->wrds[i].lexeme, "?") == `0`) / Is stop word marker? /
409	newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, `0`);
410	else
411	{
412	ptr = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
413	PointerGetDatum(d->subdict->dictData),
414	PointerGetDatum(d->wrds[i].lexeme),
415	Int32GetDatum(strlen(d->wrds[i].lexeme)),
416	PointerGetDatum(NULL)));
417
418	if (!ptr)
419	ereport(ERROR,
420	(errcode(ERRCODE_CONFIG_FILE_ERROR),
421	errmsg("thesaurus sample word \"%s\" isn't recognized by subdictionary (rule %d)",
422	d->wrds[i].lexeme,
423	d->wrds[i].entries->idsubst + `1`)));
424	else if (!(ptr->lexeme))
425	ereport(ERROR,
426	(errcode(ERRCODE_CONFIG_FILE_ERROR),
427	errmsg("thesaurus sample word \"%s\" is a stop word (rule %d)",
428	d->wrds[i].lexeme,
429	d->wrds[i].entries->idsubst + `1`),
430	errhint("Use \"?\" to represent a stop word within a sample phrase.")));
431	else
432	{
433	while (ptr->lexeme)
434	{
435	TSLexeme *remptr = ptr + `1`;
436	int tnvar = `1`;
437	int curvar = ptr->nvariant;
438
439	/ compute n words in one variant /
440	while (remptr->lexeme)
441	{
442	if (remptr->nvariant != (remptr - `1`)->nvariant)
443	break;
444	tnvar++;
445	remptr++;
446	}
447
448	remptr = ptr;
449	while (remptr->lexeme && remptr->nvariant == curvar)
450	{
451	newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar);
452	remptr++;
453	}
454
455	ptr = remptr;
456	}
457	}
458	}
459
460	pfree(d->wrds[i].lexeme);
461	pfree(d->wrds[i].entries);
462	}
463
464	if (d->wrds)
465	pfree(d->wrds);
466	d->wrds = newwrds;
467	d->nwrds = nnw;
468	d->ntwrds = tnm;
469
470	if (d->nwrds > `1`)
471	{
472	qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme);
473
474	/ uniq /
475	newwrds = d->wrds;
476	ptrwrds = d->wrds + `1`;
477	while (ptrwrds - d->wrds < d->nwrds)
478	{
479	if (cmpLexeme(ptrwrds, newwrds) == `0`)
480	{
481	if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries))
482	{
483	ptrwrds->entries->nextentry = newwrds->entries;
484	newwrds->entries = ptrwrds->entries;
485	}
486	else
487	pfree(ptrwrds->entries);
488
489	if (ptrwrds->lexeme)
490	pfree(ptrwrds->lexeme);
491	}
492	else
493	{
494	newwrds++;
495	newwrds = ptrwrds;
496	}
497
498	ptrwrds++;
499	}
500
501	d->nwrds = newwrds - d->wrds + `1`;
502	d->wrds = (TheLexeme ) repalloc(d->wrds, sizeof(TheLexeme) d->nwrds);
503	}
504	}
505
506	static void
507	compileTheSubstitute(DictThesaurus *d)
508	{
509	int i;
510
511	for (i = `0`; i < d->nsubst; i++)
512	{
513	TSLexeme *rem = d->subst[i].res,
514	*outptr,
515	*inptr;
516	int n = `2`;
517
518	outptr = d->subst[i].res = (TSLexeme ) palloc(sizeof(TSLexeme) n);
519	outptr->lexeme = NULL;
520	inptr = rem;
521
522	while (inptr && inptr->lexeme)
523	{
524	TSLexeme *lexized,
525	tmplex[`2`];
526
527	if (inptr->flags & DT_USEASIS)
528	{ / do not lexize /
529	tmplex[`0`] = *inptr;
530	tmplex[`0`].flags = `0`;
531	tmplex[`1`].lexeme = NULL;
532	lexized = tmplex;
533	}
534	else
535	{
536	lexized = (TSLexeme *) DatumGetPointer(
537	FunctionCall4(
538	&(d->subdict->lexize),
539	PointerGetDatum(d->subdict->dictData),
540	PointerGetDatum(inptr->lexeme),
541	Int32GetDatum(strlen(inptr->lexeme)),
542	PointerGetDatum(NULL)
543	)
544	);
545	}
546
547	if (lexized && lexized->lexeme)
548	{
549	int toset = (lexized->lexeme && outptr != d->subst[i].res) ? (outptr - d->subst[i].res) : -`1`;
550
551	while (lexized->lexeme)
552	{
553	if (outptr - d->subst[i].res + `1` >= n)
554	{
555	int diff = outptr - d->subst[i].res;
556
557	n *= `2`;
558	d->subst[i].res = (TSLexeme ) repalloc(d->subst[i].res, sizeof(TSLexeme) n);
559	outptr = d->subst[i].res + diff;
560	}
561
562	outptr = lexized;
563	outptr->lexeme = pstrdup(lexized->lexeme);
564
565	outptr++;
566	lexized++;
567	}
568
569	if (toset > `0`)
570	d->subst[i].res[toset].flags \|= TSL_ADDPOS;
571	}
572	else if (lexized)
573	{
574	ereport(ERROR,
575	(errcode(ERRCODE_CONFIG_FILE_ERROR),
576	errmsg("thesaurus substitute word \"%s\" is a stop word (rule %d)",
577	inptr->lexeme, i + `1`)));
578	}
579	else
580	{
581	ereport(ERROR,
582	(errcode(ERRCODE_CONFIG_FILE_ERROR),
583	errmsg("thesaurus substitute word \"%s\" isn't recognized by subdictionary (rule %d)",
584	inptr->lexeme, i + `1`)));
585	}
586
587	if (inptr->lexeme)
588	pfree(inptr->lexeme);
589	inptr++;
590	}
591
592	if (outptr == d->subst[i].res)
593	ereport(ERROR,
594	(errcode(ERRCODE_CONFIG_FILE_ERROR),
595	errmsg("thesaurus substitute phrase is empty (rule %d)",
596	i + `1`)));
597
598	d->subst[i].reslen = outptr - d->subst[i].res;
599
600	pfree(rem);
601	}
602	}
603
604	Datum
605	thesaurus_init(PG_FUNCTION_ARGS)
606	{
607	List dictoptions = (List ) PG_GETARG_POINTER(`0`);
608	DictThesaurus *d;
609	char *subdictname = NULL;
610	bool fileloaded = false;
611	ListCell *l;
612
613	d = (DictThesaurus ) palloc0(sizeof*(DictThesaurus));
614
615	foreach(l, dictoptions)
616	{
617	DefElem defel = (DefElem ) lfirst(l);
618
619	if (strcmp(defel->defname, "dictfile") == `0`)
620	{
621	if (fileloaded)
622	ereport(ERROR,
623	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
624	errmsg("multiple DictFile parameters")));
625	thesaurusRead(defGetString(defel), d);
626	fileloaded = true;
627	}
628	else if (strcmp(defel->defname, "dictionary") == `0`)
629	{
630	if (subdictname)
631	ereport(ERROR,
632	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
633	errmsg("multiple Dictionary parameters")));
634	subdictname = pstrdup(defGetString(defel));
635	}
636	else
637	{
638	ereport(ERROR,
639	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
640	errmsg("unrecognized Thesaurus parameter: \"%s\"",
641	defel->defname)));
642	}
643	}
644
645	if (!fileloaded)
646	ereport(ERROR,
647	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
648	errmsg("missing DictFile parameter")));
649	if (!subdictname)
650	ereport(ERROR,
651	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
652	errmsg("missing Dictionary parameter")));
653
654	d->subdictOid = get_ts_dict_oid(stringToQualifiedNameList(subdictname), false);
655	d->subdict = lookup_ts_dictionary_cache(d->subdictOid);
656
657	compileTheLexeme(d);
658	compileTheSubstitute(d);
659
660	PG_RETURN_POINTER(d);
661	}
662
663	static LexemeInfo *
664	findTheLexeme(DictThesaurus d, char* *lexeme)
665	{
666	TheLexeme key,
667	*res;
668
669	if (d->nwrds == `0`)
670	return NULL;
671
672	key.lexeme = lexeme;
673	key.entries = NULL;
674
675	res = bsearch(&key, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ);
676
677	if (res == NULL)
678	return NULL;
679	return res->entries;
680	}
681
682	static bool
683	matchIdSubst(LexemeInfo *stored, uint32 idsubst)
684	{
685	bool res = true;
686
687	if (stored)
688	{
689	res = false;
690
691	for (; stored; stored = stored->nextvariant)
692	if (stored->idsubst == idsubst)
693	{
694	res = true;
695	break;
696	}
697	}
698
699	return res;
700	}
701
702	static LexemeInfo *
703	findVariant(LexemeInfo in, LexemeInfo stored, uint16 curpos, LexemeInfo *newin, int* newn)
704	{
705	for (;;)
706	{
707	int i;
708	LexemeInfo *ptr = newin[`0`];
709
710	for (i = `0`; i < newn; i++)
711	{
712	while (newin[i] && newin[i]->idsubst < ptr->idsubst)
713	newin[i] = newin[i]->nextentry;
714
715	if (newin[i] == NULL)
716	return in;
717
718	if (newin[i]->idsubst > ptr->idsubst)
719	{
720	ptr = newin[i];
721	i = -`1`;
722	continue;
723	}
724
725	while (newin[i]->idsubst == ptr->idsubst)
726	{
727	if (newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn)
728	{
729	ptr = newin[i];
730	break;
731	}
732
733	newin[i] = newin[i]->nextentry;
734	if (newin[i] == NULL)
735	return in;
736	}
737
738	if (newin[i]->idsubst != ptr->idsubst)
739	{
740	ptr = newin[i];
741	i = -`1`;
742	continue;
743	}
744	}
745
746	if (i == newn && matchIdSubst(stored, ptr->idsubst) && (in == NULL \|\| !matchIdSubst(in, ptr->idsubst)))
747	{ / found /
748
749	ptr->nextvariant = in;
750	in = ptr;
751	}
752
753	/ step forward /
754	for (i = `0`; i < newn; i++)
755	newin[i] = newin[i]->nextentry;
756	}
757	}
758
759	static TSLexeme *
760	copyTSLexeme(TheSubstitute *ts)
761	{
762	TSLexeme *res;
763	uint16 i;
764
765	res = (TSLexeme ) palloc(sizeof(TSLexeme) (ts->reslen + `1`));
766	for (i = `0`; i < ts->reslen; i++)
767	{
768	res[i] = ts->res[i];
769	res[i].lexeme = pstrdup(ts->res[i].lexeme);
770	}
771
772	res[ts->reslen].lexeme = NULL;
773
774	return res;
775	}
776
777	static TSLexeme *
778	checkMatch(DictThesaurus d, LexemeInfo info, uint16 curpos, bool *moreres)
779	{
780	*moreres = false;
781	while (info)
782	{
783	Assert(info->idsubst < d->nsubst);
784	if (info->nextvariant)
785	*moreres = true;
786	if (d->subst[info->idsubst].lastlexeme == curpos)
787	return copyTSLexeme(d->subst + info->idsubst);
788	info = info->nextvariant;
789	}
790
791	return NULL;
792	}
793
794	Datum
795	thesaurus_lexize(PG_FUNCTION_ARGS)
796	{
797	DictThesaurus d = (DictThesaurus ) PG_GETARG_POINTER(`0`);
798	DictSubState dstate = (DictSubState ) PG_GETARG_POINTER(`3`);
799	TSLexeme *res = NULL;
800	LexemeInfo *stored,
801	*info = NULL;
802	uint16 curpos = `0`;
803	bool moreres = false;
804
805	if (PG_NARGS() != `4` \|\| dstate == NULL)
806	elog(ERROR, "forbidden call of thesaurus or nested call");
807
808	if (dstate->isend)
809	PG_RETURN_POINTER(NULL);
810	stored = (LexemeInfo *) dstate->private_state;
811
812	if (stored)
813	curpos = stored->posinsubst + `1`;
814
815	if (!d->subdict->isvalid)
816	d->subdict = lookup_ts_dictionary_cache(d->subdictOid);
817
818	res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
819	PointerGetDatum(d->subdict->dictData),
820	PG_GETARG_DATUM(`1`),
821	PG_GETARG_DATUM(`2`),
822	PointerGetDatum(NULL)));
823
824	if (res && res->lexeme)
825	{
826	TSLexeme *ptr = res,
827	*basevar;
828
829	while (ptr->lexeme)
830	{
831	uint16 nv = ptr->nvariant;
832	uint16 i,
833	nlex = `0`;
834	LexemeInfo **infos;
835
836	basevar = ptr;
837	while (ptr->lexeme && nv == ptr->nvariant)
838	{
839	nlex++;
840	ptr++;
841	}
842
843	infos = (LexemeInfo ) palloc(sizeof*(LexemeInfo ) * nlex);
844	for (i = `0`; i < nlex; i++)
845	if ((infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL)
846	break;
847
848	if (i < nlex)
849	{
850	/ no chance to find /
851	pfree(infos);
852	continue;
853	}
854
855	info = findVariant(info, stored, curpos, infos, nlex);
856	}
857	}
858	else if (res)
859	{ / stop-word /
860	LexemeInfo *infos = findTheLexeme(d, NULL);
861
862	info = findVariant(NULL, stored, curpos, &infos, `1`);
863	}
864	else
865	{
866	info = NULL; / word isn't recognized /
867	}
868
869	dstate->private_state = (void *) info;
870
871	if (!info)
872	{
873	dstate->getnext = false;
874	PG_RETURN_POINTER(NULL);
875	}
876
877	if ((res = checkMatch(d, info, curpos, &moreres)) != NULL)
878	{
879	dstate->getnext = moreres;
880	PG_RETURN_POINTER(res);
881	}
882
883	dstate->getnext = true;
884
885	PG_RETURN_POINTER(NULL);
886	}
887

Browse the source code of PostgreSQL/src/backend/tsearch/dict_thesaurus.c