1/*-------------------------------------------------------------------------
2 *
3 * dict_thesaurus.c
4 * Thesaurus dictionary: phrase to phrase substitution
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/tsearch/dict_thesaurus.c
11 *
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include "catalog/namespace.h"
17#include "commands/defrem.h"
18#include "tsearch/ts_cache.h"
19#include "tsearch/ts_locale.h"
20#include "tsearch/ts_utils.h"
21#include "utils/builtins.h"
22#include "utils/regproc.h"
23
24
/*
 * Private marker bit that is kept temporarily in TSLexeme.flags while the
 * thesaurus is being built.  addWrd() sets it on substitute words that must
 * be used as is, and compileTheSubstitute() clears it again instead of
 * passing such a word to the subdictionary.
 */
#define DT_USEASIS      0x1000
29
30typedef struct LexemeInfo
31{
32 uint32 idsubst; /* entry's number in DictThesaurus->subst */
33 uint16 posinsubst; /* pos info in entry */
34 uint16 tnvariant; /* total num lexemes in one variant */
35 struct LexemeInfo *nextentry;
36 struct LexemeInfo *nextvariant;
37} LexemeInfo;
38
39typedef struct
40{
41 char *lexeme;
42 LexemeInfo *entries;
43} TheLexeme;
44
45typedef struct
46{
47 uint16 lastlexeme; /* number lexemes to substitute */
48 uint16 reslen;
49 TSLexeme *res; /* prepared substituted result */
50} TheSubstitute;
51
52typedef struct
53{
54 /* subdictionary to normalize lexemes */
55 Oid subdictOid;
56 TSDictionaryCacheEntry *subdict;
57
58 /* Array to search lexeme by exact match */
59 TheLexeme *wrds;
60 int nwrds; /* current number of words */
61 int ntwrds; /* allocated array length */
62
63 /*
64 * Storage of substituted result, n-th element is for n-th expression
65 */
66 TheSubstitute *subst;
67 int nsubst;
68} DictThesaurus;
69
70
71static void
72newLexeme(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 posinsubst)
73{
74 TheLexeme *ptr;
75
76 if (d->nwrds >= d->ntwrds)
77 {
78 if (d->ntwrds == 0)
79 {
80 d->ntwrds = 16;
81 d->wrds = (TheLexeme *) palloc(sizeof(TheLexeme) * d->ntwrds);
82 }
83 else
84 {
85 d->ntwrds *= 2;
86 d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->ntwrds);
87 }
88 }
89
90 ptr = d->wrds + d->nwrds;
91 d->nwrds++;
92
93 ptr->lexeme = palloc(e - b + 1);
94
95 memcpy(ptr->lexeme, b, e - b);
96 ptr->lexeme[e - b] = '\0';
97
98 ptr->entries = (LexemeInfo *) palloc(sizeof(LexemeInfo));
99
100 ptr->entries->nextentry = NULL;
101 ptr->entries->idsubst = idsubst;
102 ptr->entries->posinsubst = posinsubst;
103}
104
105static void
106addWrd(DictThesaurus *d, char *b, char *e, uint32 idsubst, uint16 nwrd, uint16 posinsubst, bool useasis)
107{
108 static int nres = 0;
109 static int ntres = 0;
110 TheSubstitute *ptr;
111
112 if (nwrd == 0)
113 {
114 nres = ntres = 0;
115
116 if (idsubst >= d->nsubst)
117 {
118 if (d->nsubst == 0)
119 {
120 d->nsubst = 16;
121 d->subst = (TheSubstitute *) palloc(sizeof(TheSubstitute) * d->nsubst);
122 }
123 else
124 {
125 d->nsubst *= 2;
126 d->subst = (TheSubstitute *) repalloc(d->subst, sizeof(TheSubstitute) * d->nsubst);
127 }
128 }
129 }
130
131 ptr = d->subst + idsubst;
132
133 ptr->lastlexeme = posinsubst - 1;
134
135 if (nres + 1 >= ntres)
136 {
137 if (ntres == 0)
138 {
139 ntres = 2;
140 ptr->res = (TSLexeme *) palloc(sizeof(TSLexeme) * ntres);
141 }
142 else
143 {
144 ntres *= 2;
145 ptr->res = (TSLexeme *) repalloc(ptr->res, sizeof(TSLexeme) * ntres);
146 }
147 }
148
149 ptr->res[nres].lexeme = palloc(e - b + 1);
150 memcpy(ptr->res[nres].lexeme, b, e - b);
151 ptr->res[nres].lexeme[e - b] = '\0';
152
153 ptr->res[nres].nvariant = nwrd;
154 if (useasis)
155 ptr->res[nres].flags = DT_USEASIS;
156 else
157 ptr->res[nres].flags = 0;
158
159 ptr->res[++nres].lexeme = NULL;
160}
161
/*
 * States of the line parser in thesaurusRead()
 */
enum
{
    TR_WAITLEX = 1,             /* before the delimiter, between sample words */
    TR_INLEX = 2,               /* inside a sample word */
    TR_WAITSUBS = 3,            /* after the delimiter, between substitute words */
    TR_INSUBS = 4               /* inside a substitute word */
};
166
167static void
168thesaurusRead(const char *filename, DictThesaurus *d)
169{
170 tsearch_readline_state trst;
171 uint32 idsubst = 0;
172 bool useasis = false;
173 char *line;
174
175 filename = get_tsearch_config_filename(filename, "ths");
176 if (!tsearch_readline_begin(&trst, filename))
177 ereport(ERROR,
178 (errcode(ERRCODE_CONFIG_FILE_ERROR),
179 errmsg("could not open thesaurus file \"%s\": %m",
180 filename)));
181
182 while ((line = tsearch_readline(&trst)) != NULL)
183 {
184 char *ptr;
185 int state = TR_WAITLEX;
186 char *beginwrd = NULL;
187 uint32 posinsubst = 0;
188 uint32 nwrd = 0;
189
190 ptr = line;
191
192 /* is it a comment? */
193 while (*ptr && t_isspace(ptr))
194 ptr += pg_mblen(ptr);
195
196 if (t_iseq(ptr, '#') || *ptr == '\0' ||
197 t_iseq(ptr, '\n') || t_iseq(ptr, '\r'))
198 {
199 pfree(line);
200 continue;
201 }
202
203 while (*ptr)
204 {
205 if (state == TR_WAITLEX)
206 {
207 if (t_iseq(ptr, ':'))
208 {
209 if (posinsubst == 0)
210 ereport(ERROR,
211 (errcode(ERRCODE_CONFIG_FILE_ERROR),
212 errmsg("unexpected delimiter")));
213 state = TR_WAITSUBS;
214 }
215 else if (!t_isspace(ptr))
216 {
217 beginwrd = ptr;
218 state = TR_INLEX;
219 }
220 }
221 else if (state == TR_INLEX)
222 {
223 if (t_iseq(ptr, ':'))
224 {
225 newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
226 state = TR_WAITSUBS;
227 }
228 else if (t_isspace(ptr))
229 {
230 newLexeme(d, beginwrd, ptr, idsubst, posinsubst++);
231 state = TR_WAITLEX;
232 }
233 }
234 else if (state == TR_WAITSUBS)
235 {
236 if (t_iseq(ptr, '*'))
237 {
238 useasis = true;
239 state = TR_INSUBS;
240 beginwrd = ptr + pg_mblen(ptr);
241 }
242 else if (t_iseq(ptr, '\\'))
243 {
244 useasis = false;
245 state = TR_INSUBS;
246 beginwrd = ptr + pg_mblen(ptr);
247 }
248 else if (!t_isspace(ptr))
249 {
250 useasis = false;
251 beginwrd = ptr;
252 state = TR_INSUBS;
253 }
254 }
255 else if (state == TR_INSUBS)
256 {
257 if (t_isspace(ptr))
258 {
259 if (ptr == beginwrd)
260 ereport(ERROR,
261 (errcode(ERRCODE_CONFIG_FILE_ERROR),
262 errmsg("unexpected end of line or lexeme")));
263 addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
264 state = TR_WAITSUBS;
265 }
266 }
267 else
268 elog(ERROR, "unrecognized thesaurus state: %d", state);
269
270 ptr += pg_mblen(ptr);
271 }
272
273 if (state == TR_INSUBS)
274 {
275 if (ptr == beginwrd)
276 ereport(ERROR,
277 (errcode(ERRCODE_CONFIG_FILE_ERROR),
278 errmsg("unexpected end of line or lexeme")));
279 addWrd(d, beginwrd, ptr, idsubst, nwrd++, posinsubst, useasis);
280 }
281
282 idsubst++;
283
284 if (!(nwrd && posinsubst))
285 ereport(ERROR,
286 (errcode(ERRCODE_CONFIG_FILE_ERROR),
287 errmsg("unexpected end of line")));
288
289 /*
290 * Note: currently, tsearch_readline can't return lines exceeding 4KB,
291 * so overflow of the word counts is impossible. But that may not
292 * always be true, so let's check.
293 */
294 if (nwrd != (uint16) nwrd || posinsubst != (uint16) posinsubst)
295 ereport(ERROR,
296 (errcode(ERRCODE_CONFIG_FILE_ERROR),
297 errmsg("too many lexemes in thesaurus entry")));
298
299 pfree(line);
300 }
301
302 d->nsubst = idsubst;
303
304 tsearch_readline_end(&trst);
305}
306
307static TheLexeme *
308addCompiledLexeme(TheLexeme *newwrds, int *nnw, int *tnm, TSLexeme *lexeme, LexemeInfo *src, uint16 tnvariant)
309{
310 if (*nnw >= *tnm)
311 {
312 *tnm *= 2;
313 newwrds = (TheLexeme *) repalloc(newwrds, sizeof(TheLexeme) * *tnm);
314 }
315
316 newwrds[*nnw].entries = (LexemeInfo *) palloc(sizeof(LexemeInfo));
317
318 if (lexeme && lexeme->lexeme)
319 {
320 newwrds[*nnw].lexeme = pstrdup(lexeme->lexeme);
321 newwrds[*nnw].entries->tnvariant = tnvariant;
322 }
323 else
324 {
325 newwrds[*nnw].lexeme = NULL;
326 newwrds[*nnw].entries->tnvariant = 1;
327 }
328
329 newwrds[*nnw].entries->idsubst = src->idsubst;
330 newwrds[*nnw].entries->posinsubst = src->posinsubst;
331
332 newwrds[*nnw].entries->nextentry = NULL;
333
334 (*nnw)++;
335 return newwrds;
336}
337
338static int
339cmpLexemeInfo(LexemeInfo *a, LexemeInfo *b)
340{
341 if (a == NULL || b == NULL)
342 return 0;
343
344 if (a->idsubst == b->idsubst)
345 {
346 if (a->posinsubst == b->posinsubst)
347 {
348 if (a->tnvariant == b->tnvariant)
349 return 0;
350
351 return (a->tnvariant > b->tnvariant) ? 1 : -1;
352 }
353
354 return (a->posinsubst > b->posinsubst) ? 1 : -1;
355 }
356
357 return (a->idsubst > b->idsubst) ? 1 : -1;
358}
359
360static int
361cmpLexeme(const TheLexeme *a, const TheLexeme *b)
362{
363 if (a->lexeme == NULL)
364 {
365 if (b->lexeme == NULL)
366 return 0;
367 else
368 return 1;
369 }
370 else if (b->lexeme == NULL)
371 return -1;
372
373 return strcmp(a->lexeme, b->lexeme);
374}
375
376static int
377cmpLexemeQ(const void *a, const void *b)
378{
379 return cmpLexeme((const TheLexeme *) a, (const TheLexeme *) b);
380}
381
382static int
383cmpTheLexeme(const void *a, const void *b)
384{
385 const TheLexeme *la = (const TheLexeme *) a;
386 const TheLexeme *lb = (const TheLexeme *) b;
387 int res;
388
389 if ((res = cmpLexeme(la, lb)) != 0)
390 return res;
391
392 return -cmpLexemeInfo(la->entries, lb->entries);
393}
394
395static void
396compileTheLexeme(DictThesaurus *d)
397{
398 int i,
399 nnw = 0,
400 tnm = 16;
401 TheLexeme *newwrds = (TheLexeme *) palloc(sizeof(TheLexeme) * tnm),
402 *ptrwrds;
403
404 for (i = 0; i < d->nwrds; i++)
405 {
406 TSLexeme *ptr;
407
408 if (strcmp(d->wrds[i].lexeme, "?") == 0) /* Is stop word marker? */
409 newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, NULL, d->wrds[i].entries, 0);
410 else
411 {
412 ptr = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
413 PointerGetDatum(d->subdict->dictData),
414 PointerGetDatum(d->wrds[i].lexeme),
415 Int32GetDatum(strlen(d->wrds[i].lexeme)),
416 PointerGetDatum(NULL)));
417
418 if (!ptr)
419 ereport(ERROR,
420 (errcode(ERRCODE_CONFIG_FILE_ERROR),
421 errmsg("thesaurus sample word \"%s\" isn't recognized by subdictionary (rule %d)",
422 d->wrds[i].lexeme,
423 d->wrds[i].entries->idsubst + 1)));
424 else if (!(ptr->lexeme))
425 ereport(ERROR,
426 (errcode(ERRCODE_CONFIG_FILE_ERROR),
427 errmsg("thesaurus sample word \"%s\" is a stop word (rule %d)",
428 d->wrds[i].lexeme,
429 d->wrds[i].entries->idsubst + 1),
430 errhint("Use \"?\" to represent a stop word within a sample phrase.")));
431 else
432 {
433 while (ptr->lexeme)
434 {
435 TSLexeme *remptr = ptr + 1;
436 int tnvar = 1;
437 int curvar = ptr->nvariant;
438
439 /* compute n words in one variant */
440 while (remptr->lexeme)
441 {
442 if (remptr->nvariant != (remptr - 1)->nvariant)
443 break;
444 tnvar++;
445 remptr++;
446 }
447
448 remptr = ptr;
449 while (remptr->lexeme && remptr->nvariant == curvar)
450 {
451 newwrds = addCompiledLexeme(newwrds, &nnw, &tnm, remptr, d->wrds[i].entries, tnvar);
452 remptr++;
453 }
454
455 ptr = remptr;
456 }
457 }
458 }
459
460 pfree(d->wrds[i].lexeme);
461 pfree(d->wrds[i].entries);
462 }
463
464 if (d->wrds)
465 pfree(d->wrds);
466 d->wrds = newwrds;
467 d->nwrds = nnw;
468 d->ntwrds = tnm;
469
470 if (d->nwrds > 1)
471 {
472 qsort(d->wrds, d->nwrds, sizeof(TheLexeme), cmpTheLexeme);
473
474 /* uniq */
475 newwrds = d->wrds;
476 ptrwrds = d->wrds + 1;
477 while (ptrwrds - d->wrds < d->nwrds)
478 {
479 if (cmpLexeme(ptrwrds, newwrds) == 0)
480 {
481 if (cmpLexemeInfo(ptrwrds->entries, newwrds->entries))
482 {
483 ptrwrds->entries->nextentry = newwrds->entries;
484 newwrds->entries = ptrwrds->entries;
485 }
486 else
487 pfree(ptrwrds->entries);
488
489 if (ptrwrds->lexeme)
490 pfree(ptrwrds->lexeme);
491 }
492 else
493 {
494 newwrds++;
495 *newwrds = *ptrwrds;
496 }
497
498 ptrwrds++;
499 }
500
501 d->nwrds = newwrds - d->wrds + 1;
502 d->wrds = (TheLexeme *) repalloc(d->wrds, sizeof(TheLexeme) * d->nwrds);
503 }
504}
505
506static void
507compileTheSubstitute(DictThesaurus *d)
508{
509 int i;
510
511 for (i = 0; i < d->nsubst; i++)
512 {
513 TSLexeme *rem = d->subst[i].res,
514 *outptr,
515 *inptr;
516 int n = 2;
517
518 outptr = d->subst[i].res = (TSLexeme *) palloc(sizeof(TSLexeme) * n);
519 outptr->lexeme = NULL;
520 inptr = rem;
521
522 while (inptr && inptr->lexeme)
523 {
524 TSLexeme *lexized,
525 tmplex[2];
526
527 if (inptr->flags & DT_USEASIS)
528 { /* do not lexize */
529 tmplex[0] = *inptr;
530 tmplex[0].flags = 0;
531 tmplex[1].lexeme = NULL;
532 lexized = tmplex;
533 }
534 else
535 {
536 lexized = (TSLexeme *) DatumGetPointer(
537 FunctionCall4(
538 &(d->subdict->lexize),
539 PointerGetDatum(d->subdict->dictData),
540 PointerGetDatum(inptr->lexeme),
541 Int32GetDatum(strlen(inptr->lexeme)),
542 PointerGetDatum(NULL)
543 )
544 );
545 }
546
547 if (lexized && lexized->lexeme)
548 {
549 int toset = (lexized->lexeme && outptr != d->subst[i].res) ? (outptr - d->subst[i].res) : -1;
550
551 while (lexized->lexeme)
552 {
553 if (outptr - d->subst[i].res + 1 >= n)
554 {
555 int diff = outptr - d->subst[i].res;
556
557 n *= 2;
558 d->subst[i].res = (TSLexeme *) repalloc(d->subst[i].res, sizeof(TSLexeme) * n);
559 outptr = d->subst[i].res + diff;
560 }
561
562 *outptr = *lexized;
563 outptr->lexeme = pstrdup(lexized->lexeme);
564
565 outptr++;
566 lexized++;
567 }
568
569 if (toset > 0)
570 d->subst[i].res[toset].flags |= TSL_ADDPOS;
571 }
572 else if (lexized)
573 {
574 ereport(ERROR,
575 (errcode(ERRCODE_CONFIG_FILE_ERROR),
576 errmsg("thesaurus substitute word \"%s\" is a stop word (rule %d)",
577 inptr->lexeme, i + 1)));
578 }
579 else
580 {
581 ereport(ERROR,
582 (errcode(ERRCODE_CONFIG_FILE_ERROR),
583 errmsg("thesaurus substitute word \"%s\" isn't recognized by subdictionary (rule %d)",
584 inptr->lexeme, i + 1)));
585 }
586
587 if (inptr->lexeme)
588 pfree(inptr->lexeme);
589 inptr++;
590 }
591
592 if (outptr == d->subst[i].res)
593 ereport(ERROR,
594 (errcode(ERRCODE_CONFIG_FILE_ERROR),
595 errmsg("thesaurus substitute phrase is empty (rule %d)",
596 i + 1)));
597
598 d->subst[i].reslen = outptr - d->subst[i].res;
599
600 pfree(rem);
601 }
602}
603
604Datum
605thesaurus_init(PG_FUNCTION_ARGS)
606{
607 List *dictoptions = (List *) PG_GETARG_POINTER(0);
608 DictThesaurus *d;
609 char *subdictname = NULL;
610 bool fileloaded = false;
611 ListCell *l;
612
613 d = (DictThesaurus *) palloc0(sizeof(DictThesaurus));
614
615 foreach(l, dictoptions)
616 {
617 DefElem *defel = (DefElem *) lfirst(l);
618
619 if (strcmp(defel->defname, "dictfile") == 0)
620 {
621 if (fileloaded)
622 ereport(ERROR,
623 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
624 errmsg("multiple DictFile parameters")));
625 thesaurusRead(defGetString(defel), d);
626 fileloaded = true;
627 }
628 else if (strcmp(defel->defname, "dictionary") == 0)
629 {
630 if (subdictname)
631 ereport(ERROR,
632 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
633 errmsg("multiple Dictionary parameters")));
634 subdictname = pstrdup(defGetString(defel));
635 }
636 else
637 {
638 ereport(ERROR,
639 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
640 errmsg("unrecognized Thesaurus parameter: \"%s\"",
641 defel->defname)));
642 }
643 }
644
645 if (!fileloaded)
646 ereport(ERROR,
647 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
648 errmsg("missing DictFile parameter")));
649 if (!subdictname)
650 ereport(ERROR,
651 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
652 errmsg("missing Dictionary parameter")));
653
654 d->subdictOid = get_ts_dict_oid(stringToQualifiedNameList(subdictname), false);
655 d->subdict = lookup_ts_dictionary_cache(d->subdictOid);
656
657 compileTheLexeme(d);
658 compileTheSubstitute(d);
659
660 PG_RETURN_POINTER(d);
661}
662
663static LexemeInfo *
664findTheLexeme(DictThesaurus *d, char *lexeme)
665{
666 TheLexeme key,
667 *res;
668
669 if (d->nwrds == 0)
670 return NULL;
671
672 key.lexeme = lexeme;
673 key.entries = NULL;
674
675 res = bsearch(&key, d->wrds, d->nwrds, sizeof(TheLexeme), cmpLexemeQ);
676
677 if (res == NULL)
678 return NULL;
679 return res->entries;
680}
681
682static bool
683matchIdSubst(LexemeInfo *stored, uint32 idsubst)
684{
685 bool res = true;
686
687 if (stored)
688 {
689 res = false;
690
691 for (; stored; stored = stored->nextvariant)
692 if (stored->idsubst == idsubst)
693 {
694 res = true;
695 break;
696 }
697 }
698
699 return res;
700}
701
702static LexemeInfo *
703findVariant(LexemeInfo *in, LexemeInfo *stored, uint16 curpos, LexemeInfo **newin, int newn)
704{
705 for (;;)
706 {
707 int i;
708 LexemeInfo *ptr = newin[0];
709
710 for (i = 0; i < newn; i++)
711 {
712 while (newin[i] && newin[i]->idsubst < ptr->idsubst)
713 newin[i] = newin[i]->nextentry;
714
715 if (newin[i] == NULL)
716 return in;
717
718 if (newin[i]->idsubst > ptr->idsubst)
719 {
720 ptr = newin[i];
721 i = -1;
722 continue;
723 }
724
725 while (newin[i]->idsubst == ptr->idsubst)
726 {
727 if (newin[i]->posinsubst == curpos && newin[i]->tnvariant == newn)
728 {
729 ptr = newin[i];
730 break;
731 }
732
733 newin[i] = newin[i]->nextentry;
734 if (newin[i] == NULL)
735 return in;
736 }
737
738 if (newin[i]->idsubst != ptr->idsubst)
739 {
740 ptr = newin[i];
741 i = -1;
742 continue;
743 }
744 }
745
746 if (i == newn && matchIdSubst(stored, ptr->idsubst) && (in == NULL || !matchIdSubst(in, ptr->idsubst)))
747 { /* found */
748
749 ptr->nextvariant = in;
750 in = ptr;
751 }
752
753 /* step forward */
754 for (i = 0; i < newn; i++)
755 newin[i] = newin[i]->nextentry;
756 }
757}
758
759static TSLexeme *
760copyTSLexeme(TheSubstitute *ts)
761{
762 TSLexeme *res;
763 uint16 i;
764
765 res = (TSLexeme *) palloc(sizeof(TSLexeme) * (ts->reslen + 1));
766 for (i = 0; i < ts->reslen; i++)
767 {
768 res[i] = ts->res[i];
769 res[i].lexeme = pstrdup(ts->res[i].lexeme);
770 }
771
772 res[ts->reslen].lexeme = NULL;
773
774 return res;
775}
776
777static TSLexeme *
778checkMatch(DictThesaurus *d, LexemeInfo *info, uint16 curpos, bool *moreres)
779{
780 *moreres = false;
781 while (info)
782 {
783 Assert(info->idsubst < d->nsubst);
784 if (info->nextvariant)
785 *moreres = true;
786 if (d->subst[info->idsubst].lastlexeme == curpos)
787 return copyTSLexeme(d->subst + info->idsubst);
788 info = info->nextvariant;
789 }
790
791 return NULL;
792}
793
794Datum
795thesaurus_lexize(PG_FUNCTION_ARGS)
796{
797 DictThesaurus *d = (DictThesaurus *) PG_GETARG_POINTER(0);
798 DictSubState *dstate = (DictSubState *) PG_GETARG_POINTER(3);
799 TSLexeme *res = NULL;
800 LexemeInfo *stored,
801 *info = NULL;
802 uint16 curpos = 0;
803 bool moreres = false;
804
805 if (PG_NARGS() != 4 || dstate == NULL)
806 elog(ERROR, "forbidden call of thesaurus or nested call");
807
808 if (dstate->isend)
809 PG_RETURN_POINTER(NULL);
810 stored = (LexemeInfo *) dstate->private_state;
811
812 if (stored)
813 curpos = stored->posinsubst + 1;
814
815 if (!d->subdict->isvalid)
816 d->subdict = lookup_ts_dictionary_cache(d->subdictOid);
817
818 res = (TSLexeme *) DatumGetPointer(FunctionCall4(&(d->subdict->lexize),
819 PointerGetDatum(d->subdict->dictData),
820 PG_GETARG_DATUM(1),
821 PG_GETARG_DATUM(2),
822 PointerGetDatum(NULL)));
823
824 if (res && res->lexeme)
825 {
826 TSLexeme *ptr = res,
827 *basevar;
828
829 while (ptr->lexeme)
830 {
831 uint16 nv = ptr->nvariant;
832 uint16 i,
833 nlex = 0;
834 LexemeInfo **infos;
835
836 basevar = ptr;
837 while (ptr->lexeme && nv == ptr->nvariant)
838 {
839 nlex++;
840 ptr++;
841 }
842
843 infos = (LexemeInfo **) palloc(sizeof(LexemeInfo *) * nlex);
844 for (i = 0; i < nlex; i++)
845 if ((infos[i] = findTheLexeme(d, basevar[i].lexeme)) == NULL)
846 break;
847
848 if (i < nlex)
849 {
850 /* no chance to find */
851 pfree(infos);
852 continue;
853 }
854
855 info = findVariant(info, stored, curpos, infos, nlex);
856 }
857 }
858 else if (res)
859 { /* stop-word */
860 LexemeInfo *infos = findTheLexeme(d, NULL);
861
862 info = findVariant(NULL, stored, curpos, &infos, 1);
863 }
864 else
865 {
866 info = NULL; /* word isn't recognized */
867 }
868
869 dstate->private_state = (void *) info;
870
871 if (!info)
872 {
873 dstate->getnext = false;
874 PG_RETURN_POINTER(NULL);
875 }
876
877 if ((res = checkMatch(d, info, curpos, &moreres)) != NULL)
878 {
879 dstate->getnext = moreres;
880 PG_RETURN_POINTER(res);
881 }
882
883 dstate->getnext = true;
884
885 PG_RETURN_POINTER(NULL);
886}
887