tsvector_op.c source code [PostgreSQL/src/backend/utils/adt/tsvector_op.c]

/*-------------------------------------------------------------------------
 *
 * tsvector_op.c
 *	  operations over tsvector
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/utils/adt/tsvector_op.c
 *
 *-------------------------------------------------------------------------
 */
14	#include "postgres.h"
15
16	#include <limits.h>
17
18	#include "access/htup_details.h"
19	#include "catalog/namespace.h"
20	#include "catalog/pg_type.h"
21	#include "commands/trigger.h"
22	#include "executor/spi.h"
23	#include "funcapi.h"
24	#include "mb/pg_wchar.h"
25	#include "miscadmin.h"
26	#include "parser/parse_coerce.h"
27	#include "tsearch/ts_utils.h"
28	#include "utils/builtins.h"
29	#include "utils/lsyscache.h"
30	#include "utils/regproc.h"
31	#include "utils/rel.h"
32
33
34	typedef struct
35	{
36	WordEntry *arrb;
37	WordEntry *arre;
38	char *values;
39	char *operand;
40	} CHKVAL;
41
42
43	typedef struct StatEntry
44	{
45	uint32 ndoc; / zero indicates that we were already here*
46	* while walking through the tree */
47	uint32 nentry;
48	struct StatEntry *left;
49	struct StatEntry *right;
50	uint32 lenlexeme;
51	char lexeme[FLEXIBLE_ARRAY_MEMBER];
52	} StatEntry;
53
54	#define STATENTRYHDRSZ (offsetof(StatEntry, lexeme))
55
56	typedef struct
57	{
58	int32 weight;
59
60	uint32 maxdepth;
61
62	StatEntry **stack;
63	uint32 stackpos;
64
65	StatEntry *root;
66	} TSVectorStat;
67
68	static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
69	static int tsvector_bsearch(const TSVector tsv, char lexeme, int* lexeme_len);
70
71	/*
72	* Order: haspos, len, word, for all positions (pos, weight)
73	*/
74	static int
75	silly_cmp_tsvector(const TSVector a, const TSVector b)
76	{
77	if (VARSIZE(a) < VARSIZE(b))
78	return -`1`;
79	else if (VARSIZE(a) > VARSIZE(b))
80	return `1`;
81	else if (a->size < b->size)
82	return -`1`;
83	else if (a->size > b->size)
84	return `1`;
85	else
86	{
87	WordEntry *aptr = ARRPTR(a);
88	WordEntry *bptr = ARRPTR(b);
89	int i = `0`;
90	int res;
91
92
93	for (i = `0`; i < a->size; i++)
94	{
95	if (aptr->haspos != bptr->haspos)
96	{
97	return (aptr->haspos > bptr->haspos) ? -`1` : `1`;
98	}
99	else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != `0`)
100	{
101	return res;
102	}
103	else if (aptr->haspos)
104	{
105	WordEntryPos *ap = POSDATAPTR(a, aptr);
106	WordEntryPos *bp = POSDATAPTR(b, bptr);
107	int j;
108
109	if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
110	return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -`1` : `1`;
111
112	for (j = `0`; j < POSDATALEN(a, aptr); j++)
113	{
114	if (WEP_GETPOS(ap) != WEP_GETPOS(bp))
115	{
116	return (WEP_GETPOS(ap) > WEP_GETPOS(bp)) ? -`1` : `1`;
117	}
118	else if (WEP_GETWEIGHT(ap) != WEP_GETWEIGHT(bp))
119	{
120	return (WEP_GETWEIGHT(ap) > WEP_GETWEIGHT(bp)) ? -`1` : `1`;
121	}
122	ap++, bp++;
123	}
124	}
125
126	aptr++;
127	bptr++;
128	}
129	}
130
131	return `0`;
132	}
133
134	#define TSVECTORCMPFUNC( type, action, ret ) \
135	Datum \
136	tsvector_##type(PG_FUNCTION_ARGS) \
137	{ \
138	TSVector a = PG_GETARG_TSVECTOR(0); \
139	TSVector b = PG_GETARG_TSVECTOR(1); \
140	int res = silly_cmp_tsvector(a, b); \
141	PG_FREE_IF_COPY(a,0); \
142	PG_FREE_IF_COPY(b,1); \
143	PG_RETURN_##ret( res action 0 ); \
144	} \
145	/* keep compiler quiet - no extra ; */ \
146	extern int no_such_variable
147
148	TSVECTORCMPFUNC(lt, <, BOOL);
149	TSVECTORCMPFUNC(le, <=, BOOL);
150	TSVECTORCMPFUNC(eq, ==, BOOL);
151	TSVECTORCMPFUNC(ge, >=, BOOL);
152	TSVECTORCMPFUNC(gt, >, BOOL);
153	TSVECTORCMPFUNC(ne, !=, BOOL);
154	TSVECTORCMPFUNC(cmp, +, INT32);
155
156	Datum
157	tsvector_strip(PG_FUNCTION_ARGS)
158	{
159	TSVector in = PG_GETARG_TSVECTOR(`0`);
160	TSVector out;
161	int i,
162	len = `0`;
163	WordEntry *arrin = ARRPTR(in),
164	*arrout;
165	char *cur;
166
167	for (i = `0`; i < in->size; i++)
168	len += arrin[i].len;
169
170	len = CALCDATASIZE(in->size, len);
171	out = (TSVector) palloc0(len);
172	SET_VARSIZE(out, len);
173	out->size = in->size;
174	arrout = ARRPTR(out);
175	cur = STRPTR(out);
176	for (i = `0`; i < in->size; i++)
177	{
178	memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
179	arrout[i].haspos = `0`;
180	arrout[i].len = arrin[i].len;
181	arrout[i].pos = cur - STRPTR(out);
182	cur += arrout[i].len;
183	}
184
185	PG_FREE_IF_COPY(in, `0`);
186	PG_RETURN_POINTER(out);
187	}
188
189	Datum
190	tsvector_length(PG_FUNCTION_ARGS)
191	{
192	TSVector in = PG_GETARG_TSVECTOR(`0`);
193	int32 ret = in->size;
194
195	PG_FREE_IF_COPY(in, `0`);
196	PG_RETURN_INT32(ret);
197	}
198
199	Datum
200	tsvector_setweight(PG_FUNCTION_ARGS)
201	{
202	TSVector in = PG_GETARG_TSVECTOR(`0`);
203	char cw = PG_GETARG_CHAR(`1`);
204	TSVector out;
205	int i,
206	j;
207	WordEntry *entry;
208	WordEntryPos *p;
209	int w = `0`;
210
211	switch (cw)
212	{
213	case `'A'`:
214	case `'a'`:
215	w = `3`;
216	break;
217	case `'B'`:
218	case `'b'`:
219	w = `2`;
220	break;
221	case `'C'`:
222	case `'c'`:
223	w = `1`;
224	break;
225	case `'D'`:
226	case `'d'`:
227	w = `0`;
228	break;
229	default:
230	/ internal error /
231	elog(ERROR, "unrecognized weight: %d", cw);
232	}
233
234	out = (TSVector) palloc(VARSIZE(in));
235	memcpy(out, in, VARSIZE(in));
236	entry = ARRPTR(out);
237	i = out->size;
238	while (i--)
239	{
240	if ((j = POSDATALEN(out, entry)) != `0`)
241	{
242	p = POSDATAPTR(out, entry);
243	while (j--)
244	{
245	WEP_SETWEIGHT(*p, w);
246	p++;
247	}
248	}
249	entry++;
250	}
251
252	PG_FREE_IF_COPY(in, `0`);
253	PG_RETURN_POINTER(out);
254	}
255
256	/*
257	* setweight(tsin tsvector, char_weight "char", lexemes "text"[])
258	*
259	* Assign weight w to elements of tsin that are listed in lexemes.
260	*/
261	Datum
262	tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
263	{
264	TSVector tsin = PG_GETARG_TSVECTOR(`0`);
265	char char_weight = PG_GETARG_CHAR(`1`);
266	ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(`2`);
267
268	TSVector tsout;
269	int i,
270	j,
271	nlexemes,
272	weight;
273	WordEntry *entry;
274	Datum *dlexemes;
275	bool *nulls;
276
277	switch (char_weight)
278	{
279	case `'A'`:
280	case `'a'`:
281	weight = `3`;
282	break;
283	case `'B'`:
284	case `'b'`:
285	weight = `2`;
286	break;
287	case `'C'`:
288	case `'c'`:
289	weight = `1`;
290	break;
291	case `'D'`:
292	case `'d'`:
293	weight = `0`;
294	break;
295	default:
296	/ internal error /
297	elog(ERROR, "unrecognized weight: %c", char_weight);
298	}
299
300	tsout = (TSVector) palloc(VARSIZE(tsin));
301	memcpy(tsout, tsin, VARSIZE(tsin));
302	entry = ARRPTR(tsout);
303
304	deconstruct_array(lexemes, TEXTOID, -`1`, false, `'i'`,
305	&dlexemes, &nulls, &nlexemes);
306
307	/*
308	* Assuming that lexemes array is significantly shorter than tsvector we
309	* can iterate through lexemes performing binary search of each lexeme
310	* from lexemes in tsvector.
311	*/
312	for (i = `0`; i < nlexemes; i++)
313	{
314	char *lex;
315	int lex_len,
316	lex_pos;
317
318	if (nulls[i])
319	ereport(ERROR,
320	(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
321	errmsg("lexeme array may not contain nulls")));
322
323	lex = VARDATA(dlexemes[i]);
324	lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
325	lex_pos = tsvector_bsearch(tsout, lex, lex_len);
326
327	if (lex_pos >= `0` && (j = POSDATALEN(tsout, entry + lex_pos)) != `0`)
328	{
329	WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
330
331	while (j--)
332	{
333	WEP_SETWEIGHT(*p, weight);
334	p++;
335	}
336	}
337	}
338
339	PG_FREE_IF_COPY(tsin, `0`);
340	PG_FREE_IF_COPY(lexemes, `2`);
341
342	PG_RETURN_POINTER(tsout);
343	}
344
/*
 * Compare the lexemes of two WordEntry items (non-prefix comparison).
 * pa/pb are the string-area base pointers that a->pos / b->pos are relative
 * to.  Note that 'a' and 'b' are each evaluated twice.
 */
#define compareEntry(pa, a, pb, b) \
	tsCompareString((pa) + (a)->pos, (a)->len,	\
					(pb) + (b)->pos, (b)->len,	\
					false)
349
350	/*
351	* Add positions from src to dest after offsetting them by maxpos.
352	* Return the number added (might be less than expected due to overflow)
353	*/
354	static int32
355	add_pos(TSVector src, WordEntry *srcptr,
356	TSVector dest, WordEntry *destptr,
357	int32 maxpos)
358	{
359	uint16 *clen = &_POSVECPTR(dest, destptr)->npos;
360	int i;
361	uint16 slen = POSDATALEN(src, srcptr),
362	startlen;
363	WordEntryPos *spos = POSDATAPTR(src, srcptr),
364	*dpos = POSDATAPTR(dest, destptr);
365
366	if (!destptr->haspos)
367	*clen = `0`;
368
369	startlen = *clen;
370	for (i = `0`;
371	i < slen && *clen < MAXNUMPOS &&
372	(clen == `0` \|\| WEP_GETPOS(dpos[clen - `1`]) != MAXENTRYPOS - `1`);
373	i++)
374	{
375	WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
376	WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
377	(*clen)++;
378	}
379
380	if (*clen != startlen)
381	destptr->haspos = `1`;
382	return *clen - startlen;
383	}
384
385	/*
386	* Perform binary search of given lexeme in TSVector.
387	* Returns lexeme position in TSVector's entry array or -1 if lexeme wasn't
388	* found.
389	*/
390	static int
391	tsvector_bsearch(const TSVector tsv, char lexeme, int* lexeme_len)
392	{
393	WordEntry *arrin = ARRPTR(tsv);
394	int StopLow = `0`,
395	StopHigh = tsv->size,
396	StopMiddle,
397	cmp;
398
399	while (StopLow < StopHigh)
400	{
401	StopMiddle = (StopLow + StopHigh) / `2`;
402
403	cmp = tsCompareString(lexeme, lexeme_len,
404	STRPTR(tsv) + arrin[StopMiddle].pos,
405	arrin[StopMiddle].len,
406	false);
407
408	if (cmp < `0`)
409	StopHigh = StopMiddle;
410	else if (cmp > `0`)
411	StopLow = StopMiddle + `1`;
412	else / found it /
413	return StopMiddle;
414	}
415
416	return -`1`;
417	}
418
/*
 * qsort comparator functions
 */

/*
 * Order two ints ascending.  Returns -1, 0 or 1; written as a pair of
 * comparisons so there is no subtraction that could overflow.
 */
static int
compare_int(const void *va, const void *vb)
{
	int			a = *((const int *) va);
	int			b = *((const int *) vb);

	return (a > b) - (a < b);
}
433
434	static int
435	compare_text_lexemes(const void va, const* void *vb)
436	{
437	Datum a = ((const* Datum *) va);
438	Datum b = ((const* Datum *) vb);
439	char *alex = VARDATA_ANY(a);
440	int alex_len = VARSIZE_ANY_EXHDR(a);
441	char *blex = VARDATA_ANY(b);
442	int blex_len = VARSIZE_ANY_EXHDR(b);
443
444	return tsCompareString(alex, alex_len, blex, blex_len, false);
445	}
446
447	/*
448	* Internal routine to delete lexemes from TSVector by array of offsets.
449	*
450	* int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
451	* int indices_count -- size of that array
452	*
453	* Returns new TSVector without given lexemes along with their positions
454	* and weights.
455	*/
456	static TSVector
457	tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
458	int indices_count)
459	{
460	TSVector tsout;
461	WordEntry *arrin = ARRPTR(tsv),
462	*arrout;
463	char *data = STRPTR(tsv),
464	*dataout;
465	int i, / index in arrin /
466	j, / index in arrout /
467	k, / index in indices_to_delete /
468	curoff; / index in dataout area /
469
470	/*
471	* Sort the filter array to simplify membership checks below. Also, get
472	* rid of any duplicate entries, so that we can assume that indices_count
473	* is exactly equal to the number of lexemes that will be removed.
474	*/
475	if (indices_count > `1`)
476	{
477	int kp;
478
479	qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
480	kp = `0`;
481	for (k = `1`; k < indices_count; k++)
482	{
483	if (indices_to_delete[k] != indices_to_delete[kp])
484	indices_to_delete[++kp] = indices_to_delete[k];
485	}
486	indices_count = ++kp;
487	}
488
489	/*
490	* Here we overestimate tsout size, since we don't know how much space is
491	* used by the deleted lexeme(s). We will set exact size below.
492	*/
493	tsout = (TSVector) palloc0(VARSIZE(tsv));
494
495	/ This count must be correct because STRPTR(tsout) relies on it. /
496	tsout->size = tsv->size - indices_count;
497
498	/*
499	* Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
500	*/
501	arrout = ARRPTR(tsout);
502	dataout = STRPTR(tsout);
503	curoff = `0`;
504	for (i = j = k = `0`; i < tsv->size; i++)
505	{
506	/*
507	* If current i is present in indices_to_delete, skip this lexeme.
508	* Since indices_to_delete is already sorted, we only need to check
509	* the current (k'th) entry.
510	*/
511	if (k < indices_count && i == indices_to_delete[k])
512	{
513	k++;
514	continue;
515	}
516
517	/ Copy lexeme and its positions and weights /
518	memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
519	arrout[j].haspos = arrin[i].haspos;
520	arrout[j].len = arrin[i].len;
521	arrout[j].pos = curoff;
522	curoff += arrin[i].len;
523	if (arrin[i].haspos)
524	{
525	int len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
526	+ sizeof(uint16);
527
528	curoff = SHORTALIGN(curoff);
529	memcpy(dataout + curoff,
530	STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
531	len);
532	curoff += len;
533	}
534
535	j++;
536	}
537
538	/*
539	* k should now be exactly equal to indices_count. If it isn't then the
540	* caller provided us with indices outside of [0, tsv->size) range and
541	* estimation of tsout's size is wrong.
542	*/
543	Assert(k == indices_count);
544
545	SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
546	return tsout;
547	}
548
549	/*
550	* Delete given lexeme from tsvector.
551	* Implementation of user-level ts_delete(tsvector, text).
552	*/
553	Datum
554	tsvector_delete_str(PG_FUNCTION_ARGS)
555	{
556	TSVector tsin = PG_GETARG_TSVECTOR(`0`),
557	tsout;
558	text *tlexeme = PG_GETARG_TEXT_PP(`1`);
559	char *lexeme = VARDATA_ANY(tlexeme);
560	int lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
561	skip_index;
562
563	if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -`1`)
564	PG_RETURN_POINTER(tsin);
565
566	tsout = tsvector_delete_by_indices(tsin, &skip_index, `1`);
567
568	PG_FREE_IF_COPY(tsin, `0`);
569	PG_FREE_IF_COPY(tlexeme, `1`);
570	PG_RETURN_POINTER(tsout);
571	}
572
573	/*
574	* Delete given array of lexemes from tsvector.
575	* Implementation of user-level ts_delete(tsvector, text[]).
576	*/
577	Datum
578	tsvector_delete_arr(PG_FUNCTION_ARGS)
579	{
580	TSVector tsin = PG_GETARG_TSVECTOR(`0`),
581	tsout;
582	ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(`1`);
583	int i,
584	nlex,
585	skip_count,
586	*skip_indices;
587	Datum *dlexemes;
588	bool *nulls;
589
590	deconstruct_array(lexemes, TEXTOID, -`1`, false, `'i'`,
591	&dlexemes, &nulls, &nlex);
592
593	/*
594	* In typical use case array of lexemes to delete is relatively small. So
595	* here we optimize things for that scenario: iterate through lexarr
596	* performing binary search of each lexeme from lexarr in tsvector.
597	*/
598	skip_indices = palloc0(nlex * sizeof(int));
599	for (i = skip_count = `0`; i < nlex; i++)
600	{
601	char *lex;
602	int lex_len,
603	lex_pos;
604
605	if (nulls[i])
606	ereport(ERROR,
607	(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
608	errmsg("lexeme array may not contain nulls")));
609
610	lex = VARDATA(dlexemes[i]);
611	lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
612	lex_pos = tsvector_bsearch(tsin, lex, lex_len);
613
614	if (lex_pos >= `0`)
615	skip_indices[skip_count++] = lex_pos;
616	}
617
618	tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
619
620	pfree(skip_indices);
621	PG_FREE_IF_COPY(tsin, `0`);
622	PG_FREE_IF_COPY(lexemes, `1`);
623
624	PG_RETURN_POINTER(tsout);
625	}
626
627	/*
628	* Expand tsvector as table with following columns:
629	* lexeme: lexeme text
630	* positions: integer array of lexeme positions
631	* weights: char array of weights corresponding to positions
632	*/
633	Datum
634	tsvector_unnest(PG_FUNCTION_ARGS)
635	{
636	FuncCallContext *funcctx;
637	TSVector tsin;
638
639	if (SRF_IS_FIRSTCALL())
640	{
641	MemoryContext oldcontext;
642	TupleDesc tupdesc;
643
644	funcctx = SRF_FIRSTCALL_INIT();
645	oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
646
647	tupdesc = CreateTemplateTupleDesc(`3`);
648	TupleDescInitEntry(tupdesc, (AttrNumber) `1`, "lexeme",
649	TEXTOID, -`1`, `0`);
650	TupleDescInitEntry(tupdesc, (AttrNumber) `2`, "positions",
651	INT2ARRAYOID, -`1`, `0`);
652	TupleDescInitEntry(tupdesc, (AttrNumber) `3`, "weights",
653	TEXTARRAYOID, -`1`, `0`);
654	funcctx->tuple_desc = BlessTupleDesc(tupdesc);
655
656	funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(`0`);
657
658	MemoryContextSwitchTo(oldcontext);
659	}
660
661	funcctx = SRF_PERCALL_SETUP();
662	tsin = (TSVector) funcctx->user_fctx;
663
664	if (funcctx->call_cntr < tsin->size)
665	{
666	WordEntry *arrin = ARRPTR(tsin);
667	char *data = STRPTR(tsin);
668	HeapTuple tuple;
669	int j,
670	i = funcctx->call_cntr;
671	bool nulls[] = {false, false, false};
672	Datum values[`3`];
673
674	values[`0`] = PointerGetDatum(
675	cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len)
676	);
677
678	if (arrin[i].haspos)
679	{
680	WordEntryPosVector *posv;
681	Datum *positions;
682	Datum *weights;
683	char weight;
684
685	/*
686	* Internally tsvector stores position and weight in the same
687	* uint16 (2 bits for weight, 14 for position). Here we extract
688	* that in two separate arrays.
689	*/
690	posv = _POSVECPTR(tsin, arrin + i);
691	positions = palloc(posv->npos * sizeof(Datum));
692	weights = palloc(posv->npos * sizeof(Datum));
693	for (j = `0`; j < posv->npos; j++)
694	{
695	positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
696	weight = `'D'` - WEP_GETWEIGHT(posv->pos[j]);
697	weights[j] = PointerGetDatum(
698	cstring_to_text_with_len(&weight, `1`)
699	);
700	}
701
702	values[`1`] = PointerGetDatum(
703	construct_array(positions, posv->npos, INT2OID, `2`, true, `'s'`));
704	values[`2`] = PointerGetDatum(
705	construct_array(weights, posv->npos, TEXTOID, -`1`, false, `'i'`));
706	}
707	else
708	{
709	nulls[`1`] = nulls[`2`] = true;
710	}
711
712	tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
713	SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
714	}
715	else
716	{
717	pfree(tsin);
718	SRF_RETURN_DONE(funcctx);
719	}
720	}
721
722	/*
723	* Convert tsvector to array of lexemes.
724	*/
725	Datum
726	tsvector_to_array(PG_FUNCTION_ARGS)
727	{
728	TSVector tsin = PG_GETARG_TSVECTOR(`0`);
729	WordEntry *arrin = ARRPTR(tsin);
730	Datum *elements;
731	int i;
732	ArrayType *array;
733
734	elements = palloc(tsin->size * sizeof(Datum));
735
736	for (i = `0`; i < tsin->size; i++)
737	{
738	elements[i] = PointerGetDatum(
739	cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos, arrin[i].len)
740	);
741	}
742
743	array = construct_array(elements, tsin->size, TEXTOID, -`1`, false, `'i'`);
744
745	pfree(elements);
746	PG_FREE_IF_COPY(tsin, `0`);
747	PG_RETURN_POINTER(array);
748	}
749
750	/*
751	* Build tsvector from array of lexemes.
752	*/
753	Datum
754	array_to_tsvector(PG_FUNCTION_ARGS)
755	{
756	ArrayType *v = PG_GETARG_ARRAYTYPE_P(`0`);
757	TSVector tsout;
758	Datum *dlexemes;
759	WordEntry *arrout;
760	bool *nulls;
761	int nitems,
762	i,
763	j,
764	tslen,
765	datalen = `0`;
766	char *cur;
767
768	deconstruct_array(v, TEXTOID, -`1`, false, `'i'`, &dlexemes, &nulls, &nitems);
769
770	/ Reject nulls (maybe we should just ignore them, instead?) /
771	for (i = `0`; i < nitems; i++)
772	{
773	if (nulls[i])
774	ereport(ERROR,
775	(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
776	errmsg("lexeme array may not contain nulls")));
777	}
778
779	/ Sort and de-dup, because this is required for a valid tsvector. /
780	if (nitems > `1`)
781	{
782	qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
783	j = `0`;
784	for (i = `1`; i < nitems; i++)
785	{
786	if (compare_text_lexemes(&dlexemes[j], &dlexemes[i]) < `0`)
787	dlexemes[++j] = dlexemes[i];
788	}
789	nitems = ++j;
790	}
791
792	/ Calculate space needed for surviving lexemes. /
793	for (i = `0`; i < nitems; i++)
794	datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
795	tslen = CALCDATASIZE(nitems, datalen);
796
797	/ Allocate and fill tsvector. /
798	tsout = (TSVector) palloc0(tslen);
799	SET_VARSIZE(tsout, tslen);
800	tsout->size = nitems;
801
802	arrout = ARRPTR(tsout);
803	cur = STRPTR(tsout);
804	for (i = `0`; i < nitems; i++)
805	{
806	char *lex = VARDATA(dlexemes[i]);
807	int lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
808
809	memcpy(cur, lex, lex_len);
810	arrout[i].haspos = `0`;
811	arrout[i].len = lex_len;
812	arrout[i].pos = cur - STRPTR(tsout);
813	cur += lex_len;
814	}
815
816	PG_FREE_IF_COPY(v, `0`);
817	PG_RETURN_POINTER(tsout);
818	}
819
820	/*
821	* ts_filter(): keep only lexemes with given weights in tsvector.
822	*/
823	Datum
824	tsvector_filter(PG_FUNCTION_ARGS)
825	{
826	TSVector tsin = PG_GETARG_TSVECTOR(`0`),
827	tsout;
828	ArrayType *weights = PG_GETARG_ARRAYTYPE_P(`1`);
829	WordEntry *arrin = ARRPTR(tsin),
830	*arrout;
831	char *datain = STRPTR(tsin),
832	*dataout;
833	Datum *dweights;
834	bool *nulls;
835	int nweights;
836	int i,
837	j;
838	int cur_pos = `0`;
839	char mask = `0`;
840
841	deconstruct_array(weights, CHAROID, `1`, true, `'c'`,
842	&dweights, &nulls, &nweights);
843
844	for (i = `0`; i < nweights; i++)
845	{
846	char char_weight;
847
848	if (nulls[i])
849	ereport(ERROR,
850	(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
851	errmsg("weight array may not contain nulls")));
852
853	char_weight = DatumGetChar(dweights[i]);
854	switch (char_weight)
855	{
856	case `'A'`:
857	case `'a'`:
858	mask = mask \| `8`;
859	break;
860	case `'B'`:
861	case `'b'`:
862	mask = mask \| `4`;
863	break;
864	case `'C'`:
865	case `'c'`:
866	mask = mask \| `2`;
867	break;
868	case `'D'`:
869	case `'d'`:
870	mask = mask \| `1`;
871	break;
872	default:
873	ereport(ERROR,
874	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
875	errmsg("unrecognized weight: \"%c\"", char_weight)));
876	}
877	}
878
879	tsout = (TSVector) palloc0(VARSIZE(tsin));
880	tsout->size = tsin->size;
881	arrout = ARRPTR(tsout);
882	dataout = STRPTR(tsout);
883
884	for (i = j = `0`; i < tsin->size; i++)
885	{
886	WordEntryPosVector *posvin,
887	*posvout;
888	int npos = `0`;
889	int k;
890
891	if (!arrin[i].haspos)
892	continue;
893
894	posvin = _POSVECPTR(tsin, arrin + i);
895	posvout = (WordEntryPosVector *)
896	(dataout + SHORTALIGN(cur_pos + arrin[i].len));
897
898	for (k = `0`; k < posvin->npos; k++)
899	{
900	if (mask & (`1` << WEP_GETWEIGHT(posvin->pos[k])))
901	posvout->pos[npos++] = posvin->pos[k];
902	}
903
904	/ if no satisfactory positions found, skip lexeme /
905	if (!npos)
906	continue;
907
908	arrout[j].haspos = true;
909	arrout[j].len = arrin[i].len;
910	arrout[j].pos = cur_pos;
911
912	memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
913	posvout->npos = npos;
914	cur_pos += SHORTALIGN(arrin[i].len);
915	cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
916	sizeof(uint16);
917	j++;
918	}
919
920	tsout->size = j;
921	if (dataout != STRPTR(tsout))
922	memmove(STRPTR(tsout), dataout, cur_pos);
923
924	SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
925
926	PG_FREE_IF_COPY(tsin, `0`);
927	PG_RETURN_POINTER(tsout);
928	}
929
930	Datum
931	tsvector_concat(PG_FUNCTION_ARGS)
932	{
933	TSVector in1 = PG_GETARG_TSVECTOR(`0`);
934	TSVector in2 = PG_GETARG_TSVECTOR(`1`);
935	TSVector out;
936	WordEntry *ptr;
937	WordEntry *ptr1,
938	*ptr2;
939	WordEntryPos *p;
940	int maxpos = `0`,
941	i,
942	j,
943	i1,
944	i2,
945	dataoff,
946	output_bytes,
947	output_size;
948	char *data,
949	*data1,
950	*data2;
951
952	/ Get max position in in1; we'll need this to offset in2's positions /
953	ptr = ARRPTR(in1);
954	i = in1->size;
955	while (i--)
956	{
957	if ((j = POSDATALEN(in1, ptr)) != `0`)
958	{
959	p = POSDATAPTR(in1, ptr);
960	while (j--)
961	{
962	if (WEP_GETPOS(*p) > maxpos)
963	maxpos = WEP_GETPOS(*p);
964	p++;
965	}
966	}
967	ptr++;
968	}
969
970	ptr1 = ARRPTR(in1);
971	ptr2 = ARRPTR(in2);
972	data1 = STRPTR(in1);
973	data2 = STRPTR(in2);
974	i1 = in1->size;
975	i2 = in2->size;
976
977	/*
978	* Conservative estimate of space needed. We might need all the data in
979	* both inputs, and conceivably add a pad byte before position data for
980	* each item where there was none before.
981	*/
982	output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
983
984	out = (TSVector) palloc0(output_bytes);
985	SET_VARSIZE(out, output_bytes);
986
987	/*
988	* We must make out->size valid so that STRPTR(out) is sensible. We'll
989	* collapse out any unused space at the end.
990	*/
991	out->size = in1->size + in2->size;
992
993	ptr = ARRPTR(out);
994	data = STRPTR(out);
995	dataoff = `0`;
996	while (i1 && i2)
997	{
998	int cmp = compareEntry(data1, ptr1, data2, ptr2);
999
1000	if (cmp < `0`)
1001	{ / in1 first /
1002	ptr->haspos = ptr1->haspos;
1003	ptr->len = ptr1->len;
1004	memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1005	ptr->pos = dataoff;
1006	dataoff += ptr1->len;
1007	if (ptr->haspos)
1008	{
1009	dataoff = SHORTALIGN(dataoff);
1010	memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1011	dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1012	}
1013
1014	ptr++;
1015	ptr1++;
1016	i1--;
1017	}
1018	else if (cmp > `0`)
1019	{ / in2 first /
1020	ptr->haspos = ptr2->haspos;
1021	ptr->len = ptr2->len;
1022	memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1023	ptr->pos = dataoff;
1024	dataoff += ptr2->len;
1025	if (ptr->haspos)
1026	{
1027	int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1028
1029	if (addlen == `0`)
1030	ptr->haspos = `0`;
1031	else
1032	{
1033	dataoff = SHORTALIGN(dataoff);
1034	dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1035	}
1036	}
1037
1038	ptr++;
1039	ptr2++;
1040	i2--;
1041	}
1042	else
1043	{
1044	ptr->haspos = ptr1->haspos \| ptr2->haspos;
1045	ptr->len = ptr1->len;
1046	memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1047	ptr->pos = dataoff;
1048	dataoff += ptr1->len;
1049	if (ptr->haspos)
1050	{
1051	if (ptr1->haspos)
1052	{
1053	dataoff = SHORTALIGN(dataoff);
1054	memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1055	dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1056	if (ptr2->haspos)
1057	dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
1058	}
1059	else / must have ptr2->haspos /
1060	{
1061	int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1062
1063	if (addlen == `0`)
1064	ptr->haspos = `0`;
1065	else
1066	{
1067	dataoff = SHORTALIGN(dataoff);
1068	dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1069	}
1070	}
1071	}
1072
1073	ptr++;
1074	ptr1++;
1075	ptr2++;
1076	i1--;
1077	i2--;
1078	}
1079	}
1080
1081	while (i1)
1082	{
1083	ptr->haspos = ptr1->haspos;
1084	ptr->len = ptr1->len;
1085	memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
1086	ptr->pos = dataoff;
1087	dataoff += ptr1->len;
1088	if (ptr->haspos)
1089	{
1090	dataoff = SHORTALIGN(dataoff);
1091	memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
1092	dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
1093	}
1094
1095	ptr++;
1096	ptr1++;
1097	i1--;
1098	}
1099
1100	while (i2)
1101	{
1102	ptr->haspos = ptr2->haspos;
1103	ptr->len = ptr2->len;
1104	memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
1105	ptr->pos = dataoff;
1106	dataoff += ptr2->len;
1107	if (ptr->haspos)
1108	{
1109	int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
1110
1111	if (addlen == `0`)
1112	ptr->haspos = `0`;
1113	else
1114	{
1115	dataoff = SHORTALIGN(dataoff);
1116	dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
1117	}
1118	}
1119
1120	ptr++;
1121	ptr2++;
1122	i2--;
1123	}
1124
1125	/*
1126	* Instead of checking each offset individually, we check for overflow of
1127	* pos fields once at the end.
1128	*/
1129	if (dataoff > MAXSTRPOS)
1130	ereport(ERROR,
1131	(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1132	errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
1133
1134	/*
1135	* Adjust sizes (asserting that we didn't overrun the original estimates)
1136	* and collapse out any unused array entries.
1137	*/
1138	output_size = ptr - ARRPTR(out);
1139	Assert(output_size <= out->size);
1140	out->size = output_size;
1141	if (data != STRPTR(out))
1142	memmove(STRPTR(out), data, dataoff);
1143	output_bytes = CALCDATASIZE(out->size, dataoff);
1144	Assert(output_bytes <= VARSIZE(out));
1145	SET_VARSIZE(out, output_bytes);
1146
1147	PG_FREE_IF_COPY(in1, `0`);
1148	PG_FREE_IF_COPY(in2, `1`);
1149	PG_RETURN_POINTER(out);
1150	}
1151
1152	/*
1153	* Compare two strings by tsvector rules.
1154	*
1155	* if isPrefix = true then it returns zero value iff b has prefix a
1156	*/
1157	int32
1158	tsCompareString(char a, int* lena, char b, int* lenb, bool prefix)
1159	{
1160	int cmp;
1161
1162	if (lena == `0`)
1163	{
1164	if (prefix)
1165	cmp = `0`; / empty string is prefix of anything /
1166	else
1167	cmp = (lenb > `0`) ? -`1` : `0`;
1168	}
1169	else if (lenb == `0`)
1170	{
1171	cmp = (lena > `0`) ? `1` : `0`;
1172	}
1173	else
1174	{
1175	cmp = memcmp(a, b, Min(lena, lenb));
1176
1177	if (prefix)
1178	{
1179	if (cmp == `0` && lena > lenb)
1180	cmp = `1`; / a is longer, so not a prefix of b /
1181	}
1182	else if (cmp == `0` && lena != lenb)
1183	{
1184	cmp = (lena < lenb) ? -`1` : `1`;
1185	}
1186	}
1187
1188	return cmp;
1189	}
1190
1191	/*
1192	* Check weight info or/and fill 'data' with the required positions
1193	*/
1194	static bool
1195	checkclass_str(CHKVAL chkval, WordEntry entry, QueryOperand *val,
1196	ExecPhraseData *data)
1197	{
1198	bool result = false;
1199
1200	if (entry->haspos && (val->weight \|\| data))
1201	{
1202	WordEntryPosVector *posvec;
1203
1204	/*
1205	* We can't use the _POSVECPTR macro here because the pointer to the
1206	* tsvector's lexeme storage is already contained in chkval->values.
1207	*/
1208	posvec = (WordEntryPosVector *)
1209	(chkval->values + SHORTALIGN(entry->pos + entry->len));
1210
1211	if (val->weight && data)
1212	{
1213	WordEntryPos *posvec_iter = posvec->pos;
1214	WordEntryPos *dptr;
1215
1216	/*
1217	* Filter position information by weights
1218	*/
1219	dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
1220	data->allocated = true;
1221
1222	/ Is there a position with a matching weight? /
1223	while (posvec_iter < posvec->pos + posvec->npos)
1224	{
1225	/ If true, append this position to the data->pos /
1226	if (val->weight & (`1` << WEP_GETWEIGHT(*posvec_iter)))
1227	{
1228	dptr = WEP_GETPOS(posvec_iter);
1229	dptr++;
1230	}
1231
1232	posvec_iter++;
1233	}
1234
1235	data->npos = dptr - data->pos;
1236
1237	if (data->npos > `0`)
1238	result = true;
1239	}
1240	else if (val->weight)
1241	{
1242	WordEntryPos *posvec_iter = posvec->pos;
1243
1244	/ Is there a position with a matching weight? /
1245	while (posvec_iter < posvec->pos + posvec->npos)
1246	{
1247	if (val->weight & (`1` << WEP_GETWEIGHT(*posvec_iter)))
1248	{
1249	result = true;
1250	break; / no need to go further /
1251	}
1252
1253	posvec_iter++;
1254	}
1255	}
1256	else / data != NULL /
1257	{
1258	data->npos = posvec->npos;
1259	data->pos = posvec->pos;
1260	data->allocated = false;
1261	result = true;
1262	}
1263	}
1264	else
1265	{
1266	result = true;
1267	}
1268
1269	return result;
1270	}
1271
1272	/*
1273	* Removes duplicate pos entries. We can't use uniquePos() from
1274	* tsvector.c because array might be longer than MAXENTRYPOS
1275	*
1276	* Returns new length.
1277	*/
1278	static int
1279	uniqueLongPos(WordEntryPos pos, int* npos)
1280	{
1281	WordEntryPos *pos_iter,
1282	*result;
1283
1284	if (npos <= `1`)
1285	return npos;
1286
1287	qsort((void ) pos, npos, sizeof*(WordEntryPos), compareWordEntryPos);
1288
1289	result = pos;
1290	pos_iter = pos + `1`;
1291	while (pos_iter < pos + npos)
1292	{
1293	if (WEP_GETPOS(pos_iter) != WEP_GETPOS(result))
1294	{
1295	result++;
1296	result = WEP_GETPOS(pos_iter);
1297	}
1298
1299	pos_iter++;
1300	}
1301
1302	return result + `1` - pos;
1303	}
1304
1305	/*
1306	* is there value 'val' in array or not ?
1307	*/
1308	static bool
1309	checkcondition_str(void checkval, QueryOperand val, ExecPhraseData *data)
1310	{
1311	CHKVAL chkval = (CHKVAL ) checkval;
1312	WordEntry *StopLow = chkval->arrb;
1313	WordEntry *StopHigh = chkval->arre;
1314	WordEntry *StopMiddle = StopHigh;
1315	int difference = -`1`;
1316	bool res = false;
1317
1318	/ Loop invariant: StopLow <= val < StopHigh /
1319	while (StopLow < StopHigh)
1320	{
1321	StopMiddle = StopLow + (StopHigh - StopLow) / `2`;
1322	difference = tsCompareString(chkval->operand + val->distance,
1323	val->length,
1324	chkval->values + StopMiddle->pos,
1325	StopMiddle->len,
1326	false);
1327
1328	if (difference == `0`)
1329	{
1330	/ Check weight info & fill 'data' with positions /
1331	res = checkclass_str(chkval, StopMiddle, val, data);
1332	break;
1333	}
1334	else if (difference > `0`)
1335	StopLow = StopMiddle + `1`;
1336	else
1337	StopHigh = StopMiddle;
1338	}
1339
1340	if ((!res \|\| data) && val->prefix)
1341	{
1342	WordEntryPos *allpos = NULL;
1343	int npos = `0`,
1344	totalpos = `0`;
1345
1346	/*
1347	* there was a failed exact search, so we should scan further to find
1348	* a prefix match. We also need to do so if caller needs position info
1349	*/
1350	if (StopLow >= StopHigh)
1351	StopMiddle = StopHigh;
1352
1353	while ((!res \|\| data) && StopMiddle < chkval->arre &&
1354	tsCompareString(chkval->operand + val->distance,
1355	val->length,
1356	chkval->values + StopMiddle->pos,
1357	StopMiddle->len,
1358	true) == `0`)
1359	{
1360	if (data)
1361	{
1362	/*
1363	* We need to join position information
1364	*/
1365	res = checkclass_str(chkval, StopMiddle, val, data);
1366
1367	if (res)
1368	{
1369	while (npos + data->npos >= totalpos)
1370	{
1371	if (totalpos == `0`)
1372	{
1373	totalpos = `256`;
1374	allpos = palloc(sizeof(WordEntryPos) * totalpos);
1375	}
1376	else
1377	{
1378	totalpos *= `2`;
1379	allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
1380	}
1381	}
1382
1383	memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
1384	npos += data->npos;
1385	}
1386	}
1387	else
1388	{
1389	res = checkclass_str(chkval, StopMiddle, val, NULL);
1390	}
1391
1392	StopMiddle++;
1393	}
1394
1395	if (res && data)
1396	{
1397	/ Sort and make unique array of found positions /
1398	data->pos = allpos;
1399	data->npos = uniqueLongPos(allpos, npos);
1400	data->allocated = true;
1401	}
1402	}
1403
1404	return res;
1405	}
1406
1407	/*
1408	* Compute output position list for a tsquery operator in phrase mode.
1409	*
1410	* Merge the position lists in Ldata and Rdata as specified by "emit",
1411	* returning the result list into *data. The input position lists must be
1412	* sorted and unique, and the output will be as well.
1413	*
1414	* data: pointer to initially-all-zeroes output struct, or NULL
1415	* Ldata, Rdata: input position lists
1416	* emit: bitmask of TSPO_XXX flags
1417	* Loffset: offset to be added to Ldata positions before comparing/outputting
1418	* Roffset: offset to be added to Rdata positions before comparing/outputting
1419	* max_npos: maximum possible required size of output position array
1420	*
1421	* Loffset and Roffset should not be negative, else we risk trying to output
1422	* negative positions, which won't fit into WordEntryPos.
1423	*
1424	* Returns true if any positions were emitted to *data; or if data is NULL,
1425	* returns true if any positions would have been emitted.
1426	*/
1427	#define TSPO_L_ONLY 0x01 /* emit positions appearing only in L */
1428	#define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */
1429	#define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */
1430
1431	static bool
1432	TS_phrase_output(ExecPhraseData *data,
1433	ExecPhraseData *Ldata,
1434	ExecPhraseData *Rdata,
1435	int emit,
1436	int Loffset,
1437	int Roffset,
1438	int max_npos)
1439	{
1440	int Lindex,
1441	Rindex;
1442
1443	/ Loop until both inputs are exhausted /
1444	Lindex = Rindex = `0`;
1445	while (Lindex < Ldata->npos \|\| Rindex < Rdata->npos)
1446	{
1447	int Lpos,
1448	Rpos;
1449	int output_pos = `0`;
1450
1451	/*
1452	* Fetch current values to compare. WEP_GETPOS() is needed because
1453	* ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
1454	*/
1455	if (Lindex < Ldata->npos)
1456	Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
1457	else
1458	{
1459	/ L array exhausted, so we're done if R_ONLY isn't set /
1460	if (!(emit & TSPO_R_ONLY))
1461	break;
1462	Lpos = INT_MAX;
1463	}
1464	if (Rindex < Rdata->npos)
1465	Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
1466	else
1467	{
1468	/ R array exhausted, so we're done if L_ONLY isn't set /
1469	if (!(emit & TSPO_L_ONLY))
1470	break;
1471	Rpos = INT_MAX;
1472	}
1473
1474	/ Merge-join the two input lists /
1475	if (Lpos < Rpos)
1476	{
1477	/ Lpos is not matched in Rdata, should we output it? /
1478	if (emit & TSPO_L_ONLY)
1479	output_pos = Lpos;
1480	Lindex++;
1481	}
1482	else if (Lpos == Rpos)
1483	{
1484	/ Lpos and Rpos match ... should we output it? /
1485	if (emit & TSPO_BOTH)
1486	output_pos = Rpos;
1487	Lindex++;
1488	Rindex++;
1489	}
1490	else / Lpos > Rpos /
1491	{
1492	/ Rpos is not matched in Ldata, should we output it? /
1493	if (emit & TSPO_R_ONLY)
1494	output_pos = Rpos;
1495	Rindex++;
1496	}
1497
1498	if (output_pos > `0`)
1499	{
1500	if (data)
1501	{
1502	/ Store position, first allocating output array if needed /
1503	if (data->pos == NULL)
1504	{
1505	data->pos = (WordEntryPos *)
1506	palloc(max_npos * sizeof(WordEntryPos));
1507	data->allocated = true;
1508	}
1509	data->pos[data->npos++] = output_pos;
1510	}
1511	else
1512	{
1513	/*
1514	* Exact positions not needed, so return true as soon as we
1515	* know there is at least one.
1516	*/
1517	return true;
1518	}
1519	}
1520	}
1521
1522	if (data && data->npos > `0`)
1523	{
1524	/ Let's assert we didn't overrun the array /
1525	Assert(data->npos <= max_npos);
1526	return true;
1527	}
1528	return false;
1529	}
1530
1531	/*
1532	* Execute tsquery at or below an OP_PHRASE operator.
1533	*
1534	* This handles tsquery execution at recursion levels where we need to care
1535	* about match locations.
1536	*
1537	* In addition to the same arguments used for TS_execute, the caller may pass
1538	* a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
1539	* match position info on success. data == NULL if no position data need be
1540	* returned. (In practice, outside callers pass NULL, and only the internal
1541	* recursion cases pass a data pointer.)
1542	* Note: the function assumes data != NULL for operators other than OP_PHRASE.
1543	* This is OK because an outside call always starts from an OP_PHRASE node.
1544	*
1545	* The detailed semantics of the match data, given that the function returned
1546	* "true" (successful match, or possible match), are:
1547	*
1548	* npos > 0, negate = false:
1549	* query is matched at specified position(s) (and only those positions)
1550	* npos > 0, negate = true:
1551	* query is matched at all positions except specified position(s)
1552	* npos = 0, negate = false:
1553	* query is possibly matched, matching position(s) are unknown
1554	* (this should only be returned when TS_EXEC_PHRASE_NO_POS flag is set)
1555	* npos = 0, negate = true:
1556	* query is matched at all positions
1557	*
1558	* Successful matches also return a "width" value which is the match width in
1559	* lexemes, less one. Hence, "width" is zero for simple one-lexeme matches,
1560	* and is the sum of the phrase operator distances for phrase matches. Note
1561	* that when width > 0, the listed positions represent the ends of matches not
1562	* the starts. (This unintuitive rule is needed to avoid possibly generating
1563	* negative positions, which wouldn't fit into the WordEntryPos arrays.)
1564	*
1565	* When the function returns "false" (no match), it must return npos = 0,
1566	* negate = false (which is the state initialized by the caller); but the
1567	* "width" output in such cases is undefined.
1568	*/
1569	static bool
1570	TS_phrase_execute(QueryItem curitem, void* *arg, uint32 flags,
1571	TSExecuteCallback chkcond,
1572	ExecPhraseData *data)
1573	{
1574	ExecPhraseData Ldata,
1575	Rdata;
1576	bool lmatch,
1577	rmatch;
1578	int Loffset,
1579	Roffset,
1580	maxwidth;
1581
1582	/ since this function recurses, it could be driven to stack overflow /
1583	check_stack_depth();
1584
1585	if (curitem->type == QI_VAL)
1586	return chkcond(arg, (QueryOperand *) curitem, data);
1587
1588	switch (curitem->qoperator.oper)
1589	{
1590	case OP_NOT:
1591
1592	/*
1593	* Because a "true" result with no specific positions is taken as
1594	* uncertain, we need no special care here for !TS_EXEC_CALC_NOT.
1595	* If it's a false positive, the right things happen anyway.
1596	*
1597	* Also, we need not touch data->width, since a NOT operation does
1598	* not change the match width.
1599	*/
1600	if (TS_phrase_execute(curitem + `1`, arg, flags, chkcond, data))
1601	{
1602	if (data->npos > `0`)
1603	{
1604	/ we have some positions, invert negate flag /
1605	data->negate = !data->negate;
1606	return true;
1607	}
1608	else if (data->negate)
1609	{
1610	/ change "match everywhere" to "match nowhere" /
1611	data->negate = false;
1612	return false;
1613	}
1614	/ match positions are, and remain, uncertain /
1615	return true;
1616	}
1617	else
1618	{
1619	/ change "match nowhere" to "match everywhere" /
1620	Assert(data->npos == `0` && !data->negate);
1621	data->negate = true;
1622	return true;
1623	}
1624
1625	case OP_PHRASE:
1626	case OP_AND:
1627	memset(&Ldata, `0`, sizeof(Ldata));
1628	memset(&Rdata, `0`, sizeof(Rdata));
1629
1630	if (!TS_phrase_execute(curitem + curitem->qoperator.left,
1631	arg, flags, chkcond, &Ldata))
1632	return false;
1633
1634	if (!TS_phrase_execute(curitem + `1`,
1635	arg, flags, chkcond, &Rdata))
1636	return false;
1637
1638	/*
1639	* If either operand has no position information, then we can't
1640	* return position data, only a "possible match" result. "Possible
1641	* match" answers are only wanted when TS_EXEC_PHRASE_NO_POS flag
1642	* is set, otherwise return false.
1643	*/
1644	if ((Ldata.npos == `0` && !Ldata.negate) \|\|
1645	(Rdata.npos == `0` && !Rdata.negate))
1646	return (flags & TS_EXEC_PHRASE_NO_POS) ? true : false;
1647
1648	if (curitem->qoperator.oper == OP_PHRASE)
1649	{
1650	/*
1651	* Compute Loffset and Roffset suitable for phrase match, and
1652	* compute overall width of whole phrase match.
1653	*/
1654	Loffset = curitem->qoperator.distance + Rdata.width;
1655	Roffset = `0`;
1656	if (data)
1657	data->width = curitem->qoperator.distance +
1658	Ldata.width + Rdata.width;
1659	}
1660	else
1661	{
1662	/*
1663	* For OP_AND, set output width and alignment like OP_OR (see
1664	* comment below)
1665	*/
1666	maxwidth = Max(Ldata.width, Rdata.width);
1667	Loffset = maxwidth - Ldata.width;
1668	Roffset = maxwidth - Rdata.width;
1669	if (data)
1670	data->width = maxwidth;
1671	}
1672
1673	if (Ldata.negate && Rdata.negate)
1674	{
1675	/ !L & !R: treat as !(L \| R) /
1676	(void) TS_phrase_output(data, &Ldata, &Rdata,
1677	TSPO_BOTH \| TSPO_L_ONLY \| TSPO_R_ONLY,
1678	Loffset, Roffset,
1679	Ldata.npos + Rdata.npos);
1680	if (data)
1681	data->negate = true;
1682	return true;
1683	}
1684	else if (Ldata.negate)
1685	{
1686	/ !L & R /
1687	return TS_phrase_output(data, &Ldata, &Rdata,
1688	TSPO_R_ONLY,
1689	Loffset, Roffset,
1690	Rdata.npos);
1691	}
1692	else if (Rdata.negate)
1693	{
1694	/ L & !R /
1695	return TS_phrase_output(data, &Ldata, &Rdata,
1696	TSPO_L_ONLY,
1697	Loffset, Roffset,
1698	Ldata.npos);
1699	}
1700	else
1701	{
1702	/ straight AND /
1703	return TS_phrase_output(data, &Ldata, &Rdata,
1704	TSPO_BOTH,
1705	Loffset, Roffset,
1706	Min(Ldata.npos, Rdata.npos));
1707	}
1708
1709	case OP_OR:
1710	memset(&Ldata, `0`, sizeof(Ldata));
1711	memset(&Rdata, `0`, sizeof(Rdata));
1712
1713	lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
1714	arg, flags, chkcond, &Ldata);
1715	rmatch = TS_phrase_execute(curitem + `1`,
1716	arg, flags, chkcond, &Rdata);
1717
1718	if (!lmatch && !rmatch)
1719	return false;
1720
1721	/*
1722	* If a valid operand has no position information, then we can't
1723	* return position data, only a "possible match" result. "Possible
1724	* match" answers are only wanted when TS_EXEC_PHRASE_NO_POS flag
1725	* is set, otherwise return false.
1726	*/
1727	if ((lmatch && Ldata.npos == `0` && !Ldata.negate) \|\|
1728	(rmatch && Rdata.npos == `0` && !Rdata.negate))
1729	return (flags & TS_EXEC_PHRASE_NO_POS) ? true : false;
1730
1731	/*
1732	* Cope with undefined output width from failed submatch. (This
1733	* takes less code than trying to ensure that all failure returns
1734	* set data->width to zero.)
1735	*/
1736	if (!lmatch)
1737	Ldata.width = `0`;
1738	if (!rmatch)
1739	Rdata.width = `0`;
1740
1741	/*
1742	* For OP_AND and OP_OR, report the width of the wider of the two
1743	* inputs, and align the narrower input's positions to the right
1744	* end of that width. This rule deals at least somewhat
1745	* reasonably with cases like "x <-> (y \| z <-> q)".
1746	*/
1747	maxwidth = Max(Ldata.width, Rdata.width);
1748	Loffset = maxwidth - Ldata.width;
1749	Roffset = maxwidth - Rdata.width;
1750	data->width = maxwidth;
1751
1752	if (Ldata.negate && Rdata.negate)
1753	{
1754	/ !L \| !R: treat as !(L & R) /
1755	(void) TS_phrase_output(data, &Ldata, &Rdata,
1756	TSPO_BOTH,
1757	Loffset, Roffset,
1758	Min(Ldata.npos, Rdata.npos));
1759	data->negate = true;
1760	return true;
1761	}
1762	else if (Ldata.negate)
1763	{
1764	/ !L \| R: treat as !(L & !R) /
1765	(void) TS_phrase_output(data, &Ldata, &Rdata,
1766	TSPO_L_ONLY,
1767	Loffset, Roffset,
1768	Ldata.npos);
1769	data->negate = true;
1770	return true;
1771	}
1772	else if (Rdata.negate)
1773	{
1774	/ L \| !R: treat as !(!L & R) /
1775	(void) TS_phrase_output(data, &Ldata, &Rdata,
1776	TSPO_R_ONLY,
1777	Loffset, Roffset,
1778	Rdata.npos);
1779	data->negate = true;
1780	return true;
1781	}
1782	else
1783	{
1784	/ straight OR /
1785	return TS_phrase_output(data, &Ldata, &Rdata,
1786	TSPO_BOTH \| TSPO_L_ONLY \| TSPO_R_ONLY,
1787	Loffset, Roffset,
1788	Ldata.npos + Rdata.npos);
1789	}
1790
1791	default:
1792	elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1793	}
1794
1795	/ not reachable, but keep compiler quiet /
1796	return false;
1797	}
1798
1799
1800	/*
1801	* Evaluate tsquery boolean expression.
1802	*
1803	* curitem: current tsquery item (initially, the first one)
1804	* arg: opaque value to pass through to callback function
1805	* flags: bitmask of flag bits shown in ts_utils.h
1806	* chkcond: callback function to check whether a primitive value is present
1807	*
1808	* The logic here deals only with operators above any phrase operator, for
1809	* which we do not need to worry about lexeme positions. As soon as we hit an
1810	* OP_PHRASE operator, we pass it off to TS_phrase_execute which does worry.
1811	*/
1812	bool
1813	TS_execute(QueryItem curitem, void* *arg, uint32 flags,
1814	TSExecuteCallback chkcond)
1815	{
1816	/ since this function recurses, it could be driven to stack overflow /
1817	check_stack_depth();
1818
1819	if (curitem->type == QI_VAL)
1820	return chkcond(arg, (QueryOperand *) curitem,
1821	NULL / we don't need position info / );
1822
1823	switch (curitem->qoperator.oper)
1824	{
1825	case OP_NOT:
1826	if (flags & TS_EXEC_CALC_NOT)
1827	return !TS_execute(curitem + `1`, arg, flags, chkcond);
1828	else
1829	return true;
1830
1831	case OP_AND:
1832	if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
1833	return TS_execute(curitem + `1`, arg, flags, chkcond);
1834	else
1835	return false;
1836
1837	case OP_OR:
1838	if (TS_execute(curitem + curitem->qoperator.left, arg, flags, chkcond))
1839	return true;
1840	else
1841	return TS_execute(curitem + `1`, arg, flags, chkcond);
1842
1843	case OP_PHRASE:
1844	return TS_phrase_execute(curitem, arg, flags, chkcond, NULL);
1845
1846	default:
1847	elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1848	}
1849
1850	/ not reachable, but keep compiler quiet /
1851	return false;
1852	}
1853
1854	/*
1855	* Detect whether a tsquery boolean expression requires any positive matches
1856	* to values shown in the tsquery.
1857	*
1858	* This is needed to know whether a GIN index search requires full index scan.
1859	* For example, 'x & !y' requires a match of x, so it's sufficient to scan
1860	* entries for x; but 'x \| !y' could match rows containing neither x nor y.
1861	*/
1862	bool
1863	tsquery_requires_match(QueryItem *curitem)
1864	{
1865	/ since this function recurses, it could be driven to stack overflow /
1866	check_stack_depth();
1867
1868	if (curitem->type == QI_VAL)
1869	return true;
1870
1871	switch (curitem->qoperator.oper)
1872	{
1873	case OP_NOT:
1874
1875	/*
1876	* Assume there are no required matches underneath a NOT. For
1877	* some cases with nested NOTs, we could prove there's a required
1878	* match, but it seems unlikely to be worth the trouble.
1879	*/
1880	return false;
1881
1882	case OP_PHRASE:
1883
1884	/*
1885	* Treat OP_PHRASE as OP_AND here
1886	*/
1887	case OP_AND:
1888	/ If either side requires a match, we're good /
1889	if (tsquery_requires_match(curitem + curitem->qoperator.left))
1890	return true;
1891	else
1892	return tsquery_requires_match(curitem + `1`);
1893
1894	case OP_OR:
1895	/ Both sides must require a match /
1896	if (tsquery_requires_match(curitem + curitem->qoperator.left))
1897	return tsquery_requires_match(curitem + `1`);
1898	else
1899	return false;
1900
1901	default:
1902	elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
1903	}
1904
1905	/ not reachable, but keep compiler quiet /
1906	return false;
1907	}
1908
1909	/*
1910	* boolean operations
1911	*/
1912	Datum
1913	ts_match_qv(PG_FUNCTION_ARGS)
1914	{
1915	PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
1916	PG_GETARG_DATUM(`1`),
1917	PG_GETARG_DATUM(`0`)));
1918	}
1919
1920	Datum
1921	ts_match_vq(PG_FUNCTION_ARGS)
1922	{
1923	TSVector val = PG_GETARG_TSVECTOR(`0`);
1924	TSQuery query = PG_GETARG_TSQUERY(`1`);
1925	CHKVAL chkval;
1926	bool result;
1927
1928	/ empty query matches nothing /
1929	if (!query->size)
1930	{
1931	PG_FREE_IF_COPY(val, `0`);
1932	PG_FREE_IF_COPY(query, `1`);
1933	PG_RETURN_BOOL(false);
1934	}
1935
1936	chkval.arrb = ARRPTR(val);
1937	chkval.arre = chkval.arrb + val->size;
1938	chkval.values = STRPTR(val);
1939	chkval.operand = GETOPERAND(query);
1940	result = TS_execute(GETQUERY(query),
1941	&chkval,
1942	TS_EXEC_CALC_NOT,
1943	checkcondition_str);
1944
1945	PG_FREE_IF_COPY(val, `0`);
1946	PG_FREE_IF_COPY(query, `1`);
1947	PG_RETURN_BOOL(result);
1948	}
1949
1950	Datum
1951	ts_match_tt(PG_FUNCTION_ARGS)
1952	{
1953	TSVector vector;
1954	TSQuery query;
1955	bool res;
1956
1957	vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
1958	PG_GETARG_DATUM(`0`)));
1959	query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
1960	PG_GETARG_DATUM(`1`)));
1961
1962	res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
1963	TSVectorGetDatum(vector),
1964	TSQueryGetDatum(query)));
1965
1966	pfree(vector);
1967	pfree(query);
1968
1969	PG_RETURN_BOOL(res);
1970	}
1971
1972	Datum
1973	ts_match_tq(PG_FUNCTION_ARGS)
1974	{
1975	TSVector vector;
1976	TSQuery query = PG_GETARG_TSQUERY(`1`);
1977	bool res;
1978
1979	vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
1980	PG_GETARG_DATUM(`0`)));
1981
1982	res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
1983	TSVectorGetDatum(vector),
1984	TSQueryGetDatum(query)));
1985
1986	pfree(vector);
1987	PG_FREE_IF_COPY(query, `1`);
1988
1989	PG_RETURN_BOOL(res);
1990	}
1991
1992	/*
1993	* ts_stat statistic function support
1994	*/
1995
1996
1997	/*
1998	* Returns the number of positions in value 'wptr' within tsvector 'txt',
1999	* that have a weight equal to one of the weights in 'weight' bitmask.
2000	*/
2001	static int
2002	check_weight(TSVector txt, WordEntry *wptr, int8 weight)
2003	{
2004	int len = POSDATALEN(txt, wptr);
2005	int num = `0`;
2006	WordEntryPos *ptr = POSDATAPTR(txt, wptr);
2007
2008	while (len--)
2009	{
2010	if (weight & (`1` << WEP_GETWEIGHT(*ptr)))
2011	num++;
2012	ptr++;
2013	}
2014	return num;
2015	}
2016
2017	#define compareStatWord(a,e,t) \
2018	tsCompareString((a)->lexeme, (a)->lenlexeme, \
2019	STRPTR(t) + (e)->pos, (e)->len, \
2020	false)
2021
2022	static void
2023	insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
2024	{
2025	WordEntry *we = ARRPTR(txt) + off;
2026	StatEntry *node = stat->root,
2027	*pnode = NULL;
2028	int n,
2029	res = `0`;
2030	uint32 depth = `1`;
2031
2032	if (stat->weight == `0`)
2033	n = (we->haspos) ? POSDATALEN(txt, we) : `1`;
2034	else
2035	n = (we->haspos) ? check_weight(txt, we, stat->weight) : `0`;
2036
2037	if (n == `0`)
2038	return; / nothing to insert /
2039
2040	while (node)
2041	{
2042	res = compareStatWord(node, we, txt);
2043
2044	if (res == `0`)
2045	{
2046	break;
2047	}
2048	else
2049	{
2050	pnode = node;
2051	node = (res < `0`) ? node->left : node->right;
2052	}
2053	depth++;
2054	}
2055
2056	if (depth > stat->maxdepth)
2057	stat->maxdepth = depth;
2058
2059	if (node == NULL)
2060	{
2061	node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
2062	node->left = node->right = NULL;
2063	node->ndoc = `1`;
2064	node->nentry = n;
2065	node->lenlexeme = we->len;
2066	memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
2067
2068	if (pnode == NULL)
2069	{
2070	stat->root = node;
2071	}
2072	else
2073	{
2074	if (res < `0`)
2075	pnode->left = node;
2076	else
2077	pnode->right = node;
2078	}
2079
2080	}
2081	else
2082	{
2083	node->ndoc++;
2084	node->nentry += n;
2085	}
2086	}
2087
2088	static void
2089	chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
2090	uint32 low, uint32 high, uint32 offset)
2091	{
2092	uint32 pos;
2093	uint32 middle = (low + high) >> `1`;
2094
2095	pos = (low + middle) >> `1`;
2096	if (low != middle && pos >= offset && pos - offset < txt->size)
2097	insertStatEntry(persistentContext, stat, txt, pos - offset);
2098	pos = (high + middle + `1`) >> `1`;
2099	if (middle + `1` != high && pos >= offset && pos - offset < txt->size)
2100	insertStatEntry(persistentContext, stat, txt, pos - offset);
2101
2102	if (low != middle)
2103	chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
2104	if (high != middle + `1`)
2105	chooseNextStatEntry(persistentContext, stat, txt, middle + `1`, high, offset);
2106	}
2107
2108	/*
2109	* This is written like a custom aggregate function, because the
2110	* original plan was to do just that. Unfortunately, an aggregate function
2111	* can't return a set, so that plan was abandoned. If that limitation is
2112	* lifted in the future, ts_stat could be a real aggregate function so that
2113	* you could use it like this:
2114	*
2115	* SELECT ts_stat(vector_column) FROM vector_table;
2116	*
2117	* where vector_column is a tsvector-type column in vector_table.
2118	*/
2119
2120	static TSVectorStat *
2121	ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
2122	{
2123	TSVector txt = DatumGetTSVector(data);
2124	uint32 i,
2125	nbit = `0`,
2126	offset;
2127
2128	if (stat == NULL)
2129	{ / Init in first /
2130	stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2131	stat->maxdepth = `1`;
2132	}
2133
2134	/ simple check of correctness /
2135	if (txt == NULL \|\| txt->size == `0`)
2136	{
2137	if (txt && txt != (TSVector) DatumGetPointer(data))
2138	pfree(txt);
2139	return stat;
2140	}
2141
2142	i = txt->size - `1`;
2143	for (; i > `0`; i >>= `1`)
2144	nbit++;
2145
2146	nbit = `1` << nbit;
2147	offset = (nbit - txt->size) / `2`;
2148
2149	insertStatEntry(persistentContext, stat, txt, (nbit >> `1`) - offset);
2150	chooseNextStatEntry(persistentContext, stat, txt, `0`, nbit, offset);
2151
2152	return stat;
2153	}
2154
2155	static void
2156	ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
2157	TSVectorStat *stat)
2158	{
2159	TupleDesc tupdesc;
2160	MemoryContext oldcontext;
2161	StatEntry *node;
2162
2163	funcctx->user_fctx = (void *) stat;
2164
2165	oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
2166
2167	stat->stack = palloc0(sizeof(StatEntry ) (stat->maxdepth + `1`));
2168	stat->stackpos = `0`;
2169
2170	node = stat->root;
2171	/ find leftmost value /
2172	if (node == NULL)
2173	stat->stack[stat->stackpos] = NULL;
2174	else
2175	for (;;)
2176	{
2177	stat->stack[stat->stackpos] = node;
2178	if (node->left)
2179	{
2180	stat->stackpos++;
2181	node = node->left;
2182	}
2183	else
2184	break;
2185	}
2186	Assert(stat->stackpos <= stat->maxdepth);
2187
2188	tupdesc = CreateTemplateTupleDesc(`3`);
2189	TupleDescInitEntry(tupdesc, (AttrNumber) `1`, "word",
2190	TEXTOID, -`1`, `0`);
2191	TupleDescInitEntry(tupdesc, (AttrNumber) `2`, "ndoc",
2192	INT4OID, -`1`, `0`);
2193	TupleDescInitEntry(tupdesc, (AttrNumber) `3`, "nentry",
2194	INT4OID, -`1`, `0`);
2195	funcctx->tuple_desc = BlessTupleDesc(tupdesc);
2196	funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
2197
2198	MemoryContextSwitchTo(oldcontext);
2199	}
2200
2201	static StatEntry *
2202	walkStatEntryTree(TSVectorStat *stat)
2203	{
2204	StatEntry *node = stat->stack[stat->stackpos];
2205
2206	if (node == NULL)
2207	return NULL;
2208
2209	if (node->ndoc != `0`)
2210	{
2211	/ return entry itself: we already was at left sublink /
2212	return node;
2213	}
2214	else if (node->right && node->right != stat->stack[stat->stackpos + `1`])
2215	{
2216	/ go on right sublink /
2217	stat->stackpos++;
2218	node = node->right;
2219
2220	/ find most-left value /
2221	for (;;)
2222	{
2223	stat->stack[stat->stackpos] = node;
2224	if (node->left)
2225	{
2226	stat->stackpos++;
2227	node = node->left;
2228	}
2229	else
2230	break;
2231	}
2232	Assert(stat->stackpos <= stat->maxdepth);
2233	}
2234	else
2235	{
2236	/ we already return all left subtree, itself and right subtree /
2237	if (stat->stackpos == `0`)
2238	return NULL;
2239
2240	stat->stackpos--;
2241	return walkStatEntryTree(stat);
2242	}
2243
2244	return node;
2245	}
2246
2247	static Datum
2248	ts_process_call(FuncCallContext *funcctx)
2249	{
2250	TSVectorStat *st;
2251	StatEntry *entry;
2252
2253	st = (TSVectorStat *) funcctx->user_fctx;
2254
2255	entry = walkStatEntryTree(st);
2256
2257	if (entry != NULL)
2258	{
2259	Datum result;
2260	char *values[`3`];
2261	char ndoc[`16`];
2262	char nentry[`16`];
2263	HeapTuple tuple;
2264
2265	values[`0`] = palloc(entry->lenlexeme + `1`);
2266	memcpy(values[`0`], entry->lexeme, entry->lenlexeme);
2267	(values[`0`])[entry->lenlexeme] = `'\0'`;
2268	sprintf(ndoc, "%d", entry->ndoc);
2269	values[`1`] = ndoc;
2270	sprintf(nentry, "%d", entry->nentry);
2271	values[`2`] = nentry;
2272
2273	tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
2274	result = HeapTupleGetDatum(tuple);
2275
2276	pfree(values[`0`]);
2277
2278	/ mark entry as already visited /
2279	entry->ndoc = `0`;
2280
2281	return result;
2282	}
2283
2284	return (Datum) `0`;
2285	}
2286
2287	static TSVectorStat *
2288	ts_stat_sql(MemoryContext persistentContext, text txt, text ws)
2289	{
2290	char *query = text_to_cstring(txt);
2291	TSVectorStat *stat;
2292	bool isnull;
2293	Portal portal;
2294	SPIPlanPtr plan;
2295
2296	if ((plan = SPI_prepare(query, `0`, NULL)) == NULL)
2297	/ internal error /
2298	elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2299
2300	if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2301	/ internal error /
2302	elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2303
2304	SPI_cursor_fetch(portal, true, `100`);
2305
2306	if (SPI_tuptable == NULL \|\|
2307	SPI_tuptable->tupdesc->natts != `1` \|\|
2308	!IsBinaryCoercible(SPI_gettypeid(SPI_tuptable->tupdesc, `1`),
2309	TSVECTOROID))
2310	ereport(ERROR,
2311	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2312	errmsg("ts_stat query must return one tsvector column")));
2313
2314	stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
2315	stat->maxdepth = `1`;
2316
2317	if (ws)
2318	{
2319	char *buf;
2320
2321	buf = VARDATA_ANY(ws);
2322	while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
2323	{
2324	if (pg_mblen(buf) == `1`)
2325	{
2326	switch (*buf)
2327	{
2328	case `'A'`:
2329	case `'a'`:
2330	stat->weight \|= `1` << `3`;
2331	break;
2332	case `'B'`:
2333	case `'b'`:
2334	stat->weight \|= `1` << `2`;
2335	break;
2336	case `'C'`:
2337	case `'c'`:
2338	stat->weight \|= `1` << `1`;
2339	break;
2340	case `'D'`:
2341	case `'d'`:
2342	stat->weight \|= `1`;
2343	break;
2344	default:
2345	stat->weight \|= `0`;
2346	}
2347	}
2348	buf += pg_mblen(buf);
2349	}
2350	}
2351
2352	while (SPI_processed > `0`)
2353	{
2354	uint64 i;
2355
2356	for (i = `0`; i < SPI_processed; i++)
2357	{
2358	Datum data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, `1`, &isnull);
2359
2360	if (!isnull)
2361	stat = ts_accum(persistentContext, stat, data);
2362	}
2363
2364	SPI_freetuptable(SPI_tuptable);
2365	SPI_cursor_fetch(portal, true, `100`);
2366	}
2367
2368	SPI_freetuptable(SPI_tuptable);
2369	SPI_cursor_close(portal);
2370	SPI_freeplan(plan);
2371	pfree(query);
2372
2373	return stat;
2374	}
2375
2376	Datum
2377	ts_stat1(PG_FUNCTION_ARGS)
2378	{
2379	FuncCallContext *funcctx;
2380	Datum result;
2381
2382	if (SRF_IS_FIRSTCALL())
2383	{
2384	TSVectorStat *stat;
2385	text *txt = PG_GETARG_TEXT_PP(`0`);
2386
2387	funcctx = SRF_FIRSTCALL_INIT();
2388	SPI_connect();
2389	stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
2390	PG_FREE_IF_COPY(txt, `0`);
2391	ts_setup_firstcall(fcinfo, funcctx, stat);
2392	SPI_finish();
2393	}
2394
2395	funcctx = SRF_PERCALL_SETUP();
2396	if ((result = ts_process_call(funcctx)) != (Datum) `0`)
2397	SRF_RETURN_NEXT(funcctx, result);
2398	SRF_RETURN_DONE(funcctx);
2399	}
2400
2401	Datum
2402	ts_stat2(PG_FUNCTION_ARGS)
2403	{
2404	FuncCallContext *funcctx;
2405	Datum result;
2406
2407	if (SRF_IS_FIRSTCALL())
2408	{
2409	TSVectorStat *stat;
2410	text *txt = PG_GETARG_TEXT_PP(`0`);
2411	text *ws = PG_GETARG_TEXT_PP(`1`);
2412
2413	funcctx = SRF_FIRSTCALL_INIT();
2414	SPI_connect();
2415	stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
2416	PG_FREE_IF_COPY(txt, `0`);
2417	PG_FREE_IF_COPY(ws, `1`);
2418	ts_setup_firstcall(fcinfo, funcctx, stat);
2419	SPI_finish();
2420	}
2421
2422	funcctx = SRF_PERCALL_SETUP();
2423	if ((result = ts_process_call(funcctx)) != (Datum) `0`)
2424	SRF_RETURN_NEXT(funcctx, result);
2425	SRF_RETURN_DONE(funcctx);
2426	}
2427
2428
2429	/*
2430	* Triggers for automatic update of a tsvector column from text column(s)
2431	*
2432	* Trigger arguments are either
2433	* name of tsvector col, name of tsconfig to use, name(s) of text col(s)
2434	* name of tsvector col, name of regconfig col, name(s) of text col(s)
2435	* ie, tsconfig can either be specified by name, or indirectly as the
2436	* contents of a regconfig field in the row. If the name is used, it must
2437	* be explicitly schema-qualified.
2438	*/
2439	Datum
2440	tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
2441	{
2442	return tsvector_update_trigger(fcinfo, false);
2443	}
2444
2445	Datum
2446	tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
2447	{
2448	return tsvector_update_trigger(fcinfo, true);
2449	}
2450
2451	static Datum
2452	tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
2453	{
2454	TriggerData *trigdata;
2455	Trigger *trigger;
2456	Relation rel;
2457	HeapTuple rettuple = NULL;
2458	int tsvector_attr_num,
2459	i;
2460	ParsedText prs;
2461	Datum datum;
2462	bool isnull;
2463	text *txt;
2464	Oid cfgId;
2465
2466	/ Check call context /
2467	if (!CALLED_AS_TRIGGER(fcinfo)) / internal error /
2468	elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
2469
2470	trigdata = (TriggerData *) fcinfo->context;
2471	if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
2472	elog(ERROR, "tsvector_update_trigger: must be fired for row");
2473	if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
2474	elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
2475
2476	if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
2477	rettuple = trigdata->tg_trigtuple;
2478	else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
2479	rettuple = trigdata->tg_newtuple;
2480	else
2481	elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
2482
2483	trigger = trigdata->tg_trigger;
2484	rel = trigdata->tg_relation;
2485
2486	if (trigger->tgnargs < `3`)
2487	elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...)");
2488
2489	/ Find the target tsvector column /
2490	tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[`0`]);
2491	if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
2492	ereport(ERROR,
2493	(errcode(ERRCODE_UNDEFINED_COLUMN),
2494	errmsg("tsvector column \"%s\" does not exist",
2495	trigger->tgargs[`0`])));
2496	/ This will effectively reject system columns, so no separate test: /
2497	if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
2498	TSVECTOROID))
2499	ereport(ERROR,
2500	(errcode(ERRCODE_DATATYPE_MISMATCH),
2501	errmsg("column \"%s\" is not of tsvector type",
2502	trigger->tgargs[`0`])));
2503
2504	/ Find the configuration to use /
2505	if (config_column)
2506	{
2507	int config_attr_num;
2508
2509	config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[`1`]);
2510	if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
2511	ereport(ERROR,
2512	(errcode(ERRCODE_UNDEFINED_COLUMN),
2513	errmsg("configuration column \"%s\" does not exist",
2514	trigger->tgargs[`1`])));
2515	if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
2516	REGCONFIGOID))
2517	ereport(ERROR,
2518	(errcode(ERRCODE_DATATYPE_MISMATCH),
2519	errmsg("column \"%s\" is not of regconfig type",
2520	trigger->tgargs[`1`])));
2521
2522	datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
2523	if (isnull)
2524	ereport(ERROR,
2525	(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
2526	errmsg("configuration column \"%s\" must not be null",
2527	trigger->tgargs[`1`])));
2528	cfgId = DatumGetObjectId(datum);
2529	}
2530	else
2531	{
2532	List *names;
2533
2534	names = stringToQualifiedNameList(trigger->tgargs[`1`]);
2535	/ require a schema so that results are not search path dependent /
2536	if (list_length(names) < `2`)
2537	ereport(ERROR,
2538	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2539	errmsg("text search configuration name \"%s\" must be schema-qualified",
2540	trigger->tgargs[`1`])));
2541	cfgId = get_ts_config_oid(names, false);
2542	}
2543
2544	/ initialize parse state /
2545	prs.lenwords = `32`;
2546	prs.curwords = `0`;
2547	prs.pos = `0`;
2548	prs.words = (ParsedWord ) palloc(sizeof(ParsedWord) prs.lenwords);
2549
2550	/ find all words in indexable column(s) /
2551	for (i = `2`; i < trigger->tgnargs; i++)
2552	{
2553	int numattr;
2554
2555	numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
2556	if (numattr == SPI_ERROR_NOATTRIBUTE)
2557	ereport(ERROR,
2558	(errcode(ERRCODE_UNDEFINED_COLUMN),
2559	errmsg("column \"%s\" does not exist",
2560	trigger->tgargs[i])));
2561	if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
2562	ereport(ERROR,
2563	(errcode(ERRCODE_DATATYPE_MISMATCH),
2564	errmsg("column \"%s\" is not of a character type",
2565	trigger->tgargs[i])));
2566
2567	datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
2568	if (isnull)
2569	continue;
2570
2571	txt = DatumGetTextPP(datum);
2572
2573	parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
2574
2575	if (txt != (text *) DatumGetPointer(datum))
2576	pfree(txt);
2577	}
2578
2579	/ make tsvector value /
2580	datum = TSVectorGetDatum(make_tsvector(&prs));
2581	isnull = false;
2582
2583	/ and insert it into tuple /
2584	rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
2585	`1`, &tsvector_attr_num,
2586	&datum, &isnull);
2587
2588	pfree(DatumGetPointer(datum));
2589
2590	return PointerGetDatum(rettuple);
2591	}
2592

Browse the source code of PostgreSQL/src/backend/utils/adt/tsvector_op.c