dict_synonym.c source code [PostgreSQL/src/backend/tsearch/dict_synonym.c]

/*-------------------------------------------------------------------------
 *
 * dict_synonym.c
 *		Synonym dictionary: replace word by its synonym
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  src/backend/tsearch/dict_synonym.c
 *
 *-------------------------------------------------------------------------
 */
14	#include "postgres.h"
15
16	#include "commands/defrem.h"
17	#include "tsearch/ts_locale.h"
18	#include "tsearch/ts_utils.h"
19	#include "utils/builtins.h"
20
21	typedef struct
22	{
23	char *in;
24	char *out;
25	int outlen;
26	uint16 flags;
27	} Syn;
28
29	typedef struct
30	{
31	int len; / length of syn array /
32	Syn *syn;
33	bool case_sensitive;
34	} DictSyn;
35
36	/*
37	* Finds the next whitespace-delimited word within the 'in' string.
38	* Returns a pointer to the first character of the word, and a pointer
39	* to the next byte after the last character in the word (in *end).
40	* Character '*' at the end of word will not be threated as word
41	* character if flags is not null.
42	*/
43	static char *
44	findwrd(char in, char* *end, uint16 flags)
45	{
46	char *start;
47	char *lastchar;
48
49	/ Skip leading spaces /
50	while (*in && t_isspace(in))
51	in += pg_mblen(in);
52
53	/ Return NULL on empty lines /
54	if (*in == `'\0'`)
55	{
56	*end = NULL;
57	return NULL;
58	}
59
60	lastchar = start = in;
61
62	/ Find end of word /
63	while (*in && !t_isspace(in))
64	{
65	lastchar = in;
66	in += pg_mblen(in);
67	}
68
69	if (in - lastchar == `1` && t_iseq(lastchar, `'*'`) && flags)
70	{
71	*flags = TSL_PREFIX;
72	*end = lastchar;
73	}
74	else
75	{
76	if (flags)
77	*flags = `0`;
78	*end = in;
79	}
80
81	return start;
82	}
83
84	static int
85	compareSyn(const void a, const* void *b)
86	{
87	return strcmp(((const Syn ) a)->in, ((const* Syn *) b)->in);
88	}
89
90
91	Datum
92	dsynonym_init(PG_FUNCTION_ARGS)
93	{
94	List dictoptions = (List ) PG_GETARG_POINTER(`0`);
95	DictSyn *d;
96	ListCell *l;
97	char *filename = NULL;
98	bool case_sensitive = false;
99	tsearch_readline_state trst;
100	char *starti,
101	*starto,
102	*end = NULL;
103	int cur = `0`;
104	char *line = NULL;
105	uint16 flags = `0`;
106
107	foreach(l, dictoptions)
108	{
109	DefElem defel = (DefElem ) lfirst(l);
110
111	if (strcmp(defel->defname, "synonyms") == `0`)
112	filename = defGetString(defel);
113	else if (strcmp(defel->defname, "casesensitive") == `0`)
114	case_sensitive = defGetBoolean(defel);
115	else
116	ereport(ERROR,
117	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
118	errmsg("unrecognized synonym parameter: \"%s\"",
119	defel->defname)));
120	}
121
122	if (!filename)
123	ereport(ERROR,
124	(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
125	errmsg("missing Synonyms parameter")));
126
127	filename = get_tsearch_config_filename(filename, "syn");
128
129	if (!tsearch_readline_begin(&trst, filename))
130	ereport(ERROR,
131	(errcode(ERRCODE_CONFIG_FILE_ERROR),
132	errmsg("could not open synonym file \"%s\": %m",
133	filename)));
134
135	d = (DictSyn ) palloc0(sizeof*(DictSyn));
136
137	while ((line = tsearch_readline(&trst)) != NULL)
138	{
139	starti = findwrd(line, &end, NULL);
140	if (!starti)
141	{
142	/ Empty line /
143	goto skipline;
144	}
145	if (*end == `'\0'`)
146	{
147	/ A line with only one word. Ignore silently. /
148	goto skipline;
149	}
150	*end = `'\0'`;
151
152	starto = findwrd(end + `1`, &end, &flags);
153	if (!starto)
154	{
155	/ A line with only one word (+whitespace). Ignore silently. /
156	goto skipline;
157	}
158	*end = `'\0'`;
159
160	/*
161	* starti now points to the first word, and starto to the second word
162	* on the line, with a \0 terminator at the end of both words.
163	*/
164
165	if (cur >= d->len)
166	{
167	if (d->len == `0`)
168	{
169	d->len = `64`;
170	d->syn = (Syn ) palloc(sizeof(Syn) d->len);
171	}
172	else
173	{
174	d->len *= `2`;
175	d->syn = (Syn ) repalloc(d->syn, sizeof(Syn) d->len);
176	}
177	}
178
179	if (case_sensitive)
180	{
181	d->syn[cur].in = pstrdup(starti);
182	d->syn[cur].out = pstrdup(starto);
183	}
184	else
185	{
186	d->syn[cur].in = lowerstr(starti);
187	d->syn[cur].out = lowerstr(starto);
188	}
189
190	d->syn[cur].outlen = strlen(starto);
191	d->syn[cur].flags = flags;
192
193	cur++;
194
195	skipline:
196	pfree(line);
197	}
198
199	tsearch_readline_end(&trst);
200
201	d->len = cur;
202	qsort(d->syn, d->len, sizeof(Syn), compareSyn);
203
204	d->case_sensitive = case_sensitive;
205
206	PG_RETURN_POINTER(d);
207	}
208
209	Datum
210	dsynonym_lexize(PG_FUNCTION_ARGS)
211	{
212	DictSyn d = (DictSyn ) PG_GETARG_POINTER(`0`);
213	char in = (char* *) PG_GETARG_POINTER(`1`);
214	int32 len = PG_GETARG_INT32(`2`);
215	Syn key,
216	*found;
217	TSLexeme *res;
218
219	/ note: d->len test protects against Solaris bsearch-of-no-items bug /
220	if (len <= `0` \|\| d->len <= `0`)
221	PG_RETURN_POINTER(NULL);
222
223	if (d->case_sensitive)
224	key.in = pnstrdup(in, len);
225	else
226	key.in = lowerstr_with_len(in, len);
227
228	key.out = NULL;
229
230	found = (Syn ) bsearch(&key, d->syn, d->len, sizeof*(Syn), compareSyn);
231	pfree(key.in);
232
233	if (!found)
234	PG_RETURN_POINTER(NULL);
235
236	res = palloc0(sizeof(TSLexeme) * `2`);
237	res[`0`].lexeme = pnstrdup(found->out, found->outlen);
238	res[`0`].flags = found->flags;
239
240	PG_RETURN_POINTER(res);
241	}
242

Browse the source code of PostgreSQL/src/backend/tsearch/dict_synonym.c