1/*-------------------------------------------------------------------------
2 *
3 * dict_synonym.c
4 * Synonym dictionary: replace word by its synonym
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/tsearch/dict_synonym.c
11 *
12 *-------------------------------------------------------------------------
13 */
14#include "postgres.h"
15
16#include "commands/defrem.h"
17#include "tsearch/ts_locale.h"
18#include "tsearch/ts_utils.h"
19#include "utils/builtins.h"
20
21typedef struct
22{
23 char *in;
24 char *out;
25 int outlen;
26 uint16 flags;
27} Syn;
28
29typedef struct
30{
31 int len; /* length of syn array */
32 Syn *syn;
33 bool case_sensitive;
34} DictSyn;
35
36/*
37 * Finds the next whitespace-delimited word within the 'in' string.
38 * Returns a pointer to the first character of the word, and a pointer
39 * to the next byte after the last character in the word (in *end).
40 * Character '*' at the end of word will not be threated as word
41 * character if flags is not null.
42 */
43static char *
44findwrd(char *in, char **end, uint16 *flags)
45{
46 char *start;
47 char *lastchar;
48
49 /* Skip leading spaces */
50 while (*in && t_isspace(in))
51 in += pg_mblen(in);
52
53 /* Return NULL on empty lines */
54 if (*in == '\0')
55 {
56 *end = NULL;
57 return NULL;
58 }
59
60 lastchar = start = in;
61
62 /* Find end of word */
63 while (*in && !t_isspace(in))
64 {
65 lastchar = in;
66 in += pg_mblen(in);
67 }
68
69 if (in - lastchar == 1 && t_iseq(lastchar, '*') && flags)
70 {
71 *flags = TSL_PREFIX;
72 *end = lastchar;
73 }
74 else
75 {
76 if (flags)
77 *flags = 0;
78 *end = in;
79 }
80
81 return start;
82}
83
84static int
85compareSyn(const void *a, const void *b)
86{
87 return strcmp(((const Syn *) a)->in, ((const Syn *) b)->in);
88}
89
90
91Datum
92dsynonym_init(PG_FUNCTION_ARGS)
93{
94 List *dictoptions = (List *) PG_GETARG_POINTER(0);
95 DictSyn *d;
96 ListCell *l;
97 char *filename = NULL;
98 bool case_sensitive = false;
99 tsearch_readline_state trst;
100 char *starti,
101 *starto,
102 *end = NULL;
103 int cur = 0;
104 char *line = NULL;
105 uint16 flags = 0;
106
107 foreach(l, dictoptions)
108 {
109 DefElem *defel = (DefElem *) lfirst(l);
110
111 if (strcmp(defel->defname, "synonyms") == 0)
112 filename = defGetString(defel);
113 else if (strcmp(defel->defname, "casesensitive") == 0)
114 case_sensitive = defGetBoolean(defel);
115 else
116 ereport(ERROR,
117 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
118 errmsg("unrecognized synonym parameter: \"%s\"",
119 defel->defname)));
120 }
121
122 if (!filename)
123 ereport(ERROR,
124 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
125 errmsg("missing Synonyms parameter")));
126
127 filename = get_tsearch_config_filename(filename, "syn");
128
129 if (!tsearch_readline_begin(&trst, filename))
130 ereport(ERROR,
131 (errcode(ERRCODE_CONFIG_FILE_ERROR),
132 errmsg("could not open synonym file \"%s\": %m",
133 filename)));
134
135 d = (DictSyn *) palloc0(sizeof(DictSyn));
136
137 while ((line = tsearch_readline(&trst)) != NULL)
138 {
139 starti = findwrd(line, &end, NULL);
140 if (!starti)
141 {
142 /* Empty line */
143 goto skipline;
144 }
145 if (*end == '\0')
146 {
147 /* A line with only one word. Ignore silently. */
148 goto skipline;
149 }
150 *end = '\0';
151
152 starto = findwrd(end + 1, &end, &flags);
153 if (!starto)
154 {
155 /* A line with only one word (+whitespace). Ignore silently. */
156 goto skipline;
157 }
158 *end = '\0';
159
160 /*
161 * starti now points to the first word, and starto to the second word
162 * on the line, with a \0 terminator at the end of both words.
163 */
164
165 if (cur >= d->len)
166 {
167 if (d->len == 0)
168 {
169 d->len = 64;
170 d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
171 }
172 else
173 {
174 d->len *= 2;
175 d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
176 }
177 }
178
179 if (case_sensitive)
180 {
181 d->syn[cur].in = pstrdup(starti);
182 d->syn[cur].out = pstrdup(starto);
183 }
184 else
185 {
186 d->syn[cur].in = lowerstr(starti);
187 d->syn[cur].out = lowerstr(starto);
188 }
189
190 d->syn[cur].outlen = strlen(starto);
191 d->syn[cur].flags = flags;
192
193 cur++;
194
195skipline:
196 pfree(line);
197 }
198
199 tsearch_readline_end(&trst);
200
201 d->len = cur;
202 qsort(d->syn, d->len, sizeof(Syn), compareSyn);
203
204 d->case_sensitive = case_sensitive;
205
206 PG_RETURN_POINTER(d);
207}
208
209Datum
210dsynonym_lexize(PG_FUNCTION_ARGS)
211{
212 DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
213 char *in = (char *) PG_GETARG_POINTER(1);
214 int32 len = PG_GETARG_INT32(2);
215 Syn key,
216 *found;
217 TSLexeme *res;
218
219 /* note: d->len test protects against Solaris bsearch-of-no-items bug */
220 if (len <= 0 || d->len <= 0)
221 PG_RETURN_POINTER(NULL);
222
223 if (d->case_sensitive)
224 key.in = pnstrdup(in, len);
225 else
226 key.in = lowerstr_with_len(in, len);
227
228 key.out = NULL;
229
230 found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
231 pfree(key.in);
232
233 if (!found)
234 PG_RETURN_POINTER(NULL);
235
236 res = palloc0(sizeof(TSLexeme) * 2);
237 res[0].lexeme = pnstrdup(found->out, found->outlen);
238 res[0].flags = found->flags;
239
240 PG_RETURN_POINTER(res);
241}
242