1/*-------------------------------------------------------------------------
2 *
3 * regis.c
4 * Fast regex subset
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 *
8 *
9 * IDENTIFICATION
10 * src/backend/tsearch/regis.c
11 *
12 *-------------------------------------------------------------------------
13 */
14
15#include "postgres.h"
16
17#include "tsearch/dicts/regis.h"
18#include "tsearch/ts_locale.h"
19
20#define RS_IN_ONEOF 1
21#define RS_IN_ONEOF_IN 2
22#define RS_IN_NONEOF 3
23#define RS_IN_WAIT 4
24
25
26/*
27 * Test whether a regex is of the subset supported here.
28 * Keep this in sync with RS_compile!
29 */
30bool
31RS_isRegis(const char *str)
32{
33 int state = RS_IN_WAIT;
34 const char *c = str;
35
36 while (*c)
37 {
38 if (state == RS_IN_WAIT)
39 {
40 if (t_isalpha(c))
41 /* okay */ ;
42 else if (t_iseq(c, '['))
43 state = RS_IN_ONEOF;
44 else
45 return false;
46 }
47 else if (state == RS_IN_ONEOF)
48 {
49 if (t_iseq(c, '^'))
50 state = RS_IN_NONEOF;
51 else if (t_isalpha(c))
52 state = RS_IN_ONEOF_IN;
53 else
54 return false;
55 }
56 else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
57 {
58 if (t_isalpha(c))
59 /* okay */ ;
60 else if (t_iseq(c, ']'))
61 state = RS_IN_WAIT;
62 else
63 return false;
64 }
65 else
66 elog(ERROR, "internal error in RS_isRegis: state %d", state);
67 c += pg_mblen(c);
68 }
69
70 return (state == RS_IN_WAIT);
71}
72
73static RegisNode *
74newRegisNode(RegisNode *prev, int len)
75{
76 RegisNode *ptr;
77
78 ptr = (RegisNode *) palloc0(RNHDRSZ + len + 1);
79 if (prev)
80 prev->next = ptr;
81 return ptr;
82}
83
84void
85RS_compile(Regis *r, bool issuffix, const char *str)
86{
87 int len = strlen(str);
88 int state = RS_IN_WAIT;
89 const char *c = str;
90 RegisNode *ptr = NULL;
91
92 memset(r, 0, sizeof(Regis));
93 r->issuffix = (issuffix) ? 1 : 0;
94
95 while (*c)
96 {
97 if (state == RS_IN_WAIT)
98 {
99 if (t_isalpha(c))
100 {
101 if (ptr)
102 ptr = newRegisNode(ptr, len);
103 else
104 ptr = r->node = newRegisNode(NULL, len);
105 COPYCHAR(ptr->data, c);
106 ptr->type = RSF_ONEOF;
107 ptr->len = pg_mblen(c);
108 }
109 else if (t_iseq(c, '['))
110 {
111 if (ptr)
112 ptr = newRegisNode(ptr, len);
113 else
114 ptr = r->node = newRegisNode(NULL, len);
115 ptr->type = RSF_ONEOF;
116 state = RS_IN_ONEOF;
117 }
118 else /* shouldn't get here */
119 elog(ERROR, "invalid regis pattern: \"%s\"", str);
120 }
121 else if (state == RS_IN_ONEOF)
122 {
123 if (t_iseq(c, '^'))
124 {
125 ptr->type = RSF_NONEOF;
126 state = RS_IN_NONEOF;
127 }
128 else if (t_isalpha(c))
129 {
130 COPYCHAR(ptr->data, c);
131 ptr->len = pg_mblen(c);
132 state = RS_IN_ONEOF_IN;
133 }
134 else /* shouldn't get here */
135 elog(ERROR, "invalid regis pattern: \"%s\"", str);
136 }
137 else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF)
138 {
139 if (t_isalpha(c))
140 {
141 COPYCHAR(ptr->data + ptr->len, c);
142 ptr->len += pg_mblen(c);
143 }
144 else if (t_iseq(c, ']'))
145 state = RS_IN_WAIT;
146 else /* shouldn't get here */
147 elog(ERROR, "invalid regis pattern: \"%s\"", str);
148 }
149 else
150 elog(ERROR, "internal error in RS_compile: state %d", state);
151 c += pg_mblen(c);
152 }
153
154 if (state != RS_IN_WAIT) /* shouldn't get here */
155 elog(ERROR, "invalid regis pattern: \"%s\"", str);
156
157 ptr = r->node;
158 while (ptr)
159 {
160 r->nchar++;
161 ptr = ptr->next;
162 }
163}
164
165void
166RS_free(Regis *r)
167{
168 RegisNode *ptr = r->node,
169 *tmp;
170
171 while (ptr)
172 {
173 tmp = ptr->next;
174 pfree(ptr);
175 ptr = tmp;
176 }
177
178 r->node = NULL;
179}
180
181static bool
182mb_strchr(char *str, char *c)
183{
184 int clen,
185 plen,
186 i;
187 char *ptr = str;
188 bool res = false;
189
190 clen = pg_mblen(c);
191 while (*ptr && !res)
192 {
193 plen = pg_mblen(ptr);
194 if (plen == clen)
195 {
196 i = plen;
197 res = true;
198 while (i--)
199 if (*(ptr + i) != *(c + i))
200 {
201 res = false;
202 break;
203 }
204 }
205
206 ptr += plen;
207 }
208
209 return res;
210}
211
212bool
213RS_execute(Regis *r, char *str)
214{
215 RegisNode *ptr = r->node;
216 char *c = str;
217 int len = 0;
218
219 while (*c)
220 {
221 len++;
222 c += pg_mblen(c);
223 }
224
225 if (len < r->nchar)
226 return 0;
227
228 c = str;
229 if (r->issuffix)
230 {
231 len -= r->nchar;
232 while (len-- > 0)
233 c += pg_mblen(c);
234 }
235
236
237 while (ptr)
238 {
239 switch (ptr->type)
240 {
241 case RSF_ONEOF:
242 if (!mb_strchr((char *) ptr->data, c))
243 return false;
244 break;
245 case RSF_NONEOF:
246 if (mb_strchr((char *) ptr->data, c))
247 return false;
248 break;
249 default:
250 elog(ERROR, "unrecognized regis node type: %d", ptr->type);
251 }
252 ptr = ptr->next;
253 c += pg_mblen(c);
254 }
255
256 return true;
257}
258