1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * like.c |
4 | * like expression handling code. |
5 | * |
6 | * NOTES |
7 | * A big hack of the regexp.c code!! Contributed by |
8 | * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95). |
9 | * |
10 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
11 | * Portions Copyright (c) 1994, Regents of the University of California |
12 | * |
13 | * IDENTIFICATION |
14 | * src/backend/utils/adt/like.c |
15 | * |
16 | *------------------------------------------------------------------------- |
17 | */ |
18 | #include "postgres.h" |
19 | |
20 | #include <ctype.h> |
21 | |
22 | #include "catalog/pg_collation.h" |
23 | #include "mb/pg_wchar.h" |
24 | #include "miscadmin.h" |
25 | #include "utils/builtins.h" |
26 | #include "utils/pg_locale.h" |
27 | |
28 | |
29 | #define LIKE_TRUE 1 |
30 | #define LIKE_FALSE 0 |
31 | #define LIKE_ABORT (-1) |
32 | |
33 | |
34 | static int SB_MatchText(const char *t, int tlen, const char *p, int plen, |
35 | pg_locale_t locale, bool locale_is_c); |
36 | static text *SB_do_like_escape(text *, text *); |
37 | |
38 | static int MB_MatchText(const char *t, int tlen, const char *p, int plen, |
39 | pg_locale_t locale, bool locale_is_c); |
40 | static text *MB_do_like_escape(text *, text *); |
41 | |
42 | static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen, |
43 | pg_locale_t locale, bool locale_is_c); |
44 | |
45 | static int SB_IMatchText(const char *t, int tlen, const char *p, int plen, |
46 | pg_locale_t locale, bool locale_is_c); |
47 | |
48 | static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation); |
49 | static int Generic_Text_IC_like(text *str, text *pat, Oid collation); |
50 | |
/*--------------------
 * wchareq - compare the multibyte characters starting at p1 and p2.
 *
 * Helper for the multibyte MatchText variant (used through CHAREQ).
 * Returns 1 when both characters occupy the same number of bytes, as
 * reported by pg_mblen(), and all of those bytes are equal; returns 0
 * otherwise.
 *--------------------
 */
static inline int
wchareq(const char *p1, const char *p2)
{
	int			nbytes;

	/* Cheap rejection: differing lead bytes can never match. */
	if (*p1 != *p2)
		return 0;

	/* Characters of different byte lengths cannot be equal either. */
	nbytes = pg_mblen(p1);
	if (nbytes != pg_mblen(p2))
		return 0;

	/* Same length: walk both characters byte by byte. */
	for (; nbytes != 0; nbytes--)
	{
		if (*p1 != *p2)
			return 0;
		p1++;
		p2++;
	}

	return 1;
}
77 | |
78 | /* |
79 | * Formerly we had a routine iwchareq() here that tried to do case-insensitive |
80 | * comparison of multibyte characters. It did not work at all, however, |
81 | * because it relied on tolower() which has a single-byte API ... and |
82 | * towlower() wouldn't be much better since we have no suitably cheap way |
83 | * of getting a single character transformed to the system's wchar_t format. |
84 | * So now, we just downcase the strings using lower() and apply regular LIKE |
85 | * comparison. This should be revisited when we install better locale support. |
86 | */ |
87 | |
88 | /* |
89 | * We do handle case-insensitive matching for single-byte encodings using |
90 | * fold-on-the-fly processing, however. |
91 | */ |
92 | static char |
93 | SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c) |
94 | { |
95 | if (locale_is_c) |
96 | return pg_ascii_tolower(c); |
97 | #ifdef HAVE_LOCALE_T |
98 | else if (locale) |
99 | return tolower_l(c, locale->info.lt); |
100 | #endif |
101 | else |
102 | return pg_tolower(c); |
103 | } |
104 | |
105 | |
/*
 * like_match.c is textually included four times below.  Before each
 * inclusion a different set of macros is defined, so each inclusion is
 * compiled under the function name given by MatchText (and, where defined,
 * do_like_escape).
 *
 * NOTE(review): the macros are redefined before each inclusion without an
 * intervening #undef in this file, so presumably like_match.c #undefs them
 * at its end -- confirm against like_match.c.
 */

/* Advance pointer p by one byte and decrement the remaining length plen. */
#define NextByte(p, plen) ((p)++, (plen)--)

/*
 * Set up to compile like_match.c for multibyte characters.
 *
 * CHAREQ compares whole characters via wchareq(); NextChar and CopyAdvChar
 * step by the byte length reported by pg_mblen().
 */
#define CHAREQ(p1, p2) wchareq((p1), (p2))
#define NextChar(p, plen) \
	do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
#define CopyAdvChar(dst, src, srclen) \
	do { int __l = pg_mblen(src); \
		 (srclen) -= __l; \
		 while (__l-- > 0) \
			 *(dst)++ = *(src)++; \
	   } while (0)

#define MatchText	MB_MatchText
#define do_like_escape	MB_do_like_escape

#include "like_match.c"

/*
 * Set up to compile like_match.c for single-byte characters.
 *
 * Every character is exactly one byte, so comparison, advancing and copying
 * are plain byte operations.
 */
#define CHAREQ(p1, p2) (*(p1) == *(p2))
#define NextChar(p, plen) NextByte((p), (plen))
#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)

#define MatchText	SB_MatchText
#define do_like_escape	SB_do_like_escape

#include "like_match.c"

/*
 * Set up to compile like_match.c for single-byte case-insensitive matches.
 *
 * MATCH_LOWER folds a byte through SB_lower_char(); it refers to "locale"
 * and "locale_is_c", which must therefore be in scope wherever the macro is
 * expanded (they are parameters of the MatchText prototypes declared above).
 * Neither CHAREQ nor do_like_escape is defined for this inclusion; how
 * like_match.c handles their absence is not visible here -- presumably it
 * keys off MATCH_LOWER; confirm in like_match.c.
 */
#define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
#define NextChar(p, plen) NextByte((p), (plen))
#define MatchText SB_IMatchText

#include "like_match.c"

/*
 * Set up to compile like_match.c for UTF8 encoding, using a fast NextChar.
 *
 * This NextChar always advances at least one byte, then keeps advancing
 * while input remains and the current byte has the bit pattern 10xxxxxx
 * (i.e. (byte & 0xC0) == 0x80), avoiding a pg_mblen() call per character.
 */

#define NextChar(p, plen) \
	do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
#define MatchText	UTF8_MatchText

#include "like_match.c"
148 | |
149 | /* Generic for all cases not requiring inline case-folding */ |
150 | static inline int |
151 | GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation) |
152 | { |
153 | if (collation && !lc_ctype_is_c(collation) && collation != DEFAULT_COLLATION_OID) |
154 | { |
155 | pg_locale_t locale = pg_newlocale_from_collation(collation); |
156 | |
157 | if (locale && !locale->deterministic) |
158 | ereport(ERROR, |
159 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
160 | errmsg("nondeterministic collations are not supported for LIKE" ))); |
161 | } |
162 | |
163 | if (pg_database_encoding_max_length() == 1) |
164 | return SB_MatchText(s, slen, p, plen, 0, true); |
165 | else if (GetDatabaseEncoding() == PG_UTF8) |
166 | return UTF8_MatchText(s, slen, p, plen, 0, true); |
167 | else |
168 | return MB_MatchText(s, slen, p, plen, 0, true); |
169 | } |
170 | |
171 | static inline int |
172 | Generic_Text_IC_like(text *str, text *pat, Oid collation) |
173 | { |
174 | char *s, |
175 | *p; |
176 | int slen, |
177 | plen; |
178 | pg_locale_t locale = 0; |
179 | bool locale_is_c = false; |
180 | |
181 | if (lc_ctype_is_c(collation)) |
182 | locale_is_c = true; |
183 | else if (collation != DEFAULT_COLLATION_OID) |
184 | { |
185 | if (!OidIsValid(collation)) |
186 | { |
187 | /* |
188 | * This typically means that the parser could not resolve a |
189 | * conflict of implicit collations, so report it that way. |
190 | */ |
191 | ereport(ERROR, |
192 | (errcode(ERRCODE_INDETERMINATE_COLLATION), |
193 | errmsg("could not determine which collation to use for ILIKE" ), |
194 | errhint("Use the COLLATE clause to set the collation explicitly." ))); |
195 | } |
196 | locale = pg_newlocale_from_collation(collation); |
197 | |
198 | if (locale && !locale->deterministic) |
199 | ereport(ERROR, |
200 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
201 | errmsg("nondeterministic collations are not supported for ILIKE" ))); |
202 | } |
203 | |
204 | /* |
205 | * For efficiency reasons, in the single byte case we don't call lower() |
206 | * on the pattern and text, but instead call SB_lower_char on each |
207 | * character. In the multi-byte case we don't have much choice :-(. Also, |
208 | * ICU does not support single-character case folding, so we go the long |
209 | * way. |
210 | */ |
211 | |
212 | if (pg_database_encoding_max_length() > 1 || (locale && locale->provider == COLLPROVIDER_ICU)) |
213 | { |
214 | pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation, |
215 | PointerGetDatum(pat))); |
216 | p = VARDATA_ANY(pat); |
217 | plen = VARSIZE_ANY_EXHDR(pat); |
218 | str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation, |
219 | PointerGetDatum(str))); |
220 | s = VARDATA_ANY(str); |
221 | slen = VARSIZE_ANY_EXHDR(str); |
222 | if (GetDatabaseEncoding() == PG_UTF8) |
223 | return UTF8_MatchText(s, slen, p, plen, 0, true); |
224 | else |
225 | return MB_MatchText(s, slen, p, plen, 0, true); |
226 | } |
227 | else |
228 | { |
229 | p = VARDATA_ANY(pat); |
230 | plen = VARSIZE_ANY_EXHDR(pat); |
231 | s = VARDATA_ANY(str); |
232 | slen = VARSIZE_ANY_EXHDR(str); |
233 | return SB_IMatchText(s, slen, p, plen, locale, locale_is_c); |
234 | } |
235 | } |
236 | |
237 | /* |
238 | * interface routines called by the function manager |
239 | */ |
240 | |
241 | Datum |
242 | namelike(PG_FUNCTION_ARGS) |
243 | { |
244 | Name str = PG_GETARG_NAME(0); |
245 | text *pat = PG_GETARG_TEXT_PP(1); |
246 | bool result; |
247 | char *s, |
248 | *p; |
249 | int slen, |
250 | plen; |
251 | |
252 | s = NameStr(*str); |
253 | slen = strlen(s); |
254 | p = VARDATA_ANY(pat); |
255 | plen = VARSIZE_ANY_EXHDR(pat); |
256 | |
257 | result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE); |
258 | |
259 | PG_RETURN_BOOL(result); |
260 | } |
261 | |
262 | Datum |
263 | namenlike(PG_FUNCTION_ARGS) |
264 | { |
265 | Name str = PG_GETARG_NAME(0); |
266 | text *pat = PG_GETARG_TEXT_PP(1); |
267 | bool result; |
268 | char *s, |
269 | *p; |
270 | int slen, |
271 | plen; |
272 | |
273 | s = NameStr(*str); |
274 | slen = strlen(s); |
275 | p = VARDATA_ANY(pat); |
276 | plen = VARSIZE_ANY_EXHDR(pat); |
277 | |
278 | result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE); |
279 | |
280 | PG_RETURN_BOOL(result); |
281 | } |
282 | |
283 | Datum |
284 | textlike(PG_FUNCTION_ARGS) |
285 | { |
286 | text *str = PG_GETARG_TEXT_PP(0); |
287 | text *pat = PG_GETARG_TEXT_PP(1); |
288 | bool result; |
289 | char *s, |
290 | *p; |
291 | int slen, |
292 | plen; |
293 | |
294 | s = VARDATA_ANY(str); |
295 | slen = VARSIZE_ANY_EXHDR(str); |
296 | p = VARDATA_ANY(pat); |
297 | plen = VARSIZE_ANY_EXHDR(pat); |
298 | |
299 | result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE); |
300 | |
301 | PG_RETURN_BOOL(result); |
302 | } |
303 | |
304 | Datum |
305 | textnlike(PG_FUNCTION_ARGS) |
306 | { |
307 | text *str = PG_GETARG_TEXT_PP(0); |
308 | text *pat = PG_GETARG_TEXT_PP(1); |
309 | bool result; |
310 | char *s, |
311 | *p; |
312 | int slen, |
313 | plen; |
314 | |
315 | s = VARDATA_ANY(str); |
316 | slen = VARSIZE_ANY_EXHDR(str); |
317 | p = VARDATA_ANY(pat); |
318 | plen = VARSIZE_ANY_EXHDR(pat); |
319 | |
320 | result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE); |
321 | |
322 | PG_RETURN_BOOL(result); |
323 | } |
324 | |
325 | Datum |
326 | bytealike(PG_FUNCTION_ARGS) |
327 | { |
328 | bytea *str = PG_GETARG_BYTEA_PP(0); |
329 | bytea *pat = PG_GETARG_BYTEA_PP(1); |
330 | bool result; |
331 | char *s, |
332 | *p; |
333 | int slen, |
334 | plen; |
335 | |
336 | s = VARDATA_ANY(str); |
337 | slen = VARSIZE_ANY_EXHDR(str); |
338 | p = VARDATA_ANY(pat); |
339 | plen = VARSIZE_ANY_EXHDR(pat); |
340 | |
341 | result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE); |
342 | |
343 | PG_RETURN_BOOL(result); |
344 | } |
345 | |
346 | Datum |
347 | byteanlike(PG_FUNCTION_ARGS) |
348 | { |
349 | bytea *str = PG_GETARG_BYTEA_PP(0); |
350 | bytea *pat = PG_GETARG_BYTEA_PP(1); |
351 | bool result; |
352 | char *s, |
353 | *p; |
354 | int slen, |
355 | plen; |
356 | |
357 | s = VARDATA_ANY(str); |
358 | slen = VARSIZE_ANY_EXHDR(str); |
359 | p = VARDATA_ANY(pat); |
360 | plen = VARSIZE_ANY_EXHDR(pat); |
361 | |
362 | result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE); |
363 | |
364 | PG_RETURN_BOOL(result); |
365 | } |
366 | |
367 | /* |
368 | * Case-insensitive versions |
369 | */ |
370 | |
371 | Datum |
372 | nameiclike(PG_FUNCTION_ARGS) |
373 | { |
374 | Name str = PG_GETARG_NAME(0); |
375 | text *pat = PG_GETARG_TEXT_PP(1); |
376 | bool result; |
377 | text *strtext; |
378 | |
379 | strtext = DatumGetTextPP(DirectFunctionCall1(name_text, |
380 | NameGetDatum(str))); |
381 | result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE); |
382 | |
383 | PG_RETURN_BOOL(result); |
384 | } |
385 | |
386 | Datum |
387 | nameicnlike(PG_FUNCTION_ARGS) |
388 | { |
389 | Name str = PG_GETARG_NAME(0); |
390 | text *pat = PG_GETARG_TEXT_PP(1); |
391 | bool result; |
392 | text *strtext; |
393 | |
394 | strtext = DatumGetTextPP(DirectFunctionCall1(name_text, |
395 | NameGetDatum(str))); |
396 | result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE); |
397 | |
398 | PG_RETURN_BOOL(result); |
399 | } |
400 | |
401 | Datum |
402 | texticlike(PG_FUNCTION_ARGS) |
403 | { |
404 | text *str = PG_GETARG_TEXT_PP(0); |
405 | text *pat = PG_GETARG_TEXT_PP(1); |
406 | bool result; |
407 | |
408 | result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE); |
409 | |
410 | PG_RETURN_BOOL(result); |
411 | } |
412 | |
413 | Datum |
414 | texticnlike(PG_FUNCTION_ARGS) |
415 | { |
416 | text *str = PG_GETARG_TEXT_PP(0); |
417 | text *pat = PG_GETARG_TEXT_PP(1); |
418 | bool result; |
419 | |
420 | result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE); |
421 | |
422 | PG_RETURN_BOOL(result); |
423 | } |
424 | |
425 | /* |
426 | * like_escape() --- given a pattern and an ESCAPE string, |
427 | * convert the pattern to use Postgres' standard backslash escape convention. |
428 | */ |
429 | Datum |
430 | like_escape(PG_FUNCTION_ARGS) |
431 | { |
432 | text *pat = PG_GETARG_TEXT_PP(0); |
433 | text *esc = PG_GETARG_TEXT_PP(1); |
434 | text *result; |
435 | |
436 | if (pg_database_encoding_max_length() == 1) |
437 | result = SB_do_like_escape(pat, esc); |
438 | else |
439 | result = MB_do_like_escape(pat, esc); |
440 | |
441 | PG_RETURN_TEXT_P(result); |
442 | } |
443 | |
444 | /* |
445 | * like_escape_bytea() --- given a pattern and an ESCAPE string, |
446 | * convert the pattern to use Postgres' standard backslash escape convention. |
447 | */ |
448 | Datum |
449 | like_escape_bytea(PG_FUNCTION_ARGS) |
450 | { |
451 | bytea *pat = PG_GETARG_BYTEA_PP(0); |
452 | bytea *esc = PG_GETARG_BYTEA_PP(1); |
453 | bytea *result = SB_do_like_escape((text *) pat, (text *) esc); |
454 | |
455 | PG_RETURN_BYTEA_P((bytea *) result); |
456 | } |
457 | |