1/*************************************************
2* Perl-Compatible Regular Expressions *
3*************************************************/
4
5/* PCRE is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2016-2022 University of Cambridge
11
12-----------------------------------------------------------------------------
13Redistribution and use in source and binary forms, with or without
14modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37POSSIBILITY OF SUCH DAMAGE.
38-----------------------------------------------------------------------------
39*/
40
41
42#ifdef HAVE_CONFIG_H
43#include "config.h"
44#endif
45
46#include "pcre2_internal.h"
47
/* The option bits that select which kind of pattern is being converted.
pcre2_pattern_convert() requires exactly one of these bits to be set. */

#define TYPE_OPTIONS (PCRE2_CONVERT_GLOB| \
  PCRE2_CONVERT_POSIX_BASIC|PCRE2_CONVERT_POSIX_EXTENDED)

/* Every option bit that pcre2_pattern_convert() accepts; any bit outside this
mask causes PCRE2_ERROR_BADOPTION. */

#define ALL_OPTIONS (PCRE2_CONVERT_UTF|PCRE2_CONVERT_NO_UTF_CHECK| \
  PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR| \
  PCRE2_CONVERT_GLOB_NO_STARSTAR| \
  TYPE_OPTIONS)

/* Size, in code units, of the on-stack scratch buffer that is used when the
caller does not supply an output buffer. */

#define DUMMY_BUFFER_SIZE 100
57
/* Generated pattern fragments. Each one is built by string-literal
concatenation of the single-character STR_xxx macros, which are defined
outside this file (presumably each expands to the one-character string that
its name suggests - confirm in the internal header). Of these, only
STR_COLON_RIGHT_SQUARE_BRACKET and STR_STAR_NUL are referenced by the code
visible in this file. */

#define STR_BACKSLASH_A STR_BACKSLASH STR_A
#define STR_BACKSLASH_z STR_BACKSLASH STR_z
#define STR_COLON_RIGHT_SQUARE_BRACKET STR_COLON STR_RIGHT_SQUARE_BRACKET
#define STR_DOT_STAR_LOOKBEHIND STR_DOT STR_ASTERISK STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_LESS_THAN_SIGN STR_EQUALS_SIGN
#define STR_LOOKAHEAD_NOT_DOT STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_EXCLAMATION_MARK STR_BACKSLASH STR_DOT STR_RIGHT_PARENTHESIS
#define STR_QUERY_s STR_LEFT_PARENTHESIS STR_QUESTION_MARK STR_s STR_RIGHT_PARENTHESIS
#define STR_STAR_NUL STR_LEFT_PARENTHESIS STR_ASTERISK STR_N STR_U STR_L STR_RIGHT_PARENTHESIS
67
/* States for POSIX processing. The numerical order matters: convert_posix()
treats any state >= POSIX_CLASS_NOT_STARTED as "inside a bracket expression"
and any state < POSIX_NOT_BRACKET as "nothing has been matched yet at this
point" (start of the regex, or just after an anchoring circumflex). The three
CLASS states track progress through a possible [:name:] item inside a bracket
expression. */

enum { POSIX_START_REGEX, POSIX_ANCHORED, POSIX_NOT_BRACKET,
       POSIX_CLASS_NOT_STARTED, POSIX_CLASS_STARTING, POSIX_CLASS_STARTED };
72
/* Macro to add a zero-terminated character string to the output buffer,
checking for overflow. It relies on three locals of the enclosing function:
s (a scratch char pointer), p (the current output position) and endp (the
output limit). If the string does not fit, the enclosing function returns
PCRE2_ERROR_NOMEMORY. The body is wrapped in do ... while (0) so that an
invocation followed by a semicolon is exactly one statement, which keeps it
safe inside an unbraced if/else. */

#define PUTCHARS(string) \
  do \
    { \
    for (s = (char *)(string); *s != 0; s++) \
      { \
      if (p >= endp) return PCRE2_ERROR_NOMEMORY; \
      *p++ = *s; \
      } \
    } \
  while (0)
83
/* Literals that must be escaped: \ ? * + | . ^ $ { } [ ] ( )
This string is searched with strchr() to decide whether a backslash has to be
written in front of a literal character in the generated pattern. */

static const char *pcre2_escaped_literals =
  STR_BACKSLASH STR_QUESTION_MARK STR_ASTERISK STR_PLUS
  STR_VERTICAL_LINE STR_DOT STR_CIRCUMFLEX_ACCENT STR_DOLLAR_SIGN
  STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
  STR_LEFT_SQUARE_BRACKET STR_RIGHT_SQUARE_BRACKET
  STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS;
92
/* Recognized escaped metacharacters in POSIX basic patterns: a backslash
followed by one of ( ) { } or a digit 1-9 is special rather than a literal.
The digit 0 is deliberately not in the list. */

static const char *posix_meta_escapes =
  STR_LEFT_PARENTHESIS STR_RIGHT_PARENTHESIS
  STR_LEFT_CURLY_BRACKET STR_RIGHT_CURLY_BRACKET
  STR_1 STR_2 STR_3 STR_4 STR_5 STR_6 STR_7 STR_8 STR_9;
99
100
101
102/*************************************************
103* Convert a POSIX pattern *
104*************************************************/
105
/* This function handles both basic and extended POSIX patterns. The input is
scanned one character at a time and an equivalent PCRE2 pattern is written to
the output buffer, followed by a terminating zero.

Arguments:
  pattype        the pattern type (PCRE2_CONVERT_POSIX_EXTENDED selects extended
                   processing; otherwise basic rules are applied)
  pattern        the pattern
  plength        length in code units
  utf            TRUE if UTF
  use_buffer     where to put the output
  use_length     length of use_buffer
  bufflenptr     where to put the used length; it is set to plength on entry
                   (the default error offset) and to the converted length,
                   excluding the terminating zero, on success
  dummyrun       TRUE if a dummy run: the output pointer is reset to the start
                   of use_buffer before each input item, so only the length is
                   meaningful
  ccontext       the convert context

Returns:         0 => success
                !0 => error code (PCRE2_ERROR_NOMEMORY, PCRE2_ERROR_END_BACKSLASH
                      or PCRE2_ERROR_MISSING_SQUARE_BRACKET)
*/

static int
convert_posix(uint32_t pattype, PCRE2_SPTR pattern, PCRE2_SIZE plength,
  BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
  PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
{
char *s;                                 /* Scratch pointer for PUTCHARS */
PCRE2_SPTR posix = pattern;              /* Current input position */
PCRE2_UCHAR *p = use_buffer;             /* Current output position */
PCRE2_UCHAR *pp = p;                     /* Start of the current output item */
PCRE2_UCHAR *endp = p + use_length - 1;  /* Allow for trailing zero */
PCRE2_SIZE convlength = 0;               /* Total converted length so far */

uint32_t bracount = 0;                   /* Count of open parentheses */
uint32_t posix_state = POSIX_START_REGEX;
uint32_t lastspecial = 0;                /* Last special character copied */
BOOL extended = (pattype & PCRE2_CONVERT_POSIX_EXTENDED) != 0;
BOOL nextisliteral = FALSE;              /* Set by a backslash */

(void)utf;       /* Not used when Unicode not supported */
(void)ccontext;  /* Not currently used */

/* Initialize default for error offset as end of input. */

*bufflenptr = plength;

/* Every converted pattern starts with the STR_STAR_NUL fragment. */

PUTCHARS(STR_STAR_NUL);

/* Now scan the input. */

while (plength > 0)
  {
  uint32_t c, sc;
  int clength = 1;

  /* Add in the length of the last item, then, if in the dummy run, pull the
  pointer back to the start of the (temporary) buffer and then remember the
  start of the next item. */

  convlength += p - pp;
  if (dummyrun) p = use_buffer;
  pp = p;

  /* Pick up the next character */

#ifndef SUPPORT_UNICODE
  c = *posix;
#else
  GETCHARLENTEST(c, posix, clength);
#endif
  posix += clength;
  plength -= clength;

  /* sc is the value that is switched on below. When the previous character
  was a backslash that made this one literal, sc is forced to zero so that the
  character is handled by the default (literal) case whatever it is. */

  sc = nextisliteral? 0 : c;
  nextisliteral = FALSE;

  /* Handle a character within a class. */

  if (posix_state >= POSIX_CLASS_NOT_STARTED)
    {
    if (c == CHAR_RIGHT_SQUARE_BRACKET)
      {
      PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
      posix_state = POSIX_NOT_BRACKET;
      }

    /* Not the end of the class */

    else
      {
      /* Track whether we are inside a [:name:] item: NOT_STARTED becomes
      STARTING after '[', STARTING becomes STARTED after ':', and STARTED
      persists while lower case ASCII letters are seen. */

      switch (posix_state)
        {
        case POSIX_CLASS_STARTED:
        if (c <= 127 && islower(c)) break;  /* Remain in started state */
        posix_state = POSIX_CLASS_NOT_STARTED;
        if (c == CHAR_COLON  && plength > 0 &&
            *posix == CHAR_RIGHT_SQUARE_BRACKET)
          {
          PUTCHARS(STR_COLON_RIGHT_SQUARE_BRACKET);
          plength--;
          posix++;
          continue;    /* With next character after :] */
          }
        /* Fall through */

        case POSIX_CLASS_NOT_STARTED:
        if (c == CHAR_LEFT_SQUARE_BRACKET)
          posix_state = POSIX_CLASS_STARTING;
        break;

        case POSIX_CLASS_STARTING:
        if (c == CHAR_COLON) posix_state = POSIX_CLASS_STARTED;
        break;
        }

      /* Copy the character to the output; a backslash is doubled so that it
      is taken literally inside the class. */

      if (c == CHAR_BACKSLASH) PUTCHARS(STR_BACKSLASH);
      if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
      memcpy(p, posix - clength, CU2BYTES(clength));
      p += clength;
      }
    }

  /* Handle a character not within a class. */

  else switch(sc)
    {
    case CHAR_LEFT_SQUARE_BRACKET:
    PUTCHARS(STR_LEFT_SQUARE_BRACKET);

#ifdef NEVER
    /* We could handle special cases [[:<:]] and [[:>:]] (which PCRE does
    support) but they are not part of POSIX 1003.1. */

    if (plength >= 6)
      {
      if (posix[0] == CHAR_LEFT_SQUARE_BRACKET &&
          posix[1] == CHAR_COLON &&
          (posix[2] == CHAR_LESS_THAN_SIGN ||
           posix[2] == CHAR_GREATER_THAN_SIGN) &&
          posix[3] == CHAR_COLON &&
          posix[4] == CHAR_RIGHT_SQUARE_BRACKET &&
          posix[5] == CHAR_RIGHT_SQUARE_BRACKET)
        {
        if (p + 6 > endp) return PCRE2_ERROR_NOMEMORY;
        memcpy(p, posix, CU2BYTES(6));
        p += 6;
        posix += 6;
        plength -= 6;
        continue;  /* With next character */
        }
      }
#endif

    /* Handle start of "normal" character classes */

    posix_state = POSIX_CLASS_NOT_STARTED;

    /* Handle ^ and ] as first characters */

    if (plength > 0)
      {
      if (*posix == CHAR_CIRCUMFLEX_ACCENT)
        {
        posix++;
        plength--;
        PUTCHARS(STR_CIRCUMFLEX_ACCENT);
        }
      if (plength > 0 && *posix == CHAR_RIGHT_SQUARE_BRACKET)
        {
        posix++;
        plength--;
        PUTCHARS(STR_RIGHT_SQUARE_BRACKET);
        }
      }
    break;

    /* In extended mode a backslash always makes the next character literal.
    In basic mode, a backslash followed by one of the characters listed in
    posix_meta_escapes is special: the following character is copied as is,
    with the backslash retained only in front of a digit. */

    case CHAR_BACKSLASH:
    if (plength == 0) return PCRE2_ERROR_END_BACKSLASH;
    if (extended) nextisliteral = TRUE; else
      {
      if (*posix < 127 && strchr(posix_meta_escapes, *posix) != NULL)
        {
        if (isdigit(*posix)) PUTCHARS(STR_BACKSLASH);
        if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
        lastspecial = *p++ = *posix++;
        plength--;
        }
      else nextisliteral = TRUE;
      }
    break;

    /* A closing parenthesis is special only in extended mode, and only when
    there is an open parenthesis for it to match. */

    case CHAR_RIGHT_PARENTHESIS:
    if (!extended || bracount == 0) goto ESCAPE_LITERAL;
    bracount--;
    goto COPY_SPECIAL;

    case CHAR_LEFT_PARENTHESIS:
    bracount++;
    /* Fall through */

    /* These characters are special only in extended mode; in basic mode they
    are escaped and treated as literals. */

    case CHAR_QUESTION_MARK:
    case CHAR_PLUS:
    case CHAR_LEFT_CURLY_BRACKET:
    case CHAR_RIGHT_CURLY_BRACKET:
    case CHAR_VERTICAL_LINE:
    if (!extended) goto ESCAPE_LITERAL;
    /* Fall through */

    case CHAR_DOT:
    case CHAR_DOLLAR_SIGN:
    posix_state = POSIX_NOT_BRACKET;
    COPY_SPECIAL:
    lastspecial = c;
    if (p + 1 > endp) return PCRE2_ERROR_NOMEMORY;
    *p++ = c;
    break;

    /* In basic mode an asterisk is a literal when nothing precedes it at this
    point (state below POSIX_NOT_BRACKET) or when it follows an opening
    parenthesis. */

    case CHAR_ASTERISK:
    if (lastspecial != CHAR_ASTERISK)
      {
      if (!extended && (posix_state < POSIX_NOT_BRACKET ||
          lastspecial == CHAR_LEFT_PARENTHESIS))
        goto ESCAPE_LITERAL;
      goto COPY_SPECIAL;
      }
    break;   /* Ignore second and subsequent asterisks */

    /* In basic mode a circumflex is an anchor only at the start of the regex
    or after an opening parenthesis; elsewhere it falls through and is treated
    as a literal. */

    case CHAR_CIRCUMFLEX_ACCENT:
    if (extended) goto COPY_SPECIAL;
    if (posix_state == POSIX_START_REGEX ||
        lastspecial == CHAR_LEFT_PARENTHESIS)
      {
      posix_state = POSIX_ANCHORED;
      goto COPY_SPECIAL;
      }
    /* Fall through */

    /* A literal character: escape it if it appears in the list of characters
    that need escaping, then copy all of its code units. */

    default:
    if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
      {
      ESCAPE_LITERAL:
      PUTCHARS(STR_BACKSLASH);
      }
    lastspecial = 0xff;  /* Indicates nothing special */
    if (p + clength > endp) return PCRE2_ERROR_NOMEMORY;
    memcpy(p, posix - clength, CU2BYTES(clength));
    p += clength;
    posix_state = POSIX_NOT_BRACKET;
    break;
    }
  }

/* Running out of input while still inside a bracket expression is an error. */

if (posix_state >= POSIX_CLASS_NOT_STARTED)
  return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
convlength += p - pp;        /* Final segment */
*bufflenptr = convlength;
*p++ = 0;
return 0;
}
360
361
362/*************************************************
363* Convert a glob pattern *
364*************************************************/
365
/* Context for writing the output into a buffer. The write functions always
increment output_size, but store a code unit only while output is below
output_end, so output_size ends up as the length that is needed even when the
buffer is too small. out_str is a small staging area: callers fill in up to
eight characters and then call convert_glob_write_str() to copy them out. */

typedef struct pcre2_output_context {
  PCRE2_UCHAR *output;                  /* current output position */
  PCRE2_SPTR output_end;                /* output end */
  PCRE2_SIZE output_size;               /* size of the output */
  uint8_t out_str[8];                   /* string copied to the output */
} pcre2_output_context;
374
375
376/* Write a character into the output.
377
378Arguments:
379 out output context
380 chr the next character
381*/
382
383static void
384convert_glob_write(pcre2_output_context *out, PCRE2_UCHAR chr)
385{
386out->output_size++;
387
388if (out->output < out->output_end)
389 *out->output++ = chr;
390}
391
392
393/* Write a string into the output.
394
395Arguments:
396 out output context
397 length length of out->out_str
398*/
399
400static void
401convert_glob_write_str(pcre2_output_context *out, PCRE2_SIZE length)
402{
403uint8_t *out_str = out->out_str;
404PCRE2_UCHAR *output = out->output;
405PCRE2_SPTR output_end = out->output_end;
406PCRE2_SIZE output_size = out->output_size;
407
408do
409 {
410 output_size++;
411
412 if (output < output_end)
413 *output++ = *out_str++;
414 }
415while (--length != 0);
416
417out->output = output;
418out->output_size = output_size;
419}
420
421
422/* Prints the separator into the output.
423
424Arguments:
425 out output context
426 separator glob separator
427 with_escape backslash is needed before separator
428*/
429
430static void
431convert_glob_print_separator(pcre2_output_context *out,
432 PCRE2_UCHAR separator, BOOL with_escape)
433{
434if (with_escape)
435 convert_glob_write(out, CHAR_BACKSLASH);
436
437convert_glob_write(out, separator);
438}
439
440
441/* Prints a wildcard into the output.
442
443Arguments:
444 out output context
445 separator glob separator
446 with_escape backslash is needed before separator
447*/
448
449static void
450convert_glob_print_wildcard(pcre2_output_context *out,
451 PCRE2_UCHAR separator, BOOL with_escape)
452{
453out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
454out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
455convert_glob_write_str(out, 2);
456
457convert_glob_print_separator(out, separator, with_escape);
458
459convert_glob_write(out, CHAR_RIGHT_SQUARE_BRACKET);
460}
461
462
463/* Parse a posix class.
464
465Arguments:
466 from starting point of scanning the range
467 pattern_end end of pattern
468 out output context
469
470Returns: >0 => class index
471 0 => malformed class
472*/
473
474static int
475convert_glob_parse_class(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
476 pcre2_output_context *out)
477{
478static const char *posix_classes = "alnum:alpha:ascii:blank:cntrl:digit:"
479 "graph:lower:print:punct:space:upper:word:xdigit:";
480PCRE2_SPTR start = *from + 1;
481PCRE2_SPTR pattern = start;
482const char *class_ptr;
483PCRE2_UCHAR c;
484int class_index;
485
486while (TRUE)
487 {
488 if (pattern >= pattern_end) return 0;
489
490 c = *pattern++;
491
492 if (c < CHAR_a || c > CHAR_z) break;
493 }
494
495if (c != CHAR_COLON || pattern >= pattern_end ||
496 *pattern != CHAR_RIGHT_SQUARE_BRACKET)
497 return 0;
498
499class_ptr = posix_classes;
500class_index = 1;
501
502while (TRUE)
503 {
504 if (*class_ptr == CHAR_NUL) return 0;
505
506 pattern = start;
507
508 while (*pattern == (PCRE2_UCHAR) *class_ptr)
509 {
510 if (*pattern == CHAR_COLON)
511 {
512 pattern += 2;
513 start -= 2;
514
515 do convert_glob_write(out, *start++); while (start < pattern);
516
517 *from = pattern;
518 return class_index;
519 }
520 pattern++;
521 class_ptr++;
522 }
523
524 while (*class_ptr != CHAR_COLON) class_ptr++;
525 class_ptr++;
526 class_index++;
527 }
528}
529
530/* Checks whether the character is in the class.
531
532Arguments:
533 class_index class index
534 c character
535
536Returns: !0 => character is found in the class
537 0 => otherwise
538*/
539
540static BOOL
541convert_glob_char_in_class(int class_index, PCRE2_UCHAR c)
542{
543switch (class_index)
544 {
545 case 1: return isalnum(c);
546 case 2: return isalpha(c);
547 case 3: return 1;
548 case 4: return c == CHAR_HT || c == CHAR_SPACE;
549 case 5: return iscntrl(c);
550 case 6: return isdigit(c);
551 case 7: return isgraph(c);
552 case 8: return islower(c);
553 case 9: return isprint(c);
554 case 10: return ispunct(c);
555 case 11: return isspace(c);
556 case 12: return isupper(c);
557 case 13: return isalnum(c) || c == CHAR_UNDERSCORE;
558 default: return isxdigit(c);
559 }
560}
561
562/* Parse a range of characters.
563
564Arguments:
565 from starting point of scanning the range
566 pattern_end end of pattern
567 out output context
568 separator glob separator
569 with_escape backslash is needed before separator
570
571Returns: 0 => success
572 !0 => error code
573*/
574
575static int
576convert_glob_parse_range(PCRE2_SPTR *from, PCRE2_SPTR pattern_end,
577 pcre2_output_context *out, BOOL utf, PCRE2_UCHAR separator,
578 BOOL with_escape, PCRE2_UCHAR escape, BOOL no_wildsep)
579{
580BOOL is_negative = FALSE;
581BOOL separator_seen = FALSE;
582BOOL has_prev_c;
583PCRE2_SPTR pattern = *from;
584PCRE2_SPTR char_start = NULL;
585uint32_t c, prev_c;
586int len, class_index;
587
588(void)utf; /* Avoid compiler warning. */
589
590if (pattern >= pattern_end)
591 {
592 *from = pattern;
593 return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
594 }
595
596if (*pattern == CHAR_EXCLAMATION_MARK
597 || *pattern == CHAR_CIRCUMFLEX_ACCENT)
598 {
599 pattern++;
600
601 if (pattern >= pattern_end)
602 {
603 *from = pattern;
604 return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
605 }
606
607 is_negative = TRUE;
608
609 out->out_str[0] = CHAR_LEFT_SQUARE_BRACKET;
610 out->out_str[1] = CHAR_CIRCUMFLEX_ACCENT;
611 len = 2;
612
613 if (!no_wildsep)
614 {
615 if (with_escape)
616 {
617 out->out_str[len] = CHAR_BACKSLASH;
618 len++;
619 }
620 out->out_str[len] = (uint8_t) separator;
621 }
622
623 convert_glob_write_str(out, len + 1);
624 }
625else
626 convert_glob_write(out, CHAR_LEFT_SQUARE_BRACKET);
627
628has_prev_c = FALSE;
629prev_c = 0;
630
631if (*pattern == CHAR_RIGHT_SQUARE_BRACKET)
632 {
633 out->out_str[0] = CHAR_BACKSLASH;
634 out->out_str[1] = CHAR_RIGHT_SQUARE_BRACKET;
635 convert_glob_write_str(out, 2);
636 has_prev_c = TRUE;
637 prev_c = CHAR_RIGHT_SQUARE_BRACKET;
638 pattern++;
639 }
640
641while (pattern < pattern_end)
642 {
643 char_start = pattern;
644 GETCHARINCTEST(c, pattern);
645
646 if (c == CHAR_RIGHT_SQUARE_BRACKET)
647 {
648 convert_glob_write(out, c);
649
650 if (!is_negative && !no_wildsep && separator_seen)
651 {
652 out->out_str[0] = CHAR_LEFT_PARENTHESIS;
653 out->out_str[1] = CHAR_QUESTION_MARK;
654 out->out_str[2] = CHAR_LESS_THAN_SIGN;
655 out->out_str[3] = CHAR_EXCLAMATION_MARK;
656 convert_glob_write_str(out, 4);
657
658 convert_glob_print_separator(out, separator, with_escape);
659 convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
660 }
661
662 *from = pattern;
663 return 0;
664 }
665
666 if (pattern >= pattern_end) break;
667
668 if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
669 {
670 *from = pattern;
671 class_index = convert_glob_parse_class(from, pattern_end, out);
672
673 if (class_index != 0)
674 {
675 pattern = *from;
676
677 has_prev_c = FALSE;
678 prev_c = 0;
679
680 if (!is_negative &&
681 convert_glob_char_in_class (class_index, separator))
682 separator_seen = TRUE;
683 continue;
684 }
685 }
686 else if (c == CHAR_MINUS && has_prev_c &&
687 *pattern != CHAR_RIGHT_SQUARE_BRACKET)
688 {
689 convert_glob_write(out, CHAR_MINUS);
690
691 char_start = pattern;
692 GETCHARINCTEST(c, pattern);
693
694 if (pattern >= pattern_end) break;
695
696 if (escape != 0 && c == escape)
697 {
698 char_start = pattern;
699 GETCHARINCTEST(c, pattern);
700 }
701 else if (c == CHAR_LEFT_SQUARE_BRACKET && *pattern == CHAR_COLON)
702 {
703 *from = pattern;
704 return PCRE2_ERROR_CONVERT_SYNTAX;
705 }
706
707 if (prev_c > c)
708 {
709 *from = pattern;
710 return PCRE2_ERROR_CONVERT_SYNTAX;
711 }
712
713 if (prev_c < separator && separator < c) separator_seen = TRUE;
714
715 has_prev_c = FALSE;
716 prev_c = 0;
717 }
718 else
719 {
720 if (escape != 0 && c == escape)
721 {
722 char_start = pattern;
723 GETCHARINCTEST(c, pattern);
724
725 if (pattern >= pattern_end) break;
726 }
727
728 has_prev_c = TRUE;
729 prev_c = c;
730 }
731
732 if (c == CHAR_LEFT_SQUARE_BRACKET || c == CHAR_RIGHT_SQUARE_BRACKET ||
733 c == CHAR_BACKSLASH || c == CHAR_MINUS)
734 convert_glob_write(out, CHAR_BACKSLASH);
735
736 if (c == separator) separator_seen = TRUE;
737
738 do convert_glob_write(out, *char_start++); while (char_start < pattern);
739 }
740
741*from = pattern;
742return PCRE2_ERROR_MISSING_SQUARE_BRACKET;
743}
744
745
746/* Prints a (*COMMIT) into the output.
747
748Arguments:
749 out output context
750*/
751
752static void
753convert_glob_print_commit(pcre2_output_context *out)
754{
755out->out_str[0] = CHAR_LEFT_PARENTHESIS;
756out->out_str[1] = CHAR_ASTERISK;
757out->out_str[2] = CHAR_C;
758out->out_str[3] = CHAR_O;
759out->out_str[4] = CHAR_M;
760out->out_str[5] = CHAR_M;
761out->out_str[6] = CHAR_I;
762out->out_str[7] = CHAR_T;
763convert_glob_write_str(out, 8);
764convert_glob_write(out, CHAR_RIGHT_PARENTHESIS);
765}
766
767
/* Bash glob converter. The glob is scanned once, writing an equivalent PCRE2
pattern through an output context that counts every code unit but stores only
those that fit in the buffer.

Arguments:
  options        the option bits (the caller removes the glob type bit)
  pattern        the pattern
  plength        length in code units
  utf            TRUE if UTF
  use_buffer     where to put the output
  use_length     length of use_buffer
  bufflenptr     where to put the used length; on success this is the length of
                   the converted pattern, excluding the terminating zero, and
                   on error it is an offset into the input pattern
  dummyrun       TRUE if a dummy run, in which case overflowing use_buffer is
                   not an error
  ccontext       the convert context, which supplies the separator and escape
                   characters

Returns:         0 => success
                !0 => error code
*/

static int
convert_glob(uint32_t options, PCRE2_SPTR pattern, PCRE2_SIZE plength,
  BOOL utf, PCRE2_UCHAR *use_buffer, PCRE2_SIZE use_length,
  PCRE2_SIZE *bufflenptr, BOOL dummyrun, pcre2_convert_context *ccontext)
{
pcre2_output_context out;
PCRE2_SPTR pattern_start = pattern;
PCRE2_SPTR pattern_end = pattern + plength;
PCRE2_UCHAR separator = ccontext->glob_separator;
PCRE2_UCHAR escape = ccontext->glob_escape;
PCRE2_UCHAR c;
BOOL no_wildsep = (options & PCRE2_CONVERT_GLOB_NO_WILD_SEPARATOR) != 0;
BOOL no_starstar = (options & PCRE2_CONVERT_GLOB_NO_STARSTAR) != 0;
BOOL in_atomic = FALSE;        /* An atomic group is open in the output */
BOOL after_starstar = FALSE;   /* A double asterisk has been processed */
BOOL no_slash_z = FALSE;       /* Do not add the end anchor */
BOOL with_escape, is_start, after_separator;
int result = 0;

(void)utf; /* Avoid compiler warning. */

#ifdef SUPPORT_UNICODE
if (utf && (separator >= 128 || escape >= 128))
  {
  /* Currently only ASCII characters are supported. */
  *bufflenptr = 0;
  return PCRE2_ERROR_CONVERT_SYNTAX;
  }
#endif

/* The separator needs a backslash in the output if it is one of the
characters that must be escaped. */

with_escape = strchr(pcre2_escaped_literals, separator) != NULL;

/* Set up the output context. */
out.output = use_buffer;
out.output_end = use_buffer + use_length;
out.output_size = 0;

/* The converted pattern always starts with (?s). */

out.out_str[0] = CHAR_LEFT_PARENTHESIS;
out.out_str[1] = CHAR_QUESTION_MARK;
out.out_str[2] = CHAR_s;
out.out_str[3] = CHAR_RIGHT_PARENTHESIS;
convert_glob_write_str(&out, 4);

/* Anchor the pattern at the start with \A, except when it begins with an
asterisk and wildcards may match the separator, or when it begins with a
double asterisk and that feature is enabled. */

is_start = TRUE;

if (pattern < pattern_end && pattern[0] == CHAR_ASTERISK)
  {
  if (no_wildsep)
    is_start = FALSE;
  else if (!no_starstar && pattern + 1 < pattern_end &&
           pattern[1] == CHAR_ASTERISK)
    is_start = FALSE;
  }

if (is_start)
  {
  out.out_str[0] = CHAR_BACKSLASH;
  out.out_str[1] = CHAR_A;
  convert_glob_write_str(&out, 2);
  }

while (pattern < pattern_end)
  {
  c = *pattern++;

  if (c == CHAR_ASTERISK)
    {
    /* From here on, is_start means that this asterisk is the first character
    of the pattern. */

    is_start = pattern == pattern_start + 1;

    /* Close any atomic group that was opened for a previous asterisk. */

    if (in_atomic)
      {
      convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);
      in_atomic = FALSE;
      }

    /* Handle a double asterisk when that feature is enabled. */

    if (!no_starstar && pattern < pattern_end && *pattern == CHAR_ASTERISK)
      {
      after_separator = is_start || (pattern[-2] == separator);

      /* Skip the second and any further asterisks. */

      do pattern++; while (pattern < pattern_end &&
                           *pattern == CHAR_ASTERISK);

      /* At the end of the pattern nothing more needs to be generated, and
      the end anchor is omitted. */

      if (pattern >= pattern_end)
        {
        no_slash_z = TRUE;
        break;
        }

      after_starstar = TRUE;

      /* Skip an escape character that precedes a separator. */

      if (after_separator && escape != 0 && *pattern == escape &&
          pattern + 1 < pattern_end && pattern[1] == separator)
        pattern++;

      /* At the start of the pattern, output is needed only when a separator
      follows: a group that matches either the start of the subject or a
      separator. */

      if (is_start)
        {
        if (*pattern != separator) continue;

        out.out_str[0] = CHAR_LEFT_PARENTHESIS;
        out.out_str[1] = CHAR_QUESTION_MARK;
        out.out_str[2] = CHAR_COLON;
        out.out_str[3] = CHAR_BACKSLASH;
        out.out_str[4] = CHAR_A;
        out.out_str[5] = CHAR_VERTICAL_LINE;
        convert_glob_write_str(&out, 6);

        convert_glob_print_separator(&out, separator, with_escape);
        convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);

        pattern++;
        continue;
        }

      convert_glob_print_commit(&out);

      /* Unless the double asterisk is surrounded by separators, it becomes a
      non-greedy match of anything. */

      if (!after_separator || *pattern != separator)
        {
        out.out_str[0] = CHAR_DOT;
        out.out_str[1] = CHAR_ASTERISK;
        out.out_str[2] = CHAR_QUESTION_MARK;
        convert_glob_write_str(&out, 3);
        continue;
        }

      /* Between separators: an optional non-capturing group that matches
      anything non-greedily followed by a separator. */

      out.out_str[0] = CHAR_LEFT_PARENTHESIS;
      out.out_str[1] = CHAR_QUESTION_MARK;
      out.out_str[2] = CHAR_COLON;
      out.out_str[3] = CHAR_DOT;
      out.out_str[4] = CHAR_ASTERISK;
      out.out_str[5] = CHAR_QUESTION_MARK;

      convert_glob_write_str(&out, 6);

      convert_glob_print_separator(&out, separator, with_escape);

      out.out_str[0] = CHAR_RIGHT_PARENTHESIS;
      out.out_str[1] = CHAR_QUESTION_MARK;
      out.out_str[2] = CHAR_QUESTION_MARK;
      convert_glob_write_str(&out, 3);

      pattern++;
      continue;
      }

    /* A single asterisk, or a run of asterisks when the double asterisk
    feature is disabled; the run is treated as one asterisk. */

    if (pattern < pattern_end && *pattern == CHAR_ASTERISK)
      {
      do pattern++; while (pattern < pattern_end &&
                           *pattern == CHAR_ASTERISK);
      }

    if (no_wildsep)
      {
      if (pattern >= pattern_end)
        {
        no_slash_z = TRUE;
        break;
        }

      /* Start check must be after the end check. */
      if (is_start) continue;
      }

    /* Other than at the start, the wildcard is either put inside an atomic
    group (after a double asterisk) or preceded by (*COMMIT). */

    if (!is_start)
      {
      if (after_starstar)
        {
        out.out_str[0] = CHAR_LEFT_PARENTHESIS;
        out.out_str[1] = CHAR_QUESTION_MARK;
        out.out_str[2] = CHAR_GREATER_THAN_SIGN;
        convert_glob_write_str(&out, 3);
        in_atomic = TRUE;
        }
      else
        convert_glob_print_commit(&out);
      }

    if (no_wildsep)
      convert_glob_write(&out, CHAR_DOT);
    else
      convert_glob_print_wildcard(&out, separator, with_escape);

    /* The repeat is non-greedy, except at the end of the pattern, where it is
    possessive. */

    out.out_str[0] = CHAR_ASTERISK;
    out.out_str[1] = CHAR_QUESTION_MARK;
    if (pattern >= pattern_end)
      out.out_str[1] = CHAR_PLUS;
    convert_glob_write_str(&out, 2);
    continue;
    }

  /* A question mark matches any one character, excluding the separator
  unless wildcards may match it. */

  if (c == CHAR_QUESTION_MARK)
    {
    if (no_wildsep)
      convert_glob_write(&out, CHAR_DOT);
    else
      convert_glob_print_wildcard(&out, separator, with_escape);
    continue;
    }

  if (c == CHAR_LEFT_SQUARE_BRACKET)
    {
    result = convert_glob_parse_range(&pattern, pattern_end,
      &out, utf, separator, with_escape, escape, no_wildsep);
    if (result != 0) break;
    continue;
    }

  /* The escape character makes the next character literal; it is an error
  for it to be the last character of the pattern. */

  if (escape != 0 && c == escape)
    {
    if (pattern >= pattern_end)
      {
      result = PCRE2_ERROR_CONVERT_SYNTAX;
      break;
      }
    c = *pattern++;
    }

  /* A literal character, escaped in the output if necessary. */

  if (c < 128 && strchr(pcre2_escaped_literals, c) != NULL)
    convert_glob_write(&out, CHAR_BACKSLASH);

  convert_glob_write(&out, c);
  }

if (result == 0)
  {
  if (!no_slash_z)
    {
    out.out_str[0] = CHAR_BACKSLASH;
    out.out_str[1] = CHAR_z;
    convert_glob_write_str(&out, 2);
    }

  if (in_atomic)
    convert_glob_write(&out, CHAR_RIGHT_PARENTHESIS);

  convert_glob_write(&out, CHAR_NUL);

  /* In a real run, if the number of code units stored differs from the number
  counted, the buffer was too small. */

  if (!dummyrun && out.output_size != (PCRE2_SIZE) (out.output - use_buffer))
    result = PCRE2_ERROR_NOMEMORY;
  }

/* On error, return the offset that was reached in the input pattern. */

if (result != 0)
  {
  *bufflenptr = pattern - pattern_start;
  return result;
  }

/* The returned length does not include the terminating zero. */

*bufflenptr = out.output_size - 1;
return 0;
}
1034
1035
1036/*************************************************
1037* Convert pattern *
1038*************************************************/
1039
1040/* This is the external-facing function for converting other forms of pattern
1041into PCRE2 regular expression patterns. On error, the bufflenptr argument is
1042used to return an offset in the original pattern.
1043
1044Arguments:
1045 pattern the input pattern
1046 plength length of input, or PCRE2_ZERO_TERMINATED
1047 options options bits
1048 buffptr pointer to pointer to output buffer
1049 bufflenptr pointer to length of output buffer
1050 ccontext convert context or NULL
1051
1052Returns: 0 for success, else an error code (+ve or -ve)
1053*/
1054
1055PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
1056pcre2_pattern_convert(PCRE2_SPTR pattern, PCRE2_SIZE plength, uint32_t options,
1057 PCRE2_UCHAR **buffptr, PCRE2_SIZE *bufflenptr,
1058 pcre2_convert_context *ccontext)
1059{
1060int i, rc;
1061PCRE2_UCHAR dummy_buffer[DUMMY_BUFFER_SIZE];
1062PCRE2_UCHAR *use_buffer = dummy_buffer;
1063PCRE2_SIZE use_length = DUMMY_BUFFER_SIZE;
1064BOOL utf = (options & PCRE2_CONVERT_UTF) != 0;
1065uint32_t pattype = options & TYPE_OPTIONS;
1066
1067if (pattern == NULL || bufflenptr == NULL) return PCRE2_ERROR_NULL;
1068
1069if ((options & ~ALL_OPTIONS) != 0 || /* Undefined bit set */
1070 (pattype & (~pattype+1)) != pattype || /* More than one type set */
1071 pattype == 0) /* No type set */
1072 {
1073 *bufflenptr = 0; /* Error offset */
1074 return PCRE2_ERROR_BADOPTION;
1075 }
1076
1077if (plength == PCRE2_ZERO_TERMINATED) plength = PRIV(strlen)(pattern);
1078if (ccontext == NULL) ccontext =
1079 (pcre2_convert_context *)(&PRIV(default_convert_context));
1080
1081/* Check UTF if required. */
1082
1083#ifndef SUPPORT_UNICODE
1084if (utf)
1085 {
1086 *bufflenptr = 0; /* Error offset */
1087 return PCRE2_ERROR_UNICODE_NOT_SUPPORTED;
1088 }
1089#else
1090if (utf && (options & PCRE2_CONVERT_NO_UTF_CHECK) == 0)
1091 {
1092 PCRE2_SIZE erroroffset;
1093 rc = PRIV(valid_utf)(pattern, plength, &erroroffset);
1094 if (rc != 0)
1095 {
1096 *bufflenptr = erroroffset;
1097 return rc;
1098 }
1099 }
1100#endif
1101
1102/* If buffptr is not NULL, and what it points to is not NULL, we are being
1103provided with a buffer and a length, so set them as the buffer to use. */
1104
1105if (buffptr != NULL && *buffptr != NULL)
1106 {
1107 use_buffer = *buffptr;
1108 use_length = *bufflenptr;
1109 }
1110
1111/* Call an individual converter, either just once (if a buffer was provided or
1112just the length is needed), or twice (if a memory allocation is required). */
1113
1114for (i = 0; i < 2; i++)
1115 {
1116 PCRE2_UCHAR *allocated;
1117 BOOL dummyrun = buffptr == NULL || *buffptr == NULL;
1118
1119 switch(pattype)
1120 {
1121 case PCRE2_CONVERT_GLOB:
1122 rc = convert_glob(options & ~PCRE2_CONVERT_GLOB, pattern, plength, utf,
1123 use_buffer, use_length, bufflenptr, dummyrun, ccontext);
1124 break;
1125
1126 case PCRE2_CONVERT_POSIX_BASIC:
1127 case PCRE2_CONVERT_POSIX_EXTENDED:
1128 rc = convert_posix(pattype, pattern, plength, utf, use_buffer, use_length,
1129 bufflenptr, dummyrun, ccontext);
1130 break;
1131
1132 default:
1133 *bufflenptr = 0; /* Error offset */
1134 return PCRE2_ERROR_INTERNAL;
1135 }
1136
1137 if (rc != 0 || /* Error */
1138 buffptr == NULL || /* Just the length is required */
1139 *buffptr != NULL) /* Buffer was provided or allocated */
1140 return rc;
1141
1142 /* Allocate memory for the buffer, with hidden space for an allocator at
1143 the start. The next time round the loop runs the conversion for real. */
1144
1145 allocated = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
1146 (*bufflenptr + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)ccontext);
1147 if (allocated == NULL) return PCRE2_ERROR_NOMEMORY;
1148 *buffptr = (PCRE2_UCHAR *)(((char *)allocated) + sizeof(pcre2_memctl));
1149
1150 use_buffer = *buffptr;
1151 use_length = *bufflenptr + 1;
1152 }
1153
1154/* Control should never get here. */
1155
1156return PCRE2_ERROR_INTERNAL;
1157}
1158
1159
1160/*************************************************
1161* Free converted pattern *
1162*************************************************/
1163
1164/* This frees a converted pattern that was put in newly-allocated memory.
1165
1166Argument: the converted pattern
1167Returns: nothing
1168*/
1169
1170PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
1171pcre2_converted_pattern_free(PCRE2_UCHAR *converted)
1172{
1173if (converted != NULL)
1174 {
1175 pcre2_memctl *memctl =
1176 (pcre2_memctl *)((char *)converted - sizeof(pcre2_memctl));
1177 memctl->free(memctl, memctl->memory_data);
1178 }
1179}
1180
1181/* End of pcre2_convert.c */
1182