1 | /************************************************* |
2 | * Perl-Compatible Regular Expressions * |
3 | *************************************************/ |
4 | |
5 | /* PCRE is a library of functions to support regular expressions whose syntax |
6 | and semantics are as close as possible to those of the Perl 5 language. |
7 | |
8 | Written by Philip Hazel |
9 | Original API code Copyright (c) 1997-2012 University of Cambridge |
10 | New API code Copyright (c) 2016-2021 University of Cambridge |
11 | |
12 | ----------------------------------------------------------------------------- |
13 | Redistribution and use in source and binary forms, with or without |
14 | modification, are permitted provided that the following conditions are met: |
15 | |
16 | * Redistributions of source code must retain the above copyright notice, |
17 | this list of conditions and the following disclaimer. |
18 | |
19 | * Redistributions in binary form must reproduce the above copyright |
20 | notice, this list of conditions and the following disclaimer in the |
21 | documentation and/or other materials provided with the distribution. |
22 | |
23 | * Neither the name of the University of Cambridge nor the names of its |
24 | contributors may be used to endorse or promote products derived from |
25 | this software without specific prior written permission. |
26 | |
27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
37 | POSSIBILITY OF SUCH DAMAGE. |
38 | ----------------------------------------------------------------------------- |
39 | */ |
40 | |
41 | |
42 | #ifdef HAVE_CONFIG_H |
43 | #include "config.h" |
44 | #endif |
45 | |
46 | #include "pcre2_internal.h" |
47 | |
48 | #define STRING(a) # a /* Stringify the argument exactly as written (it is not macro-expanded) */ |
49 | #define XSTRING(s) STRING(s) /* Macro-expand s first, then stringify the result via STRING() */ |
50 | |
51 | /* The texts of compile-time error messages. Compile-time error numbers start |
52 | at COMPILE_ERROR_BASE (100); pcre2_get_error_message() subtracts that base and |
53 | uses the result as a zero-based index into this list. The numbered comments |
54 | between the strings give the index of the string that follows each of them. |
55 | |
56 | This used to be a table of strings, but to reduce the number of relocations |
57 | needed when a shared library is loaded dynamically, it is now one long string. |
58 | A table of offsets cannot be used, because the lengths of inserts such as |
59 | XSTRING(MAX_NAME_SIZE) are not known; instead the lookup counts through the |
60 | substrings, which is acceptable because they are used only after an error. |
61 | |
62 | Each substring ends with \0 to insert a null character, including the final |
63 | one, so the whole string ends with \0\0, which marks the end of the list. */ |
64 | |
65 | static const unsigned char compile_error_texts[] = |
66 | "no error\0" |
67 | "\\ at end of pattern\0" |
68 | "\\c at end of pattern\0" |
69 | "unrecognized character follows \\\0" |
70 | "numbers out of order in {} quantifier\0" |
71 | /* 5 */ |
72 | "number too big in {} quantifier\0" |
73 | "missing terminating ] for character class\0" |
74 | "escape sequence is invalid in character class\0" |
75 | "range out of order in character class\0" |
76 | "quantifier does not follow a repeatable item\0" |
77 | /* 10 */ |
78 | "internal error: unexpected repeat\0" |
79 | "unrecognized character after (? or (?-\0" |
80 | "POSIX named classes are supported only within a class\0" |
81 | "POSIX collating elements are not supported\0" |
82 | "missing closing parenthesis\0" |
83 | /* 15 */ |
84 | "reference to non-existent subpattern\0" |
85 | "pattern passed as NULL\0" |
86 | "unrecognised compile-time option bit(s)\0" |
87 | "missing ) after (?# comment\0" |
88 | "parentheses are too deeply nested\0" |
89 | /* 20 */ |
90 | "regular expression is too large\0" |
91 | "failed to allocate heap memory\0" |
92 | "unmatched closing parenthesis\0" |
93 | "internal error: code overflow\0" |
94 | "missing closing parenthesis for condition\0" |
95 | /* 25 */ |
96 | "lookbehind assertion is not fixed length\0" |
97 | "a relative value of zero is not allowed\0" |
98 | "conditional subpattern contains more than two branches\0" |
99 | "assertion expected after (?( or (?(?C)\0" |
100 | "digit expected after (?+ or (?-\0" |
101 | /* 30 */ |
102 | "unknown POSIX class name\0" |
103 | "internal error in pcre2_study(): should not occur\0" |
104 | "this version of PCRE2 does not have Unicode support\0" |
105 | "parentheses are too deeply nested (stack check)\0" |
106 | "character code point value in \\x{} or \\o{} is too large\0" |
107 | /* 35 */ |
108 | "lookbehind is too complicated\0" |
109 | "\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0" |
110 | "PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u\0" |
111 | "number after (?C is greater than 255\0" |
112 | "closing parenthesis for (?C expected\0" |
113 | /* 40 */ |
114 | "invalid escape sequence in (*VERB) name\0" |
115 | "unrecognized character after (?P\0" |
116 | "syntax error in subpattern name (missing terminator?)\0" |
117 | "two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0" |
118 | "subpattern name must start with a non-digit\0" |
119 | /* 45 */ |
120 | "this version of PCRE2 does not have support for \\P, \\p, or \\X\0" |
121 | "malformed \\P or \\p sequence\0" |
122 | "unknown property after \\P or \\p\0" |
123 | "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " code units)\0" |
124 | "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0" |
125 | /* 50 */ |
126 | "invalid range in character class\0" |
127 | "octal value is greater than \\377 in 8-bit non-UTF-8 mode\0" |
128 | "internal error: overran compiling workspace\0" |
129 | "internal error: previously-checked referenced subpattern not found\0" |
130 | "DEFINE subpattern contains more than one branch\0" |
131 | /* 55 */ |
132 | "missing opening brace after \\o\0" |
133 | "internal error: unknown newline setting\0" |
134 | "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0" |
135 | "(?R (recursive pattern call) must be followed by a closing parenthesis\0" |
136 | /* "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" */ |
137 | "obsolete error (should not occur)\0" /* Was the above */ |
138 | /* 60 */ |
139 | "(*VERB) not recognized or malformed\0" |
140 | "subpattern number is too big\0" |
141 | "subpattern name expected\0" |
142 | "internal error: parsed pattern overflow\0" |
143 | "non-octal character in \\o{} (closing brace missing?)\0" |
144 | /* 65 */ |
145 | "different names for subpatterns of the same number are not allowed\0" |
146 | "(*MARK) must have an argument\0" |
147 | "non-hex character in \\x{} (closing brace missing?)\0" |
148 | #ifndef EBCDIC |
149 | "\\c must be followed by a printable ASCII character\0" |
150 | #else |
151 | "\\c must be followed by a letter or one of [\\]^_?\0" |
152 | #endif |
153 | "\\k is not followed by a braced, angle-bracketed, or quoted name\0" |
154 | /* 70 */ |
155 | "internal error: unknown meta code in check_lookbehinds()\0" |
156 | "\\N is not supported in a class\0" |
157 | "callout string is too long\0" |
158 | "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0" |
159 | "using UTF is disabled by the application\0" |
160 | /* 75 */ |
161 | "using UCP is disabled by the application\0" |
162 | "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0" |
163 | "character code point value in \\u.... sequence is too large\0" |
164 | "digits missing in \\x{} or \\o{} or \\N{U+}\0" |
165 | "syntax error or number too big in (?(VERSION condition\0" |
166 | /* 80 */ |
167 | "internal error: unknown opcode in auto_possessify()\0" |
168 | "missing terminating delimiter for callout with string argument\0" |
169 | "unrecognized string delimiter follows (?C\0" |
170 | "using \\C is disabled by the application\0" |
171 | "(?| and/or (?J: or (?x: parentheses are too deeply nested\0" |
172 | /* 85 */ |
173 | "using \\C is disabled in this PCRE2 library\0" |
174 | "regular expression is too complicated\0" |
175 | "lookbehind assertion is too long\0" |
176 | "pattern string is longer than the limit set by the application\0" |
177 | "internal error: unknown code in parsed pattern\0" |
178 | /* 90 */ |
179 | "internal error: bad code value in parsed_skip()\0" |
180 | "PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0" |
181 | "invalid option bits with PCRE2_LITERAL\0" |
182 | "\\N{U+dddd} is supported only in Unicode (UTF) mode\0" |
183 | "invalid hyphen in option setting\0" |
184 | /* 95 */ |
185 | "(*alpha_assertion) not recognized\0" |
186 | "script runs require Unicode support, which this version of PCRE2 does not have\0" |
187 | "too many capturing groups (maximum 65535)\0" |
188 | "atomic assertion expected after (?( or (?(?C)\0" |
189 | "\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0" |
190 | ; |
191 | |
192 | /* Match-time and UTF error texts: one long string of \0-terminated substrings ending with \0\0. These error numbers are negative; pcre2_get_error_message() negates the number and uses it as a zero-based index, so error -k selects the k-th string. The numbered comments give the index of the string that follows each of them. */ |
193 | |
194 | static const unsigned char match_error_texts[] = |
195 | "no error\0" |
196 | "no match\0" |
197 | "partial match\0" |
198 | "UTF-8 error: 1 byte missing at end\0" |
199 | "UTF-8 error: 2 bytes missing at end\0" |
200 | /* 5 */ |
201 | "UTF-8 error: 3 bytes missing at end\0" |
202 | "UTF-8 error: 4 bytes missing at end\0" |
203 | "UTF-8 error: 5 bytes missing at end\0" |
204 | "UTF-8 error: byte 2 top bits not 0x80\0" |
205 | "UTF-8 error: byte 3 top bits not 0x80\0" |
206 | /* 10 */ |
207 | "UTF-8 error: byte 4 top bits not 0x80\0" |
208 | "UTF-8 error: byte 5 top bits not 0x80\0" |
209 | "UTF-8 error: byte 6 top bits not 0x80\0" |
210 | "UTF-8 error: 5-byte character is not allowed (RFC 3629)\0" |
211 | "UTF-8 error: 6-byte character is not allowed (RFC 3629)\0" |
212 | /* 15 */ |
213 | "UTF-8 error: code points greater than 0x10ffff are not defined\0" |
214 | "UTF-8 error: code points 0xd800-0xdfff are not defined\0" |
215 | "UTF-8 error: overlong 2-byte sequence\0" |
216 | "UTF-8 error: overlong 3-byte sequence\0" |
217 | "UTF-8 error: overlong 4-byte sequence\0" |
218 | /* 20 */ |
219 | "UTF-8 error: overlong 5-byte sequence\0" |
220 | "UTF-8 error: overlong 6-byte sequence\0" |
221 | "UTF-8 error: isolated byte with 0x80 bit set\0" |
222 | "UTF-8 error: illegal byte (0xfe or 0xff)\0" |
223 | "UTF-16 error: missing low surrogate at end\0" |
224 | /* 25 */ |
225 | "UTF-16 error: invalid low surrogate\0" |
226 | "UTF-16 error: isolated low surrogate\0" |
227 | "UTF-32 error: code points 0xd800-0xdfff are not defined\0" |
228 | "UTF-32 error: code points greater than 0x10ffff are not defined\0" |
229 | "bad data value\0" |
230 | /* 30 */ |
231 | "patterns do not all use the same character tables\0" |
232 | "magic number missing\0" |
233 | "pattern compiled in wrong mode: 8/16/32-bit error\0" |
234 | "bad offset value\0" |
235 | "bad option value\0" |
236 | /* 35 */ |
237 | "invalid replacement string\0" |
238 | "bad offset into UTF string\0" |
239 | "callout error code\0" /* Never returned by PCRE2 itself */ |
240 | "invalid data in workspace for DFA restart\0" |
241 | "too much recursion for DFA matching\0" |
242 | /* 40 */ |
243 | "backreference condition or recursion test is not supported for DFA matching\0" |
244 | "function is not supported for DFA matching\0" |
245 | "pattern contains an item that is not supported for DFA matching\0" |
246 | "workspace size exceeded in DFA matching\0" |
247 | "internal error - pattern overwritten?\0" |
248 | /* 45 */ |
249 | "bad JIT option\0" |
250 | "JIT stack limit reached\0" |
251 | "match limit exceeded\0" |
252 | "no more memory\0" |
253 | "unknown substring\0" |
254 | /* 50 */ |
255 | "non-unique substring name\0" |
256 | "NULL argument passed with non-zero length\0" |
257 | "nested recursion at the same subject position\0" |
258 | "matching depth limit exceeded\0" |
259 | "requested value is not available\0" |
260 | /* 55 */ |
261 | "requested value is not set\0" |
262 | "offset limit set without PCRE2_USE_OFFSET_LIMIT\0" |
263 | "bad escape sequence in replacement string\0" |
264 | "expected closing curly bracket in replacement string\0" |
265 | "bad substitution in replacement string\0" |
266 | /* 60 */ |
267 | "match with end before start or start moved backwards is not supported\0" |
268 | "too many replacements (more than INT_MAX)\0" |
269 | "bad serialized data\0" |
270 | "heap limit exceeded\0" |
271 | "invalid syntax\0" |
272 | /* 65 */ |
273 | "internal error - duplicate substitution match\0" |
274 | "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0" |
275 | ; |
276 | |
277 | |
278 | /************************************************* |
279 | * Return error message * |
280 | *************************************************/ |
281 | |
282 | /* This function copies an error message into a buffer whose units are of an |
283 | appropriate width. Error numbers are positive for compile-time errors, and |
284 | negative for match-time errors (except for UTF errors), but the numbers are all |
285 | distinct. |
286 | |
287 | Arguments: |
288 | enumber error number |
289 | buffer where to put the message (zero terminated) |
290 | size size of the buffer in code units |
291 | |
292 | Returns: length of message if all is well |
293 | negative on error |
294 | */ |
295 | |
296 | PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION |
297 | pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, PCRE2_SIZE size) |
298 | { |
299 | const unsigned char *message; |
300 | PCRE2_SIZE i; |
301 | int n; |
302 | |
303 | if (size == 0) return PCRE2_ERROR_NOMEMORY; |
304 | |
305 | if (enumber >= COMPILE_ERROR_BASE) /* Compile error */ |
306 | { |
307 | message = compile_error_texts; |
308 | n = enumber - COMPILE_ERROR_BASE; |
309 | } |
310 | else if (enumber < 0) /* Match or UTF error */ |
311 | { |
312 | message = match_error_texts; |
313 | n = -enumber; |
314 | } |
315 | else /* Invalid error number */ |
316 | { |
317 | message = (unsigned char *)"\0" ; /* Empty message list */ |
318 | n = 1; |
319 | } |
320 | |
321 | for (; n > 0; n--) |
322 | { |
323 | while (*message++ != CHAR_NUL) {}; |
324 | if (*message == CHAR_NUL) return PCRE2_ERROR_BADDATA; |
325 | } |
326 | |
327 | for (i = 0; *message != 0; i++) |
328 | { |
329 | if (i >= size - 1) |
330 | { |
331 | buffer[i] = 0; /* Terminate partial message */ |
332 | return PCRE2_ERROR_NOMEMORY; |
333 | } |
334 | buffer[i] = *message++; |
335 | } |
336 | |
337 | buffer[i] = 0; |
338 | return (int)i; |
339 | } |
340 | |
341 | /* End of pcre2_error.c */ |
342 | |