| 1 | /************************************************* | 
|---|
| 2 | *      Perl-Compatible Regular Expressions       * | 
|---|
| 3 | *************************************************/ | 
|---|
| 4 |  | 
|---|
| 5 | /* PCRE is a library of functions to support regular expressions whose syntax | 
|---|
| 6 | and semantics are as close as possible to those of the Perl 5 language. | 
|---|
| 7 |  | 
|---|
| 8 | Written by Philip Hazel | 
|---|
| 9 | Original API code Copyright (c) 1997-2012 University of Cambridge | 
|---|
| 10 | New API code Copyright (c) 2016-2021 University of Cambridge | 
|---|
| 11 |  | 
|---|
| 12 | ----------------------------------------------------------------------------- | 
|---|
| 13 | Redistribution and use in source and binary forms, with or without | 
|---|
| 14 | modification, are permitted provided that the following conditions are met: | 
|---|
| 15 |  | 
|---|
| 16 | * Redistributions of source code must retain the above copyright notice, | 
|---|
| 17 | this list of conditions and the following disclaimer. | 
|---|
| 18 |  | 
|---|
| 19 | * Redistributions in binary form must reproduce the above copyright | 
|---|
| 20 | notice, this list of conditions and the following disclaimer in the | 
|---|
| 21 | documentation and/or other materials provided with the distribution. | 
|---|
| 22 |  | 
|---|
| 23 | * Neither the name of the University of Cambridge nor the names of its | 
|---|
| 24 | contributors may be used to endorse or promote products derived from | 
|---|
| 25 | this software without specific prior written permission. | 
|---|
| 26 |  | 
|---|
| 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 
|---|
| 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
|---|
| 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
|---|
| 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | 
|---|
| 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 
|---|
| 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 
|---|
| 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 
|---|
| 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 
|---|
| 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 
|---|
| 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
|---|
| 37 | POSSIBILITY OF SUCH DAMAGE. | 
|---|
| 38 | ----------------------------------------------------------------------------- | 
|---|
| 39 | */ | 
|---|
| 40 |  | 
|---|
| 41 |  | 
|---|
| 42 | #ifdef HAVE_CONFIG_H | 
|---|
| 43 | #include "config.h" | 
|---|
| 44 | #endif | 
|---|
| 45 |  | 
|---|
| 46 | #include "pcre2_internal.h" | 
|---|
| 47 |  | 
|---|
/* Two-level stringification. XSTRING(s) passes its argument through one extra
macro level so that it is macro-expanded before STRING applies the # operator.
This is what lets XSTRING(MAX_NAME_SIZE) insert the value of the macro into a
message rather than its name. */

#define STRING(a)  # a
#define XSTRING(s) STRING(s)
|---|
| 50 |  | 
|---|
/* The texts of compile-time error messages. Compile-time error numbers start
at COMPILE_ERROR_BASE (100), so the message for error number e is entry number
(e - COMPILE_ERROR_BASE) in this list.

This used to be a table of strings, but in order to reduce the number of
relocations needed when a shared library is loaded dynamically, it is now one
long string. We cannot use a table of offsets, because the lengths of inserts
such as XSTRING(MAX_NAME_SIZE) are not known. Instead,
pcre2_get_error_message() counts through to the one it wants - this isn't a
performance issue because these strings are used only when there is an error.

Each substring ends with \0 to insert a null character. This includes the final
substring, so that the whole string ends with \0\0 (the explicit \0 plus the
terminator that the compiler appends), which can be detected when counting
through.

Because a message is located purely by its position, entries must never be
inserted, removed, or reordered; new messages are added at the end. The
numeric comments give the index of the entry that follows them. */

static const unsigned char compile_error_texts[] =
  "no error\0"
  "\\ at end of pattern\0"
  "\\c at end of pattern\0"
  "unrecognized character follows \\\0"
  "numbers out of order in {} quantifier\0"
  /* 5 */
  "number too big in {} quantifier\0"
  "missing terminating ] for character class\0"
  "escape sequence is invalid in character class\0"
  "range out of order in character class\0"
  "quantifier does not follow a repeatable item\0"
  /* 10 */
  "internal error: unexpected repeat\0"
  "unrecognized character after (? or (?-\0"
  "POSIX named classes are supported only within a class\0"
  "POSIX collating elements are not supported\0"
  "missing closing parenthesis\0"
  /* 15 */
  "reference to non-existent subpattern\0"
  "pattern passed as NULL\0"
  "unrecognised compile-time option bit(s)\0"
  "missing ) after (?# comment\0"
  "parentheses are too deeply nested\0"
  /* 20 */
  "regular expression is too large\0"
  "failed to allocate heap memory\0"
  "unmatched closing parenthesis\0"
  "internal error: code overflow\0"
  "missing closing parenthesis for condition\0"
  /* 25 */
  "lookbehind assertion is not fixed length\0"
  "a relative value of zero is not allowed\0"
  "conditional subpattern contains more than two branches\0"
  "assertion expected after (?( or (?(?C)\0"
  "digit expected after (?+ or (?-\0"
  /* 30 */
  "unknown POSIX class name\0"
  "internal error in pcre2_study(): should not occur\0"
  "this version of PCRE2 does not have Unicode support\0"
  "parentheses are too deeply nested (stack check)\0"
  "character code point value in \\x{} or \\o{} is too large\0"
  /* 35 */
  "lookbehind is too complicated\0"
  /* The code unit width is inserted by the preprocessor. */
  "\\C is not allowed in a lookbehind assertion in UTF-"XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0"
  "PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u\0"
  "number after (?C is greater than 255\0"
  "closing parenthesis for (?C expected\0"
  /* 40 */
  "invalid escape sequence in (*VERB) name\0"
  "unrecognized character after (?P\0"
  "syntax error in subpattern name (missing terminator?)\0"
  "two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0"
  "subpattern name must start with a non-digit\0"
  /* 45 */
  "this version of PCRE2 does not have support for \\P, \\p, or \\X\0"
  "malformed \\P or \\p sequence\0"
  "unknown property after \\P or \\p\0"
  /* The limits in the next two messages are inserted by the preprocessor. */
  "subpattern name is too long (maximum "XSTRING(MAX_NAME_SIZE) " code units)\0"
  "too many named subpatterns (maximum "XSTRING(MAX_NAME_COUNT) ")\0"
  /* 50 */
  "invalid range in character class\0"
  "octal value is greater than \\377 in 8-bit non-UTF-8 mode\0"
  "internal error: overran compiling workspace\0"
  "internal error: previously-checked referenced subpattern not found\0"
  "DEFINE subpattern contains more than one branch\0"
  /* 55 */
  "missing opening brace after \\o\0"
  "internal error: unknown newline setting\0"
  "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0"
  "(?R (recursive pattern call) must be followed by a closing parenthesis\0"
  /* The next entry is a placeholder that keeps the following entries at their
  original positions; the text it replaced is preserved here for reference. */
  /* "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" */
  "obsolete error (should not occur)\0"/* Was the above */
  /* 60 */
  "(*VERB) not recognized or malformed\0"
  "subpattern number is too big\0"
  "subpattern name expected\0"
  "internal error: parsed pattern overflow\0"
  "non-octal character in \\o{} (closing brace missing?)\0"
  /* 65 */
  "different names for subpatterns of the same number are not allowed\0"
  "(*MARK) must have an argument\0"
  "non-hex character in \\x{} (closing brace missing?)\0"
  /* Exactly one of the two alternatives below is compiled, so the entry count
  is the same with or without EBCDIC. */
#ifndef EBCDIC
  "\\c must be followed by a printable ASCII character\0"
#else
  "\\c must be followed by a letter or one of [\\]^_?\0"
#endif
  "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
  /* 70 */
  "internal error: unknown meta code in check_lookbehinds()\0"
  "\\N is not supported in a class\0"
  "callout string is too long\0"
  "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
  "using UTF is disabled by the application\0"
  /* 75 */
  "using UCP is disabled by the application\0"
  "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
  "character code point value in \\u.... sequence is too large\0"
  "digits missing in \\x{} or \\o{} or \\N{U+}\0"
  "syntax error or number too big in (?(VERSION condition\0"
  /* 80 */
  "internal error: unknown opcode in auto_possessify()\0"
  "missing terminating delimiter for callout with string argument\0"
  "unrecognized string delimiter follows (?C\0"
  "using \\C is disabled by the application\0"
  "(?| and/or (?J: or (?x: parentheses are too deeply nested\0"
  /* 85 */
  "using \\C is disabled in this PCRE2 library\0"
  "regular expression is too complicated\0"
  "lookbehind assertion is too long\0"
  "pattern string is longer than the limit set by the application\0"
  "internal error: unknown code in parsed pattern\0"
  /* 90 */
  "internal error: bad code value in parsed_skip()\0"
  "PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0"
  "invalid option bits with PCRE2_LITERAL\0"
  "\\N{U+dddd} is supported only in Unicode (UTF) mode\0"
  "invalid hyphen in option setting\0"
  /* 95 */
  "(*alpha_assertion) not recognized\0"
  "script runs require Unicode support, which this version of PCRE2 does not have\0"
  "too many capturing groups (maximum 65535)\0"
  "atomic assertion expected after (?( or (?(?C)\0"
  "\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0"
  ;
|---|
| 191 |  | 
|---|
/* Match-time and UTF error texts are in the same format: one long string in
which every message ends with \0, so that the whole list ends with \0\0. These
errors have negative numbers; pcre2_get_error_message() negates the number and
counts through the list, so entry k is the text for error number -k. As with
the compile-time list, entries must keep their positions. The numeric comments
give the index of the entry that follows them. */

static const unsigned char match_error_texts[] =
  "no error\0"
  "no match\0"
  "partial match\0"
  "UTF-8 error: 1 byte missing at end\0"
  "UTF-8 error: 2 bytes missing at end\0"
  /* 5 */
  "UTF-8 error: 3 bytes missing at end\0"
  "UTF-8 error: 4 bytes missing at end\0"
  "UTF-8 error: 5 bytes missing at end\0"
  "UTF-8 error: byte 2 top bits not 0x80\0"
  "UTF-8 error: byte 3 top bits not 0x80\0"
  /* 10 */
  "UTF-8 error: byte 4 top bits not 0x80\0"
  "UTF-8 error: byte 5 top bits not 0x80\0"
  "UTF-8 error: byte 6 top bits not 0x80\0"
  "UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
  "UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
  /* 15 */
  "UTF-8 error: code points greater than 0x10ffff are not defined\0"
  "UTF-8 error: code points 0xd800-0xdfff are not defined\0"
  "UTF-8 error: overlong 2-byte sequence\0"
  "UTF-8 error: overlong 3-byte sequence\0"
  "UTF-8 error: overlong 4-byte sequence\0"
  /* 20 */
  "UTF-8 error: overlong 5-byte sequence\0"
  "UTF-8 error: overlong 6-byte sequence\0"
  "UTF-8 error: isolated byte with 0x80 bit set\0"
  "UTF-8 error: illegal byte (0xfe or 0xff)\0"
  "UTF-16 error: missing low surrogate at end\0"
  /* 25 */
  "UTF-16 error: invalid low surrogate\0"
  "UTF-16 error: isolated low surrogate\0"
  "UTF-32 error: code points 0xd800-0xdfff are not defined\0"
  "UTF-32 error: code points greater than 0x10ffff are not defined\0"
  "bad data value\0"
  /* 30 */
  "patterns do not all use the same character tables\0"
  "magic number missing\0"
  "pattern compiled in wrong mode: 8/16/32-bit error\0"
  "bad offset value\0"
  "bad option value\0"
  /* 35 */
  "invalid replacement string\0"
  "bad offset into UTF string\0"
  "callout error code\0"/* Never returned by PCRE2 itself */
  "invalid data in workspace for DFA restart\0"
  "too much recursion for DFA matching\0"
  /* 40 */
  "backreference condition or recursion test is not supported for DFA matching\0"
  "function is not supported for DFA matching\0"
  "pattern contains an item that is not supported for DFA matching\0"
  "workspace size exceeded in DFA matching\0"
  "internal error - pattern overwritten?\0"
  /* 45 */
  "bad JIT option\0"
  "JIT stack limit reached\0"
  "match limit exceeded\0"
  "no more memory\0"
  "unknown substring\0"
  /* 50 */
  "non-unique substring name\0"
  "NULL argument passed with non-zero length\0"
  "nested recursion at the same subject position\0"
  "matching depth limit exceeded\0"
  "requested value is not available\0"
  /* 55 */
  "requested value is not set\0"
  "offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
  "bad escape sequence in replacement string\0"
  "expected closing curly bracket in replacement string\0"
  "bad substitution in replacement string\0"
  /* 60 */
  "match with end before start or start moved backwards is not supported\0"
  "too many replacements (more than INT_MAX)\0"
  "bad serialized data\0"
  "heap limit exceeded\0"
  "invalid syntax\0"
  /* 65 */
  "internal error - duplicate substitution match\0"
  "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0"
  ;
|---|
| 276 |  | 
|---|
| 277 |  | 
|---|
| 278 | /************************************************* | 
|---|
| 279 | *            Return error message                * | 
|---|
| 280 | *************************************************/ | 
|---|
| 281 |  | 
|---|
| 282 | /* This function copies an error message into a buffer whose units are of an | 
|---|
| 283 | appropriate width. Error numbers are positive for compile-time errors, and | 
|---|
| 284 | negative for match-time errors (except for UTF errors), but the numbers are all | 
|---|
| 285 | distinct. | 
|---|
| 286 |  | 
|---|
| 287 | Arguments: | 
|---|
| 288 | enumber       error number | 
|---|
| 289 | buffer        where to put the message (zero terminated) | 
|---|
| 290 | size          size of the buffer in code units | 
|---|
| 291 |  | 
|---|
| 292 | Returns:        length of message if all is well | 
|---|
| 293 | negative on error | 
|---|
| 294 | */ | 
|---|
| 295 |  | 
|---|
| 296 | PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION | 
|---|
| 297 | pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, PCRE2_SIZE size) | 
|---|
| 298 | { | 
|---|
| 299 | const unsigned char *message; | 
|---|
| 300 | PCRE2_SIZE i; | 
|---|
| 301 | int n; | 
|---|
| 302 |  | 
|---|
| 303 | if (size == 0) return PCRE2_ERROR_NOMEMORY; | 
|---|
| 304 |  | 
|---|
| 305 | if (enumber >= COMPILE_ERROR_BASE)  /* Compile error */ | 
|---|
| 306 | { | 
|---|
| 307 | message = compile_error_texts; | 
|---|
| 308 | n = enumber - COMPILE_ERROR_BASE; | 
|---|
| 309 | } | 
|---|
| 310 | else if (enumber < 0)               /* Match or UTF error */ | 
|---|
| 311 | { | 
|---|
| 312 | message = match_error_texts; | 
|---|
| 313 | n = -enumber; | 
|---|
| 314 | } | 
|---|
| 315 | else                                /* Invalid error number */ | 
|---|
| 316 | { | 
|---|
| 317 | message = (unsigned char *) "\0";  /* Empty message list */ | 
|---|
| 318 | n = 1; | 
|---|
| 319 | } | 
|---|
| 320 |  | 
|---|
| 321 | for (; n > 0; n--) | 
|---|
| 322 | { | 
|---|
| 323 | while (*message++ != CHAR_NUL) {}; | 
|---|
| 324 | if (*message == CHAR_NUL) return PCRE2_ERROR_BADDATA; | 
|---|
| 325 | } | 
|---|
| 326 |  | 
|---|
| 327 | for (i = 0; *message != 0; i++) | 
|---|
| 328 | { | 
|---|
| 329 | if (i >= size - 1) | 
|---|
| 330 | { | 
|---|
| 331 | buffer[i] = 0;     /* Terminate partial message */ | 
|---|
| 332 | return PCRE2_ERROR_NOMEMORY; | 
|---|
| 333 | } | 
|---|
| 334 | buffer[i] = *message++; | 
|---|
| 335 | } | 
|---|
| 336 |  | 
|---|
| 337 | buffer[i] = 0; | 
|---|
| 338 | return (int)i; | 
|---|
| 339 | } | 
|---|
| 340 |  | 
|---|
| 341 | /* End of pcre2_error.c */ | 
|---|
| 342 |  | 
|---|