1 | /************************************************* |
2 | * Perl-Compatible Regular Expressions * |
3 | *************************************************/ |
4 | |
5 | /* PCRE is a library of functions to support regular expressions whose syntax |
6 | and semantics are as close as possible to those of the Perl 5 language. |
7 | |
8 | Written by Philip Hazel |
9 | Original API code Copyright (c) 1997-2012 University of Cambridge |
10 | New API code Copyright (c) 2016-2020 University of Cambridge |
11 | |
12 | ----------------------------------------------------------------------------- |
13 | Redistribution and use in source and binary forms, with or without |
14 | modification, are permitted provided that the following conditions are met: |
15 | |
16 | * Redistributions of source code must retain the above copyright notice, |
17 | this list of conditions and the following disclaimer. |
18 | |
19 | * Redistributions in binary form must reproduce the above copyright |
20 | notice, this list of conditions and the following disclaimer in the |
21 | documentation and/or other materials provided with the distribution. |
22 | |
23 | * Neither the name of the University of Cambridge nor the names of its |
24 | contributors may be used to endorse or promote products derived from |
25 | this software without specific prior written permission. |
26 | |
27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
37 | POSSIBILITY OF SUCH DAMAGE. |
38 | ----------------------------------------------------------------------------- |
39 | */ |
40 | |
41 | |
42 | /* This module contains the external function pcre2_maketables(), which builds |
43 | character tables for PCRE2 in the current locale. The file is compiled on its |
44 | own as part of the PCRE2 library. It is also included in the compilation of |
45 | pcre2_dftables.c as a freestanding program, in which case the macro |
46 | PCRE2_DFTABLES is defined. */ |
47 | |
48 | #ifndef PCRE2_DFTABLES /* Compiling the library */ |
49 | # ifdef HAVE_CONFIG_H |
50 | # include "config.h" |
51 | # endif |
52 | # include "pcre2_internal.h" |
53 | #endif |
54 | |
55 | |
56 | |
57 | /************************************************* |
58 | * Create PCRE2 character tables * |
59 | *************************************************/ |
60 | |
61 | /* This function builds a set of character tables for use by PCRE2 and returns |
62 | a pointer to them. They are built using the ctype functions, and consequently |
63 | their contents will depend upon the current locale setting. When compiled as |
64 | part of the library, the store is obtained via a general context malloc, if |
65 | supplied, but when PCRE2_DFTABLES is defined (when compiling the pcre2_dftables |
66 | freestanding auxiliary program) malloc() is used, and the function has a |
67 | different name so as not to clash with the prototype in pcre2.h. |
68 | |
69 | Arguments: none when PCRE2_DFTABLES is defined |
70 | else a PCRE2 general context or NULL |
71 | Returns: pointer to the contiguous block of data |
72 | else NULL if memory allocation failed |
73 | */ |
74 | |
75 | #ifdef PCRE2_DFTABLES /* Included in freestanding pcre2_dftables program */ |
76 | static const uint8_t *maketables(void) |
77 | { |
78 | uint8_t *yield = (uint8_t *)malloc(TABLES_LENGTH); |
79 | |
80 | #else /* Not PCRE2_DFTABLES, that is, compiling the library */ |
81 | PCRE2_EXP_DEFN const uint8_t * PCRE2_CALL_CONVENTION |
82 | pcre2_maketables(pcre2_general_context *gcontext) |
83 | { |
84 | uint8_t *yield = (uint8_t *)((gcontext != NULL)? |
85 | gcontext->memctl.malloc(TABLES_LENGTH, gcontext->memctl.memory_data) : |
86 | malloc(TABLES_LENGTH)); |
87 | #endif /* PCRE2_DFTABLES */ |
88 | |
89 | int i; |
90 | uint8_t *p; |
91 | |
92 | if (yield == NULL) return NULL; |
93 | p = yield; |
94 | |
95 | /* First comes the lower casing table */ |
96 | |
97 | for (i = 0; i < 256; i++) *p++ = tolower(i); |
98 | |
99 | /* Next the case-flipping table */ |
100 | |
101 | for (i = 0; i < 256; i++) *p++ = islower(i)? toupper(i) : tolower(i); |
102 | |
103 | /* Then the character class tables. Don't try to be clever and save effort on |
104 | exclusive ones - in some locales things may be different. |
105 | |
106 | Note that the table for "space" includes everything "isspace" gives, including |
107 | VT in the default locale. This makes it work for the POSIX class [:space:]. |
108 | From PCRE1 release 8.34 and for all PCRE2 releases it is also correct for Perl |
109 | space, because Perl added VT at release 5.18. |
110 | |
111 | Note also that it is possible for a character to be alnum or alpha without |
112 | being lower or upper, such as "male and female ordinals" (\xAA and \xBA) in the |
113 | fr_FR locale (at least under Debian Linux's locales as of 12/2005). So we must |
114 | test for alnum specially. */ |
115 | |
116 | memset(p, 0, cbit_length); |
117 | for (i = 0; i < 256; i++) |
118 | { |
119 | if (isdigit(i)) p[cbit_digit + i/8] |= 1u << (i&7); |
120 | if (isupper(i)) p[cbit_upper + i/8] |= 1u << (i&7); |
121 | if (islower(i)) p[cbit_lower + i/8] |= 1u << (i&7); |
122 | if (isalnum(i)) p[cbit_word + i/8] |= 1u << (i&7); |
123 | if (i == '_') p[cbit_word + i/8] |= 1u << (i&7); |
124 | if (isspace(i)) p[cbit_space + i/8] |= 1u << (i&7); |
125 | if (isxdigit(i)) p[cbit_xdigit + i/8] |= 1u << (i&7); |
126 | if (isgraph(i)) p[cbit_graph + i/8] |= 1u << (i&7); |
127 | if (isprint(i)) p[cbit_print + i/8] |= 1u << (i&7); |
128 | if (ispunct(i)) p[cbit_punct + i/8] |= 1u << (i&7); |
129 | if (iscntrl(i)) p[cbit_cntrl + i/8] |= 1u << (i&7); |
130 | } |
131 | p += cbit_length; |
132 | |
133 | /* Finally, the character type table. In this, we used to exclude VT from the |
134 | white space chars, because Perl didn't recognize it as such for \s and for |
135 | comments within regexes. However, Perl changed at release 5.18, so PCRE1 |
136 | changed at release 8.34 and it's always been this way for PCRE2. */ |
137 | |
138 | for (i = 0; i < 256; i++) |
139 | { |
140 | int x = 0; |
141 | if (isspace(i)) x += ctype_space; |
142 | if (isalpha(i)) x += ctype_letter; |
143 | if (islower(i)) x += ctype_lcletter; |
144 | if (isdigit(i)) x += ctype_digit; |
145 | if (isalnum(i) || i == '_') x += ctype_word; |
146 | *p++ = x; |
147 | } |
148 | |
149 | return yield; |
150 | } |
151 | |
152 | #ifndef PCRE2_DFTABLES /* Compiling the library */ |
153 | PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION |
154 | pcre2_maketables_free(pcre2_general_context *gcontext, const uint8_t *tables) |
155 | { |
156 | if (gcontext) |
157 | gcontext->memctl.free((void *)tables, gcontext->memctl.memory_data); |
158 | else |
159 | free((void *)tables); |
160 | } |
161 | #endif |
162 | |
163 | /* End of pcre2_maketables.c */ |
164 | |