1 | /************************************************* |
2 | * Perl-Compatible Regular Expressions * |
3 | *************************************************/ |
4 | |
5 | /* PCRE is a library of functions to support regular expressions whose syntax |
6 | and semantics are as close as possible to those of the Perl 5 language. |
7 | |
8 | Written by Philip Hazel |
9 | Copyright (c) 1997-2014 University of Cambridge |
10 | |
11 | ----------------------------------------------------------------------------- |
12 | Redistribution and use in source and binary forms, with or without |
13 | modification, are permitted provided that the following conditions are met: |
14 | |
15 | * Redistributions of source code must retain the above copyright notice, |
16 | this list of conditions and the following disclaimer. |
17 | |
18 | * Redistributions in binary form must reproduce the above copyright |
19 | notice, this list of conditions and the following disclaimer in the |
20 | documentation and/or other materials provided with the distribution. |
21 | |
22 | * Neither the name of the University of Cambridge nor the names of its |
23 | contributors may be used to endorse or promote products derived from |
24 | this software without specific prior written permission. |
25 | |
26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
27 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
28 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
29 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
30 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
31 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
32 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
33 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
34 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
35 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
36 | POSSIBILITY OF SUCH DAMAGE. |
37 | ----------------------------------------------------------------------------- |
38 | */ |
39 | |
40 | |
41 | /* This module contains an internal function that tests a compiled pattern to |
42 | see if it was compiled with the opposite endianness. If so, it uses an |
43 | auxiliary local function to flip the appropriate bytes. */ |
44 | |
45 | #include "pcre_config.h" |
46 | #include "pcre_internal.h" |
47 | |
48 | |
49 | /************************************************* |
50 | * Swap byte functions * |
51 | *************************************************/ |
52 | |
53 | /* The following functions swap the bytes of a pcre_uint16 |
54 | and pcre_uint32 value. |
55 | |
56 | Arguments: |
57 | value any number |
58 | |
59 | Returns: the byte swapped value |
60 | */ |
61 | |
62 | static pcre_uint32 |
63 | swap_uint32(pcre_uint32 value) |
64 | { |
65 | return ((value & 0x000000ff) << 24) | |
66 | ((value & 0x0000ff00) << 8) | |
67 | ((value & 0x00ff0000) >> 8) | |
68 | (value >> 24); |
69 | } |
70 | |
71 | static pcre_uint16 |
72 | swap_uint16(pcre_uint16 value) |
73 | { |
74 | return (value >> 8) | (value << 8); |
75 | } |
76 | |
77 | |
78 | /************************************************* |
79 | * Test for a byte-flipped compiled regex * |
80 | *************************************************/ |
81 | |
82 | /* This function swaps the bytes of a compiled pattern usually |
83 | loaded from the disk. It also sets the tables pointer, which |
84 | is likely an invalid pointer after reload. |
85 | |
86 | Arguments: |
87 | argument_re points to the compiled expression |
88 | extra_data points to extra data or is NULL |
89 | tables points to the character tables or NULL |
90 | |
91 | Returns: 0 if the swap is successful, negative on error |
92 | */ |
93 | |
94 | #if defined COMPILE_PCRE8 |
95 | PCRE_EXP_DECL int pcre_pattern_to_host_byte_order(pcre *argument_re, |
96 | pcre_extra *, const unsigned char *tables) |
97 | #elif defined COMPILE_PCRE16 |
98 | PCRE_EXP_DECL int pcre16_pattern_to_host_byte_order(pcre16 *argument_re, |
99 | pcre16_extra *extra_data, const unsigned char *tables) |
100 | #elif defined COMPILE_PCRE32 |
101 | PCRE_EXP_DECL int pcre32_pattern_to_host_byte_order(pcre32 *argument_re, |
102 | pcre32_extra *extra_data, const unsigned char *tables) |
103 | #endif |
104 | { |
105 | REAL_PCRE *re = (REAL_PCRE *)argument_re; |
106 | pcre_study_data *study; |
107 | #ifndef COMPILE_PCRE8 |
108 | pcre_uchar *ptr; |
109 | int length; |
110 | #if defined SUPPORT_UTF && defined COMPILE_PCRE16 |
111 | BOOL utf; |
112 | BOOL utf16_char; |
113 | #endif /* SUPPORT_UTF && COMPILE_PCRE16 */ |
114 | #endif /* !COMPILE_PCRE8 */ |
115 | |
116 | if (re == NULL) return PCRE_ERROR_NULL; |
117 | if (re->magic_number == MAGIC_NUMBER) |
118 | { |
119 | if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; |
120 | re->tables = tables; |
121 | return 0; |
122 | } |
123 | |
124 | if (re->magic_number != REVERSED_MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
125 | if ((swap_uint32(re->flags) & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE; |
126 | |
127 | re->magic_number = MAGIC_NUMBER; |
128 | re->size = swap_uint32(re->size); |
129 | re->options = swap_uint32(re->options); |
130 | re->flags = swap_uint32(re->flags); |
131 | re->limit_match = swap_uint32(re->limit_match); |
132 | re->limit_recursion = swap_uint32(re->limit_recursion); |
133 | |
134 | #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16 |
135 | re->first_char = swap_uint16(re->first_char); |
136 | re->req_char = swap_uint16(re->req_char); |
137 | #elif defined COMPILE_PCRE32 |
138 | re->first_char = swap_uint32(re->first_char); |
139 | re->req_char = swap_uint32(re->req_char); |
140 | #endif |
141 | |
142 | re->max_lookbehind = swap_uint16(re->max_lookbehind); |
143 | re->top_bracket = swap_uint16(re->top_bracket); |
144 | re->top_backref = swap_uint16(re->top_backref); |
145 | re->name_table_offset = swap_uint16(re->name_table_offset); |
146 | re->name_entry_size = swap_uint16(re->name_entry_size); |
147 | re->name_count = swap_uint16(re->name_count); |
148 | re->ref_count = swap_uint16(re->ref_count); |
149 | re->tables = tables; |
150 | |
151 | if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0) |
152 | { |
153 | study = (pcre_study_data *)extra_data->study_data; |
154 | study->size = swap_uint32(study->size); |
155 | study->flags = swap_uint32(study->flags); |
156 | study->minlength = swap_uint32(study->minlength); |
157 | } |
158 | |
159 | #ifndef COMPILE_PCRE8 |
160 | ptr = (pcre_uchar *)re + re->name_table_offset; |
161 | length = re->name_count * re->name_entry_size; |
162 | #if defined SUPPORT_UTF && defined COMPILE_PCRE16 |
163 | utf = (re->options & PCRE_UTF16) != 0; |
164 | utf16_char = FALSE; |
165 | #endif /* SUPPORT_UTF && COMPILE_PCRE16 */ |
166 | |
167 | while(TRUE) |
168 | { |
169 | /* Swap previous characters. */ |
170 | while (length-- > 0) |
171 | { |
172 | #if defined COMPILE_PCRE16 |
173 | *ptr = swap_uint16(*ptr); |
174 | #elif defined COMPILE_PCRE32 |
175 | *ptr = swap_uint32(*ptr); |
176 | #endif |
177 | ptr++; |
178 | } |
179 | #if defined SUPPORT_UTF && defined COMPILE_PCRE16 |
180 | if (utf16_char) |
181 | { |
182 | if (HAS_EXTRALEN(ptr[-1])) |
183 | { |
184 | /* We know that there is only one extra character in UTF-16. */ |
185 | *ptr = swap_uint16(*ptr); |
186 | ptr++; |
187 | } |
188 | } |
189 | utf16_char = FALSE; |
190 | #endif /* SUPPORT_UTF */ |
191 | |
192 | /* Get next opcode. */ |
193 | length = 0; |
194 | #if defined COMPILE_PCRE16 |
195 | *ptr = swap_uint16(*ptr); |
196 | #elif defined COMPILE_PCRE32 |
197 | *ptr = swap_uint32(*ptr); |
198 | #endif |
199 | switch (*ptr) |
200 | { |
201 | case OP_END: |
202 | return 0; |
203 | |
204 | #if defined SUPPORT_UTF && defined COMPILE_PCRE16 |
205 | case OP_CHAR: |
206 | case OP_CHARI: |
207 | case OP_NOT: |
208 | case OP_NOTI: |
209 | case OP_STAR: |
210 | case OP_MINSTAR: |
211 | case OP_PLUS: |
212 | case OP_MINPLUS: |
213 | case OP_QUERY: |
214 | case OP_MINQUERY: |
215 | case OP_UPTO: |
216 | case OP_MINUPTO: |
217 | case OP_EXACT: |
218 | case OP_POSSTAR: |
219 | case OP_POSPLUS: |
220 | case OP_POSQUERY: |
221 | case OP_POSUPTO: |
222 | case OP_STARI: |
223 | case OP_MINSTARI: |
224 | case OP_PLUSI: |
225 | case OP_MINPLUSI: |
226 | case OP_QUERYI: |
227 | case OP_MINQUERYI: |
228 | case OP_UPTOI: |
229 | case OP_MINUPTOI: |
230 | case OP_EXACTI: |
231 | case OP_POSSTARI: |
232 | case OP_POSPLUSI: |
233 | case OP_POSQUERYI: |
234 | case OP_POSUPTOI: |
235 | case OP_NOTSTAR: |
236 | case OP_NOTMINSTAR: |
237 | case OP_NOTPLUS: |
238 | case OP_NOTMINPLUS: |
239 | case OP_NOTQUERY: |
240 | case OP_NOTMINQUERY: |
241 | case OP_NOTUPTO: |
242 | case OP_NOTMINUPTO: |
243 | case OP_NOTEXACT: |
244 | case OP_NOTPOSSTAR: |
245 | case OP_NOTPOSPLUS: |
246 | case OP_NOTPOSQUERY: |
247 | case OP_NOTPOSUPTO: |
248 | case OP_NOTSTARI: |
249 | case OP_NOTMINSTARI: |
250 | case OP_NOTPLUSI: |
251 | case OP_NOTMINPLUSI: |
252 | case OP_NOTQUERYI: |
253 | case OP_NOTMINQUERYI: |
254 | case OP_NOTUPTOI: |
255 | case OP_NOTMINUPTOI: |
256 | case OP_NOTEXACTI: |
257 | case OP_NOTPOSSTARI: |
258 | case OP_NOTPOSPLUSI: |
259 | case OP_NOTPOSQUERYI: |
260 | case OP_NOTPOSUPTOI: |
261 | if (utf) utf16_char = TRUE; |
262 | #endif |
263 | /* Fall through. */ |
264 | |
265 | default: |
266 | length = PRIV(OP_lengths)[*ptr] - 1; |
267 | break; |
268 | |
269 | case OP_CLASS: |
270 | case OP_NCLASS: |
271 | /* Skip the character bit map. */ |
272 | ptr += 32/sizeof(pcre_uchar); |
273 | length = 0; |
274 | break; |
275 | |
276 | case OP_XCLASS: |
277 | /* Reverse the size of the XCLASS instance. */ |
278 | ptr++; |
279 | #if defined COMPILE_PCRE16 |
280 | *ptr = swap_uint16(*ptr); |
281 | #elif defined COMPILE_PCRE32 |
282 | *ptr = swap_uint32(*ptr); |
283 | #endif |
284 | #ifndef COMPILE_PCRE32 |
285 | if (LINK_SIZE > 1) |
286 | { |
287 | /* LINK_SIZE can be 1 or 2 in 16 bit mode. */ |
288 | ptr++; |
289 | *ptr = swap_uint16(*ptr); |
290 | } |
291 | #endif |
292 | ptr++; |
293 | length = (GET(ptr, -LINK_SIZE)) - (1 + LINK_SIZE + 1); |
294 | #if defined COMPILE_PCRE16 |
295 | *ptr = swap_uint16(*ptr); |
296 | #elif defined COMPILE_PCRE32 |
297 | *ptr = swap_uint32(*ptr); |
298 | #endif |
299 | if ((*ptr & XCL_MAP) != 0) |
300 | { |
301 | /* Skip the character bit map. */ |
302 | ptr += 32/sizeof(pcre_uchar); |
303 | length -= 32/sizeof(pcre_uchar); |
304 | } |
305 | break; |
306 | } |
307 | ptr++; |
308 | } |
309 | /* Control should never reach here in 16/32 bit mode. */ |
310 | #else /* In 8-bit mode, the pattern does not need to be processed. */ |
311 | return 0; |
312 | #endif /* !COMPILE_PCRE8 */ |
313 | } |
314 | |
315 | /* End of pcre_byte_order.c */ |
316 | |