1 | /************************************************* |
2 | * Perl-Compatible Regular Expressions * |
3 | *************************************************/ |
4 | |
5 | /* PCRE is a library of functions to support regular expressions whose syntax |
6 | and semantics are as close as possible to those of the Perl 5 language. |
7 | |
8 | Written by Philip Hazel |
9 | This module by Zoltan Herczeg |
10 | Original API code Copyright (c) 1997-2012 University of Cambridge |
11 | New API code Copyright (c) 2016-2019 University of Cambridge |
12 | |
13 | ----------------------------------------------------------------------------- |
14 | Redistribution and use in source and binary forms, with or without |
15 | modification, are permitted provided that the following conditions are met: |
16 | |
17 | * Redistributions of source code must retain the above copyright notice, |
18 | this list of conditions and the following disclaimer. |
19 | |
20 | * Redistributions in binary form must reproduce the above copyright |
21 | notice, this list of conditions and the following disclaimer in the |
22 | documentation and/or other materials provided with the distribution. |
23 | |
24 | * Neither the name of the University of Cambridge nor the names of its |
25 | contributors may be used to endorse or promote products derived from |
26 | this software without specific prior written permission. |
27 | |
28 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
29 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
30 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
31 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
32 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
33 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
34 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
35 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
36 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | POSSIBILITY OF SUCH DAMAGE. |
39 | ----------------------------------------------------------------------------- |
40 | */ |
41 | |
42 | #if !(defined SUPPORT_VALGRIND) |
43 | |
44 | #if ((defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ |
45 | || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)) |
46 | |
/* Strategy used to test each input code unit against the search characters. */
typedef enum {
  /* A single compare against one broadcast character. */
  vector_compare_match1 = 0,
  /* The two characters differ in exactly one bit: that bit is OR-ed into the
     data first, then a single compare is performed. */
  vector_compare_match1i = 1,
  /* Two unrelated characters: two compares whose results are OR-ed. */
  vector_compare_match2 = 2
} vector_compare_type;
52 | |
53 | static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void) |
54 | { |
55 | #if PCRE2_CODE_UNIT_WIDTH == 8 |
56 | return 15; |
57 | #elif PCRE2_CODE_UNIT_WIDTH == 16 |
58 | return 7; |
59 | #elif PCRE2_CODE_UNIT_WIDTH == 32 |
60 | return 3; |
61 | #else |
62 | #error "Unsupported unit width" |
63 | #endif |
64 | } |
65 | |
66 | #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
67 | static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg) |
68 | { |
69 | #if PCRE2_CODE_UNIT_WIDTH == 8 |
70 | OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0); |
71 | return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80); |
72 | #elif PCRE2_CODE_UNIT_WIDTH == 16 |
73 | OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00); |
74 | return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00); |
75 | #else |
76 | #error "Unknown code width" |
77 | #endif |
78 | } |
79 | #endif |
80 | |
81 | #endif /* SLJIT_CONFIG_X86 || SLJIT_CONFIG_S390X */ |
82 | |
83 | #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) |
84 | |
85 | static sljit_s32 character_to_int32(PCRE2_UCHAR chr) |
86 | { |
87 | sljit_u32 value = chr; |
88 | #if PCRE2_CODE_UNIT_WIDTH == 8 |
89 | #define SSE2_COMPARE_TYPE_INDEX 0 |
90 | return (sljit_s32)((value << 24) | (value << 16) | (value << 8) | value); |
91 | #elif PCRE2_CODE_UNIT_WIDTH == 16 |
92 | #define SSE2_COMPARE_TYPE_INDEX 1 |
93 | return (sljit_s32)((value << 16) | value); |
94 | #elif PCRE2_CODE_UNIT_WIDTH == 32 |
95 | #define SSE2_COMPARE_TYPE_INDEX 2 |
96 | return (sljit_s32)(value); |
97 | #else |
98 | #error "Unsupported unit width" |
99 | #endif |
100 | } |
101 | |
102 | static void load_from_mem_sse2(struct sljit_compiler *compiler, sljit_s32 dst_xmm_reg, sljit_s32 src_general_reg, sljit_s8 offset) |
103 | { |
104 | sljit_u8 instruction[5]; |
105 | |
106 | SLJIT_ASSERT(dst_xmm_reg < 8); |
107 | SLJIT_ASSERT(src_general_reg < 8); |
108 | |
109 | /* MOVDQA xmm1, xmm2/m128 */ |
110 | instruction[0] = ((sljit_u8)offset & 0xf) == 0 ? 0x66 : 0xf3; |
111 | instruction[1] = 0x0f; |
112 | instruction[2] = 0x6f; |
113 | |
114 | if (offset == 0) |
115 | { |
116 | instruction[3] = (dst_xmm_reg << 3) | src_general_reg; |
117 | sljit_emit_op_custom(compiler, instruction, 4); |
118 | return; |
119 | } |
120 | |
121 | instruction[3] = 0x40 | (dst_xmm_reg << 3) | src_general_reg; |
122 | instruction[4] = (sljit_u8)offset; |
123 | sljit_emit_op_custom(compiler, instruction, 5); |
124 | } |
125 | |
126 | static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type, |
127 | int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind) |
128 | { |
129 | sljit_u8 instruction[4]; |
130 | instruction[0] = 0x66; |
131 | instruction[1] = 0x0f; |
132 | |
133 | SLJIT_ASSERT(step >= 0 && step <= 3); |
134 | |
135 | if (compare_type != vector_compare_match2) |
136 | { |
137 | if (step == 0) |
138 | { |
139 | if (compare_type == vector_compare_match1i) |
140 | { |
141 | /* POR xmm1, xmm2/m128 */ |
142 | /* instruction[0] = 0x66; */ |
143 | /* instruction[1] = 0x0f; */ |
144 | instruction[2] = 0xeb; |
145 | instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind; |
146 | sljit_emit_op_custom(compiler, instruction, 4); |
147 | } |
148 | return; |
149 | } |
150 | |
151 | if (step != 2) |
152 | return; |
153 | |
154 | /* PCMPEQB/W/D xmm1, xmm2/m128 */ |
155 | /* instruction[0] = 0x66; */ |
156 | /* instruction[1] = 0x0f; */ |
157 | instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; |
158 | instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; |
159 | sljit_emit_op_custom(compiler, instruction, 4); |
160 | return; |
161 | } |
162 | |
163 | switch (step) |
164 | { |
165 | case 0: |
166 | /* MOVDQA xmm1, xmm2/m128 */ |
167 | /* instruction[0] = 0x66; */ |
168 | /* instruction[1] = 0x0f; */ |
169 | instruction[2] = 0x6f; |
170 | instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind; |
171 | sljit_emit_op_custom(compiler, instruction, 4); |
172 | return; |
173 | |
174 | case 1: |
175 | /* PCMPEQB/W/D xmm1, xmm2/m128 */ |
176 | /* instruction[0] = 0x66; */ |
177 | /* instruction[1] = 0x0f; */ |
178 | instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; |
179 | instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; |
180 | sljit_emit_op_custom(compiler, instruction, 4); |
181 | return; |
182 | |
183 | case 2: |
184 | /* PCMPEQB/W/D xmm1, xmm2/m128 */ |
185 | /* instruction[0] = 0x66; */ |
186 | /* instruction[1] = 0x0f; */ |
187 | instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; |
188 | instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind; |
189 | sljit_emit_op_custom(compiler, instruction, 4); |
190 | return; |
191 | |
192 | case 3: |
193 | /* POR xmm1, xmm2/m128 */ |
194 | /* instruction[0] = 0x66; */ |
195 | /* instruction[1] = 0x0f; */ |
196 | instruction[2] = 0xeb; |
197 | instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind; |
198 | sljit_emit_op_custom(compiler, instruction, 4); |
199 | return; |
200 | } |
201 | } |
202 | |
/* The SSE2 based single character search is available when the CPU reports
   SSE2 support. */
#define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))

/* Emits SSE2 code which advances STR_PTR to the next position whose code
   unit equals char1 or char2, examining 16 bytes per iteration.

   common  JIT compiler state
   char1   first accepted code unit
   char2   second accepted code unit (equal to char1 when only one is wanted)
   offset  only used in UTF mode: the code unit at STR_PTR - offset is checked
           after a hit, and the search restarts when it is a trailing unit

   When the end of the subject is reached the emitted code jumps to
   common->failed_match in PCRE2_JIT_COMPLETE mode; in the other modes STR_PTR
   is clamped to STR_END instead. TMP1 and TMP2 are overwritten, and XMM
   registers 0-3 are used as working registers. */
static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
  DEFINE_COMPILER;
  sljit_u8 instruction[8];
  struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  struct sljit_label *restart;
#endif
  struct sljit_jump *quit;
  struct sljit_jump *partial_quit[2];
  vector_compare_type compare_type = vector_compare_match1;
  sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
  sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
  /* XMM register assignment. */
  sljit_s32 data_ind = 0;
  sljit_s32 tmp_ind = 1;
  sljit_s32 cmp1_ind = 2;
  sljit_s32 cmp2_ind = 3;
  sljit_u32 bit = 0;
  int i;

  /* offset is only referenced in the UTF specific section below. */
  SLJIT_UNUSED_ARG(offset);

  /* Choose the cheapest compare strategy: if the characters differ in a
     single bit, that bit can be forced on and one compare is enough. */
  if (char1 != char2)
  {
    bit = char1 ^ char2;
    compare_type = vector_compare_match1i;

    if (!is_powerof2(bit))
    {
      bit = 0;
      compare_type = vector_compare_match2;
    }
  }

  /* Nothing to search when STR_PTR is already at (or past) STR_END. */
  partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, &common->failed_match, partial_quit[0]);

  /* First part (unaligned start) */

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));

  /* The register index is stored in a 3 bit ModRM field. */
  SLJIT_ASSERT(tmp1_reg_ind < 8);

  /* MOVD xmm, r/m32 */
  instruction[0] = 0x66;
  instruction[1] = 0x0f;
  instruction[2] = 0x6e;
  instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_reg_ind;
  sljit_emit_op_custom(compiler, instruction, 4);

  if (char1 != char2)
  {
    /* Second operand: the ignored bit (match1i) or the second character (match2). */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));

    /* MOVD xmm, r/m32 */
    instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_reg_ind;
    sljit_emit_op_custom(compiler, instruction, 4);
  }

  /* TMP2 keeps the original (possibly unaligned) start position. */
  OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);

  /* Broadcast the low 32 bits of the compare operands to all four dwords. */
  /* PSHUFD xmm1, xmm2/m128, imm8 */
  /* instruction[0] = 0x66; */
  /* instruction[1] = 0x0f; */
  instruction[2] = 0x70;
  instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind;
  instruction[4] = 0;
  sljit_emit_op_custom(compiler, instruction, 5);

  if (char1 != char2)
  {
    /* PSHUFD xmm1, xmm2/m128, imm8 */
    instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind;
    sljit_emit_op_custom(compiler, instruction, 5);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  /* Re-entry point used by the UTF check at the end of this function. */
  restart = LABEL();
#endif
  /* Round STR_PTR down to a 16 byte boundary; TMP2 becomes the number of
     bytes between that boundary and the real start position. */
  OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);

  load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
  for (i = 0; i < 4; i++)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* Collect one result bit per byte into TMP1. */
  /* PMOVMSKB reg, xmm */
  /* instruction[0] = 0x66; */
  /* instruction[1] = 0x0f; */
  instruction[2] = 0xd7;
  instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
  sljit_emit_op_custom(compiler, instruction, 4);

  /* Restore the real start position and discard the result bits which
     belong to bytes before it. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

  quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);

  /* No hit in the first block: go back to the aligned address. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

  /* Second part (aligned) */
  start = LABEL();

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);

  partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, &common->failed_match, partial_quit[1]);

  load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0);
  for (i = 0; i < 4; i++)
    fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind);

  /* PMOVMSKB reg, xmm */
  /* instruction[0] = 0x66; */
  /* instruction[1] = 0x0f; */
  instruction[2] = 0xd7;
  instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind;
  sljit_emit_op_custom(compiler, instruction, 4);

  /* Keep scanning while the current block contains no hit. */
  CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);

  JUMPHERE(quit);

  /* The index of the lowest set bit is the byte offset of the first hit. */
  /* BSF r32, r/m32 */
  instruction[0] = 0x0f;
  instruction[1] = 0xbc;
  instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
  sljit_emit_op_custom(compiler, instruction, 3);

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

  if (common->mode != PCRE2_JIT_COMPLETE)
  {
    /* Partial matching: the end-of-subject exits land here, and STR_PTR is
       clamped so that it never exceeds STR_END. */
    JUMPHERE(partial_quit[0]);
    JUMPHERE(partial_quit[1]);
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
    CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
  }
  else
    /* The hit may lie in the part of the last block which is beyond STR_END. */
    add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (common->utf && offset > 0)
  {
    SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);

    /* Inspect the code unit 'offset' units before the hit. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));

    quit = jump_if_utf_char_start(compiler, TMP1);

    /* It is a trailing code unit: step one unit forward and search again. */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, restart);

    JUMPHERE(quit);
  }
#endif
}
366 | |
367 | #define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2)) |
368 | |
369 | static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2) |
370 | { |
371 | DEFINE_COMPILER; |
372 | sljit_u8 instruction[8]; |
373 | struct sljit_label *start; |
374 | struct sljit_jump *quit; |
375 | jump_list *not_found = NULL; |
376 | vector_compare_type compare_type = vector_compare_match1; |
377 | sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); |
378 | sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); |
379 | sljit_s32 data_ind = 0; |
380 | sljit_s32 tmp_ind = 1; |
381 | sljit_s32 cmp1_ind = 2; |
382 | sljit_s32 cmp2_ind = 3; |
383 | sljit_u32 bit = 0; |
384 | int i; |
385 | |
386 | if (char1 != char2) |
387 | { |
388 | bit = char1 ^ char2; |
389 | compare_type = vector_compare_match1i; |
390 | |
391 | if (!is_powerof2(bit)) |
392 | { |
393 | bit = 0; |
394 | compare_type = vector_compare_match2; |
395 | } |
396 | } |
397 | |
398 | add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); |
399 | OP1(SLJIT_MOV, TMP2, 0, TMP1, 0); |
400 | OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); |
401 | |
402 | /* First part (unaligned start) */ |
403 | |
404 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); |
405 | |
406 | SLJIT_ASSERT(tmp1_reg_ind < 8); |
407 | |
408 | /* MOVD xmm, r/m32 */ |
409 | instruction[0] = 0x66; |
410 | instruction[1] = 0x0f; |
411 | instruction[2] = 0x6e; |
412 | instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_reg_ind; |
413 | sljit_emit_op_custom(compiler, instruction, 4); |
414 | |
415 | if (char1 != char2) |
416 | { |
417 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2)); |
418 | |
419 | /* MOVD xmm, r/m32 */ |
420 | instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_reg_ind; |
421 | sljit_emit_op_custom(compiler, instruction, 4); |
422 | } |
423 | |
424 | OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0); |
425 | |
426 | /* PSHUFD xmm1, xmm2/m128, imm8 */ |
427 | /* instruction[0] = 0x66; */ |
428 | /* instruction[1] = 0x0f; */ |
429 | instruction[2] = 0x70; |
430 | instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind; |
431 | instruction[4] = 0; |
432 | sljit_emit_op_custom(compiler, instruction, 5); |
433 | |
434 | if (char1 != char2) |
435 | { |
436 | /* PSHUFD xmm1, xmm2/m128, imm8 */ |
437 | instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind; |
438 | sljit_emit_op_custom(compiler, instruction, 5); |
439 | } |
440 | |
441 | OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); |
442 | OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); |
443 | |
444 | load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); |
445 | for (i = 0; i < 4; i++) |
446 | fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
447 | |
448 | /* PMOVMSKB reg, xmm */ |
449 | /* instruction[0] = 0x66; */ |
450 | /* instruction[1] = 0x0f; */ |
451 | instruction[2] = 0xd7; |
452 | instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind; |
453 | sljit_emit_op_custom(compiler, instruction, 4); |
454 | |
455 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
456 | OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); |
457 | |
458 | quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); |
459 | |
460 | OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
461 | |
462 | /* Second part (aligned) */ |
463 | start = LABEL(); |
464 | |
465 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); |
466 | |
467 | add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
468 | |
469 | load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); |
470 | for (i = 0; i < 4; i++) |
471 | fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
472 | |
473 | /* PMOVMSKB reg, xmm */ |
474 | /* instruction[0] = 0x66; */ |
475 | /* instruction[1] = 0x0f; */ |
476 | instruction[2] = 0xd7; |
477 | instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind; |
478 | sljit_emit_op_custom(compiler, instruction, 4); |
479 | |
480 | CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); |
481 | |
482 | JUMPHERE(quit); |
483 | |
484 | /* BSF r32, r/m32 */ |
485 | instruction[0] = 0x0f; |
486 | instruction[1] = 0xbc; |
487 | instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind; |
488 | sljit_emit_op_custom(compiler, instruction, 3); |
489 | |
490 | OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0); |
491 | add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); |
492 | |
493 | OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); |
494 | return not_found; |
495 | } |
496 | |
497 | #ifndef _WIN64 |
498 | |
/* The SSE2 based character pair search is available when the CPU reports
   SSE2 support. */
#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2))

/* Emits SSE2 code which advances STR_PTR to the next position p where the
   code unit at p + offs1 is char1a or char1b and the code unit at p + offs2
   is char2a or char2b. Two 16 byte blocks, 'offs1 - offs2' code units apart,
   are compared per iteration and their results are AND-ed.

   common          JIT compiler state (must be in PCRE2_JIT_COMPLETE mode)
   offs1           offset of the first character of the pair (must be > offs2)
   char1a, char1b  accepted code units at offs1 (equal when only one is wanted)
   offs2           offset of the second character of the pair
   char2a, char2b  accepted code units at offs2 (equal when only one is wanted)

   The emitted code jumps to common->failed_match when no such position is
   found before STR_END. TMP1 and TMP2 are overwritten, TMP3 is overwritten
   when common->match_end_ptr is non-zero, and XMM registers 0-7 are used as
   working registers. */
static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1,
  PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b)
{
  DEFINE_COMPILER;
  sljit_u8 instruction[8];
  vector_compare_type compare1_type = vector_compare_match1;
  vector_compare_type compare2_type = vector_compare_match1;
  sljit_u32 bit1 = 0;
  sljit_u32 bit2 = 0;
  /* Distance between the two characters in bytes. */
  sljit_u32 diff = IN_UCHARS(offs1 - offs2);
  sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1);
  sljit_s32 tmp2_reg_ind = sljit_get_register_index(TMP2);
  sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR);
  /* XMM register assignment. */
  sljit_s32 data1_ind = 0;
  sljit_s32 data2_ind = 1;
  sljit_s32 tmp1_ind = 2;
  sljit_s32 tmp2_ind = 3;
  sljit_s32 cmp1a_ind = 4;
  sljit_s32 cmp1b_ind = 5;
  sljit_s32 cmp2a_ind = 6;
  sljit_s32 cmp2b_ind = 7;
  struct sljit_label *start;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  struct sljit_label *restart;
#endif
  struct sljit_jump *jump[2];
  int i;

  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2);
  SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset()));
  /* General register indexes are stored in 3 bit ModRM fields below. */
  SLJIT_ASSERT(tmp1_reg_ind < 8 && tmp2_reg_ind == 1);

  /* Initialize. */
  if (common->match_end_ptr != 0)
  {
    /* Temporarily lower STR_END to the stored limit plus offs1 + 1 code
       units when that is smaller; the original STR_END is kept in TMP3. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1));

    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0);
    CMOV(SLJIT_LESS, STR_END, TMP1, 0);
  }

  /* From here on STR_PTR addresses the first character of the pair. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  /* MOVD xmm, r/m32 */
  instruction[0] = 0x66;
  instruction[1] = 0x0f;
  instruction[2] = 0x6e;

  /* Operands for the first character: TMP1 -> cmp1a, TMP2 -> cmp1b. */
  if (char1a == char1b)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
  else
  {
    bit1 = char1a ^ char1b;
    if (is_powerof2(bit1))
    {
      compare1_type = vector_compare_match1i;
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1));
    }
    else
    {
      compare1_type = vector_compare_match2;
      bit1 = 0;
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b));
    }
  }

  instruction[3] = 0xc0 | (cmp1a_ind << 3) | tmp1_reg_ind;
  sljit_emit_op_custom(compiler, instruction, 4);

  if (char1a != char1b)
  {
    instruction[3] = 0xc0 | (cmp1b_ind << 3) | tmp2_reg_ind;
    sljit_emit_op_custom(compiler, instruction, 4);
  }

  /* Operands for the second character: TMP1 -> cmp2a, TMP2 -> cmp2b. */
  if (char2a == char2b)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
  else
  {
    bit2 = char2a ^ char2b;
    if (is_powerof2(bit2))
    {
      compare2_type = vector_compare_match1i;
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2));
    }
    else
    {
      compare2_type = vector_compare_match2;
      bit2 = 0;
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b));
    }
  }

  instruction[3] = 0xc0 | (cmp2a_ind << 3) | tmp1_reg_ind;
  sljit_emit_op_custom(compiler, instruction, 4);

  if (char2a != char2b)
  {
    instruction[3] = 0xc0 | (cmp2b_ind << 3) | tmp2_reg_ind;
    sljit_emit_op_custom(compiler, instruction, 4);
  }

  /* Broadcast the low 32 bits of every compare operand to all four dwords. */
  /* PSHUFD xmm1, xmm2/m128, imm8 */
  /* instruction[0] = 0x66; */
  /* instruction[1] = 0x0f; */
  instruction[2] = 0x70;
  instruction[4] = 0;

  instruction[3] = 0xc0 | (cmp1a_ind << 3) | cmp1a_ind;
  sljit_emit_op_custom(compiler, instruction, 5);

  if (char1a != char1b)
  {
    instruction[3] = 0xc0 | (cmp1b_ind << 3) | cmp1b_ind;
    sljit_emit_op_custom(compiler, instruction, 5);
  }

  instruction[3] = 0xc0 | (cmp2a_ind << 3) | cmp2a_ind;
  sljit_emit_op_custom(compiler, instruction, 5);

  if (char2a != char2b)
  {
    instruction[3] = 0xc0 | (cmp2b_ind << 3) | cmp2b_ind;
    sljit_emit_op_custom(compiler, instruction, 5);
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  /* Re-entry point used by the UTF check at the end of this function. */
  restart = LABEL();
#endif

  /* TMP1 = address of the second character, TMP2 = unaligned STR_PTR;
     STR_PTR itself is rounded down to a 16 byte boundary. */
  OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff);
  OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
  OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);

  load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0);

  /* When the second character is inside the aligned block as well, the
     second data vector is derived from the first one by a byte shift rather
     than loaded from STR_PTR - diff (presumably to avoid reading memory
     below the aligned block - confirm). */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0);

  load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff);
  jump[1] = JUMP(SLJIT_JUMP);

  JUMPHERE(jump[0]);

  /* MOVDQA xmm1, xmm2/m128 */
  /* instruction[0] = 0x66; */
  /* instruction[1] = 0x0f; */
  instruction[2] = 0x6f;
  instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind;
  sljit_emit_op_custom(compiler, instruction, 4);

  /* Shift data2 left by diff bytes (opcode extension /7 selects PSLLDQ). */
  /* PSLLDQ xmm1, imm8 */
  /* instruction[0] = 0x66; */
  /* instruction[1] = 0x0f; */
  instruction[2] = 0x73;
  instruction[3] = 0xc0 | (7 << 3) | data2_ind;
  instruction[4] = diff;
  sljit_emit_op_custom(compiler, instruction, 5);

  JUMPHERE(jump[1]);

  /* TMP2 = number of bytes between the aligned address and the real start. */
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf);

  /* The two compare sequences are emitted interleaved, step by step. */
  for (i = 0; i < 4; i++)
  {
    fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind);
    fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind);
  }

  /* Keep only the positions where both characters matched. */
  /* PAND xmm1, xmm2/m128 */
  /* instruction[0] = 0x66; */
  /* instruction[1] = 0x0f; */
  instruction[2] = 0xdb;
  instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
  sljit_emit_op_custom(compiler, instruction, 4);

  /* Collect one result bit per byte into TMP1; the source operand is XMM
     register 0, which is data1_ind. */
  /* PMOVMSKB reg, xmm */
  /* instruction[0] = 0x66; */
  /* instruction[1] = 0x0f; */
  instruction[2] = 0xd7;
  instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | 0;
  sljit_emit_op_custom(compiler, instruction, 4);

  /* Ignore matches before the first STR_PTR. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0);

  jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0);

  /* No hit in the first block: go back to the aligned address. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

  /* Main loop. */
  start = LABEL();

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0);
  load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff);

  for (i = 0; i < 4; i++)
  {
    fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind);
    fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind);
  }

  /* PAND xmm1, xmm2/m128 */
  /* instruction[0] = 0x66; */
  /* instruction[1] = 0x0f; */
  instruction[2] = 0xdb;
  instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind;
  sljit_emit_op_custom(compiler, instruction, 4);

  /* PMOVMSKB reg, xmm */
  /* instruction[0] = 0x66; */
  /* instruction[1] = 0x0f; */
  instruction[2] = 0xd7;
  instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | 0;
  sljit_emit_op_custom(compiler, instruction, 4);

  /* Keep scanning while the current block contains no hit. */
  CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start);

  JUMPHERE(jump[0]);

  /* The index of the lowest set bit is the byte offset of the first hit. */
  /* BSF r32, r/m32 */
  instruction[0] = 0x0f;
  instruction[1] = 0xbc;
  instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind;
  sljit_emit_op_custom(compiler, instruction, 3);

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

  /* The hit may lie in the part of the last block which is beyond STR_END. */
  add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
  if (common->utf)
  {
    /* Inspect the code unit at the candidate match start (STR_PTR - offs1). */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1));

    jump[0] = jump_if_utf_char_start(compiler, TMP1);

    /* It is a trailing code unit: step one unit forward and search again,
       or fail when the end of the subject is reached. */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart);

    add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP));

    JUMPHERE(jump[0]);
  }
#endif

  /* Convert STR_PTR back to the start of the candidate match. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1));

  /* Undo the temporary STR_END adjustment (fall-through path only). */
  if (common->match_end_ptr != 0)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
762 | |
763 | #endif /* !_WIN64 */ |
764 | |
765 | #undef SSE2_COMPARE_TYPE_INDEX |
766 | |
767 | #endif /* SLJIT_CONFIG_X86 */ |
768 | |
769 | #if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__)) |
770 | |
771 | #include <arm_neon.h> |
772 | |
/* Overlays an unsigned int with four individually addressable bytes, so that
   a few characters can be handed over as a single integer value. */
typedef union {
  /* All bytes viewed as one integer. */
  unsigned int x;
  /* The individual bytes, in memory order. */
  struct {
    unsigned char c1;
    unsigned char c2;
    unsigned char c3;
    unsigned char c4;
  } c;
} int_char;
777 | |
778 | #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
779 | static SLJIT_INLINE int utf_continue(PCRE2_SPTR s) |
780 | { |
781 | #if PCRE2_CODE_UNIT_WIDTH == 8 |
782 | return (*s & 0xc0) == 0x80; |
783 | #elif PCRE2_CODE_UNIT_WIDTH == 16 |
784 | return (*s & 0xfc00) == 0xdc00; |
785 | #else |
786 | #error "Unknown code width" |
787 | #endif |
788 | } |
789 | #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */ |
790 | |
/* Width specific aliases for the NEON vector type and intrinsics.

   VECTOR_FACTOR  number of code units in one 128 bit vector
   vect_t         128 bit vector type with code unit sized lanes
   VLD1Q(X)       load a vector from address X
   VCEQQ          lane-wise equality compare
   VORRQ / VANDQ  bitwise OR / AND
   VST1Q          store a vector to memory
   VDUPQ          broadcast a scalar to every lane
   VEXTQ          extract a vector from a pair of vectors
   quad_word      a 128 bit value viewed either as code units (mem) or as two
                  64 bit halves (dw)

   Any width other than 8 or 16 selects the 32 bit definitions. */
#if PCRE2_CODE_UNIT_WIDTH == 8
# define VECTOR_FACTOR 16
# define vect_t uint8x16_t
# define VLD1Q(X) vld1q_u8((sljit_u8 *)(X))
# define VCEQQ vceqq_u8
# define VORRQ vorrq_u8
# define VST1Q vst1q_u8
# define VDUPQ vdupq_n_u8
# define VEXTQ vextq_u8
# define VANDQ vandq_u8
typedef union {
  uint8_t mem[16];
  uint64_t dw[2];
} quad_word;
#elif PCRE2_CODE_UNIT_WIDTH == 16
# define VECTOR_FACTOR 8
# define vect_t uint16x8_t
# define VLD1Q(X) vld1q_u16((sljit_u16 *)(X))
# define VCEQQ vceqq_u16
# define VORRQ vorrq_u16
# define VST1Q vst1q_u16
# define VDUPQ vdupq_n_u16
# define VEXTQ vextq_u16
# define VANDQ vandq_u16
typedef union {
  uint16_t mem[8];
  uint64_t dw[2];
} quad_word;
#else
# define VECTOR_FACTOR 4
# define vect_t uint32x4_t
# define VLD1Q(X) vld1q_u32((sljit_u32 *)(X))
# define VCEQQ vceqq_u32
# define VORRQ vorrq_u32
# define VST1Q vst1q_u32
# define VDUPQ vdupq_n_u32
# define VEXTQ vextq_u32
# define VANDQ vandq_u32
typedef union {
  uint32_t mem[4];
  uint64_t dw[2];
} quad_word;
#endif
834 | |
/* pcre2_jit_neon_inc.h is included several times, each time with a different
   selector macro defined (FFCS, FFCS_2, FFCS_MASK), and once more per
   selector with FF_UTF added when UTF support is compiled in for 8 or 16 bit
   code units.
   NOTE(review): each inclusion presumably generates one of the ffcs,
   ffcs_utf, ffcs_2, ffcs_2_utf, ffcs_mask and ffcs_mask_utf helpers called
   further down in this file - confirm against the header. */

/* FFCS variant, without and with FF_UTF. */
#define FFCS
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCS

/* FFCS_2 variant, without and with FF_UTF. */
#define FFCS_2
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCS_2

/* FFCS_MASK variant, without and with FF_UTF. */
#define FFCS_MASK
#include "pcre2_jit_neon_inc.h"
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
# define FF_UTF
# include "pcre2_jit_neon_inc.h"
# undef FF_UTF
#endif
#undef FFCS_MASK
861 | |
862 | #define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1 |
863 | |
864 | static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) |
865 | { |
866 | DEFINE_COMPILER; |
867 | int_char ic; |
868 | struct sljit_jump *partial_quit; |
869 | /* Save temporary registers. */ |
870 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); |
871 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP3, 0); |
872 | |
873 | /* Prepare function arguments */ |
874 | OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0); |
875 | OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0); |
876 | OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, offset); |
877 | |
878 | if (char1 == char2) |
879 | { |
880 | ic.c.c1 = char1; |
881 | ic.c.c2 = char2; |
882 | OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x); |
883 | |
884 | #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
885 | if (common->utf && offset > 0) |
886 | sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
887 | SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_utf)); |
888 | else |
889 | sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
890 | SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs)); |
891 | #else |
892 | sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
893 | SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs)); |
894 | #endif |
895 | } |
896 | else |
897 | { |
898 | PCRE2_UCHAR mask = char1 ^ char2; |
899 | if (is_powerof2(mask)) |
900 | { |
901 | ic.c.c1 = char1 | mask; |
902 | ic.c.c2 = mask; |
903 | OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x); |
904 | |
905 | #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
906 | if (common->utf && offset > 0) |
907 | sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
908 | SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask_utf)); |
909 | else |
910 | sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
911 | SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask)); |
912 | #else |
913 | sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
914 | SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask)); |
915 | #endif |
916 | } |
917 | else |
918 | { |
919 | ic.c.c1 = char1; |
920 | ic.c.c2 = char2; |
921 | OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x); |
922 | |
923 | #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
924 | if (common->utf && offset > 0) |
925 | sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
926 | SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2_utf)); |
927 | else |
928 | sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
929 | SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2)); |
930 | #else |
931 | sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
932 | SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2)); |
933 | #endif |
934 | } |
935 | } |
936 | /* Restore registers. */ |
937 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); |
938 | OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); |
939 | |
940 | /* Check return value. */ |
941 | partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); |
942 | if (common->mode == PCRE2_JIT_COMPLETE) |
943 | add_jump(compiler, &common->failed_match, partial_quit); |
944 | |
945 | /* Fast forward STR_PTR to the result of memchr. */ |
946 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); |
947 | |
948 | if (common->mode != PCRE2_JIT_COMPLETE) |
949 | JUMPHERE(partial_quit); |
950 | } |
951 | |
/* Selects how fast_forward_char_pair_compare() tests a vector of input:
  compare_match1   equality test against cmp1 only
  compare_match1i  cmp2 is OR-ed into the input before the equality test
                   against cmp1
  compare_match2   equality tests against cmp1 and against cmp2, with the
                   two results OR-ed together */
typedef enum {
  compare_match1,
  compare_match1i,
  compare_match2,
} compare_type;
957 | |
958 | static inline vect_t fast_forward_char_pair_compare(compare_type ctype, vect_t dst, vect_t cmp1, vect_t cmp2) |
959 | { |
960 | if (ctype == compare_match2) |
961 | { |
962 | vect_t tmp = dst; |
963 | dst = VCEQQ(dst, cmp1); |
964 | tmp = VCEQQ(tmp, cmp2); |
965 | dst = VORRQ(dst, tmp); |
966 | return dst; |
967 | } |
968 | |
969 | if (ctype == compare_match1i) |
970 | dst = VORRQ(dst, cmp2); |
971 | dst = VCEQQ(dst, cmp1); |
972 | return dst; |
973 | } |
974 | |
975 | static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void) |
976 | { |
977 | #if PCRE2_CODE_UNIT_WIDTH == 8 |
978 | return 15; |
979 | #elif PCRE2_CODE_UNIT_WIDTH == 16 |
980 | return 7; |
981 | #elif PCRE2_CODE_UNIT_WIDTH == 32 |
982 | return 3; |
983 | #else |
984 | #error "Unsupported unit width" |
985 | #endif |
986 | } |
987 | |
988 | /* ARM doesn't have a shift left across lanes. */ |
989 | static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n) |
990 | { |
991 | vect_t zero = VDUPQ(0); |
992 | SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR); |
993 | /* VEXTQ takes an immediate as last argument. */ |
994 | #define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X); |
995 | switch (n) |
996 | { |
997 | C(1); C(2); C(3); |
998 | #if PCRE2_CODE_UNIT_WIDTH != 32 |
999 | C(4); C(5); C(6); C(7); |
1000 | # if PCRE2_CODE_UNIT_WIDTH != 16 |
1001 | C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15); |
1002 | # endif |
1003 | #endif |
1004 | default: |
1005 | /* Based on the ASSERT(0 < n && n < VECTOR_FACTOR) above, this won't |
1006 | happen. The return is still here for compilers to not warn. */ |
1007 | return a; |
1008 | } |
1009 | } |
1010 | |
1011 | #define FFCPS |
1012 | #define FFCPS_DIFF1 |
1013 | #define FFCPS_CHAR1A2A |
1014 | |
1015 | #define FFCPS_0 |
1016 | #include "pcre2_jit_neon_inc.h" |
1017 | #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
1018 | # define FF_UTF |
1019 | # include "pcre2_jit_neon_inc.h" |
1020 | # undef FF_UTF |
1021 | #endif |
1022 | #undef FFCPS_0 |
1023 | |
1024 | #undef FFCPS_CHAR1A2A |
1025 | |
1026 | #define FFCPS_1 |
1027 | #include "pcre2_jit_neon_inc.h" |
1028 | #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
1029 | # define FF_UTF |
1030 | # include "pcre2_jit_neon_inc.h" |
1031 | # undef FF_UTF |
1032 | #endif |
1033 | #undef FFCPS_1 |
1034 | |
1035 | #undef FFCPS_DIFF1 |
1036 | |
1037 | #define FFCPS_DEFAULT |
1038 | #include "pcre2_jit_neon_inc.h" |
1039 | #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
1040 | # define FF_UTF |
1041 | # include "pcre2_jit_neon_inc.h" |
1042 | # undef FF_UTF |
1043 | #endif |
1044 | #undef FFCPS |
1045 | |
1046 | #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1 |
1047 | |
1048 | static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, |
1049 | PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) |
1050 | { |
1051 | DEFINE_COMPILER; |
1052 | sljit_u32 diff = IN_UCHARS(offs1 - offs2); |
1053 | struct sljit_jump *partial_quit; |
1054 | int_char ic; |
1055 | SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); |
1056 | SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset())); |
1057 | SLJIT_ASSERT(compiler->scratches == 5); |
1058 | |
1059 | /* Save temporary register STR_PTR. */ |
1060 | OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); |
1061 | |
1062 | /* Prepare arguments for the function call. */ |
1063 | if (common->match_end_ptr == 0) |
1064 | OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0); |
1065 | else |
1066 | { |
1067 | OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); |
1068 | OP2(SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); |
1069 | |
1070 | OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, SLJIT_R0, 0); |
1071 | CMOV(SLJIT_LESS, SLJIT_R0, STR_END, 0); |
1072 | } |
1073 | |
1074 | OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0); |
1075 | OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_IMM, offs1); |
1076 | OP1(SLJIT_MOV_S32, SLJIT_R3, 0, SLJIT_IMM, offs2); |
1077 | ic.c.c1 = char1a; |
1078 | ic.c.c2 = char1b; |
1079 | ic.c.c3 = char2a; |
1080 | ic.c.c4 = char2b; |
1081 | OP1(SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_IMM, ic.x); |
1082 | |
1083 | if (diff == 1) { |
1084 | if (char1a == char1b && char2a == char2b) { |
1085 | #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
1086 | if (common->utf) |
1087 | sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
1088 | SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_0_utf)); |
1089 | else |
1090 | #endif |
1091 | sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
1092 | SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_0)); |
1093 | } else { |
1094 | #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
1095 | if (common->utf) |
1096 | sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
1097 | SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_1_utf)); |
1098 | else |
1099 | #endif |
1100 | sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
1101 | SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_1)); |
1102 | } |
1103 | } else { |
1104 | #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
1105 | if (common->utf) |
1106 | sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
1107 | SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_default_utf)); |
1108 | else |
1109 | #endif |
1110 | sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
1111 | SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_default)); |
1112 | } |
1113 | |
1114 | /* Restore STR_PTR register. */ |
1115 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); |
1116 | |
1117 | /* Check return value. */ |
1118 | partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); |
1119 | add_jump(compiler, &common->failed_match, partial_quit); |
1120 | |
1121 | /* Fast forward STR_PTR to the result of memchr. */ |
1122 | OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); |
1123 | |
1124 | JUMPHERE(partial_quit); |
1125 | } |
1126 | |
1127 | #endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */ |
1128 | |
1129 | #if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) |
1130 | |
/* Element size code for the vector instructions emitted below: 0, 1 or 2 for
8, 16 or 32 bit code units. The emitters shift it into the top four bits of
the third instruction halfword (VECTOR_ELEMENT_SIZE << 12). */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define VECTOR_ELEMENT_SIZE 0
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define VECTOR_ELEMENT_SIZE 1
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define VECTOR_ELEMENT_SIZE 2
#else
#error "Unsupported unit width"
#endif
1140 | |
1141 | static void load_from_mem_vector(struct sljit_compiler *compiler, BOOL vlbb, sljit_s32 dst_vreg, |
1142 | sljit_s32 base_reg, sljit_s32 index_reg) |
1143 | { |
1144 | sljit_u16 instruction[3]; |
1145 | |
1146 | instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | index_reg); |
1147 | instruction[1] = (sljit_u16)(base_reg << 12); |
1148 | instruction[2] = (sljit_u16)((0x8 << 8) | (vlbb ? 0x07 : 0x06)); |
1149 | |
1150 | sljit_emit_op_custom(compiler, instruction, 6); |
1151 | } |
1152 | |
1153 | #if PCRE2_CODE_UNIT_WIDTH == 32 |
1154 | |
1155 | static void replicate_imm_vector(struct sljit_compiler *compiler, int step, sljit_s32 dst_vreg, |
1156 | PCRE2_UCHAR chr, sljit_s32 tmp_general_reg) |
1157 | { |
1158 | sljit_u16 instruction[3]; |
1159 | |
1160 | SLJIT_ASSERT(step >= 0 && step <= 1); |
1161 | |
1162 | if (chr < 0x7fff) |
1163 | { |
1164 | if (step == 1) |
1165 | return; |
1166 | |
1167 | /* VREPI */ |
1168 | instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4)); |
1169 | instruction[1] = (sljit_u16)chr; |
1170 | instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); |
1171 | sljit_emit_op_custom(compiler, instruction, 6); |
1172 | return; |
1173 | } |
1174 | |
1175 | if (step == 0) |
1176 | { |
1177 | OP1(SLJIT_MOV, tmp_general_reg, 0, SLJIT_IMM, chr); |
1178 | |
1179 | /* VLVG */ |
1180 | instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | sljit_get_register_index(tmp_general_reg)); |
1181 | instruction[1] = 0; |
1182 | instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x22); |
1183 | sljit_emit_op_custom(compiler, instruction, 6); |
1184 | return; |
1185 | } |
1186 | |
1187 | /* VREP */ |
1188 | instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | dst_vreg); |
1189 | instruction[1] = 0; |
1190 | instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xc << 8) | 0x4d); |
1191 | sljit_emit_op_custom(compiler, instruction, 6); |
1192 | } |
1193 | |
1194 | #endif |
1195 | |
1196 | static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type, |
1197 | int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind) |
1198 | { |
1199 | sljit_u16 instruction[3]; |
1200 | |
1201 | SLJIT_ASSERT(step >= 0 && step <= 2); |
1202 | |
1203 | if (step == 1) |
1204 | { |
1205 | /* VCEQ */ |
1206 | instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind); |
1207 | instruction[1] = (sljit_u16)(cmp1_ind << 12); |
1208 | instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8); |
1209 | sljit_emit_op_custom(compiler, instruction, 6); |
1210 | return; |
1211 | } |
1212 | |
1213 | if (compare_type != vector_compare_match2) |
1214 | { |
1215 | if (step == 0 && compare_type == vector_compare_match1i) |
1216 | { |
1217 | /* VO */ |
1218 | instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind); |
1219 | instruction[1] = (sljit_u16)(cmp2_ind << 12); |
1220 | instruction[2] = (sljit_u16)((0xe << 8) | 0x6a); |
1221 | sljit_emit_op_custom(compiler, instruction, 6); |
1222 | } |
1223 | return; |
1224 | } |
1225 | |
1226 | switch (step) |
1227 | { |
1228 | case 0: |
1229 | /* VCEQ */ |
1230 | instruction[0] = (sljit_u16)(0xe700 | (tmp_ind << 4) | dst_ind); |
1231 | instruction[1] = (sljit_u16)(cmp2_ind << 12); |
1232 | instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8); |
1233 | sljit_emit_op_custom(compiler, instruction, 6); |
1234 | return; |
1235 | |
1236 | case 2: |
1237 | /* VO */ |
1238 | instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind); |
1239 | instruction[1] = (sljit_u16)(tmp_ind << 12); |
1240 | instruction[2] = (sljit_u16)((0xe << 8) | 0x6a); |
1241 | sljit_emit_op_custom(compiler, instruction, 6); |
1242 | return; |
1243 | } |
1244 | } |
1245 | |
1246 | #define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1 |
1247 | |
1248 | static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) |
1249 | { |
1250 | DEFINE_COMPILER; |
1251 | sljit_u16 instruction[3]; |
1252 | struct sljit_label *start; |
1253 | #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
1254 | struct sljit_label *restart; |
1255 | #endif |
1256 | struct sljit_jump *quit; |
1257 | struct sljit_jump *partial_quit[2]; |
1258 | vector_compare_type compare_type = vector_compare_match1; |
1259 | sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); |
1260 | sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); |
1261 | sljit_s32 data_ind = 0; |
1262 | sljit_s32 tmp_ind = 1; |
1263 | sljit_s32 cmp1_ind = 2; |
1264 | sljit_s32 cmp2_ind = 3; |
1265 | sljit_s32 zero_ind = 4; |
1266 | sljit_u32 bit = 0; |
1267 | int i; |
1268 | |
1269 | SLJIT_UNUSED_ARG(offset); |
1270 | |
1271 | if (char1 != char2) |
1272 | { |
1273 | bit = char1 ^ char2; |
1274 | compare_type = vector_compare_match1i; |
1275 | |
1276 | if (!is_powerof2(bit)) |
1277 | { |
1278 | bit = 0; |
1279 | compare_type = vector_compare_match2; |
1280 | } |
1281 | } |
1282 | |
1283 | partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
1284 | if (common->mode == PCRE2_JIT_COMPLETE) |
1285 | add_jump(compiler, &common->failed_match, partial_quit[0]); |
1286 | |
1287 | /* First part (unaligned start) */ |
1288 | |
1289 | OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16); |
1290 | |
1291 | #if PCRE2_CODE_UNIT_WIDTH != 32 |
1292 | |
1293 | /* VREPI */ |
1294 | instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4)); |
1295 | instruction[1] = (sljit_u16)(char1 | bit); |
1296 | instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); |
1297 | sljit_emit_op_custom(compiler, instruction, 6); |
1298 | |
1299 | if (char1 != char2) |
1300 | { |
1301 | /* VREPI */ |
1302 | instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4)); |
1303 | instruction[1] = (sljit_u16)(bit != 0 ? bit : char2); |
1304 | /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ |
1305 | sljit_emit_op_custom(compiler, instruction, 6); |
1306 | } |
1307 | |
1308 | #else /* PCRE2_CODE_UNIT_WIDTH == 32 */ |
1309 | |
1310 | for (int i = 0; i < 2; i++) |
1311 | { |
1312 | replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP1); |
1313 | |
1314 | if (char1 != char2) |
1315 | replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP1); |
1316 | } |
1317 | |
1318 | #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ |
1319 | |
1320 | if (compare_type == vector_compare_match2) |
1321 | { |
1322 | /* VREPI */ |
1323 | instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4)); |
1324 | instruction[1] = 0; |
1325 | instruction[2] = (sljit_u16)((0x8 << 8) | 0x45); |
1326 | sljit_emit_op_custom(compiler, instruction, 6); |
1327 | } |
1328 | |
1329 | #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
1330 | restart = LABEL(); |
1331 | #endif |
1332 | |
1333 | load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0); |
1334 | OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15); |
1335 | |
1336 | if (compare_type != vector_compare_match2) |
1337 | { |
1338 | if (compare_type == vector_compare_match1i) |
1339 | fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
1340 | |
1341 | /* VFEE */ |
1342 | instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); |
1343 | instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); |
1344 | instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); |
1345 | sljit_emit_op_custom(compiler, instruction, 6); |
1346 | } |
1347 | else |
1348 | { |
1349 | for (i = 0; i < 3; i++) |
1350 | fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
1351 | |
1352 | /* VFENE */ |
1353 | instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); |
1354 | instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); |
1355 | instruction[2] = (sljit_u16)((0xe << 8) | 0x81); |
1356 | sljit_emit_op_custom(compiler, instruction, 6); |
1357 | } |
1358 | |
1359 | /* VLGVB */ |
1360 | instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind); |
1361 | instruction[1] = 7; |
1362 | instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); |
1363 | sljit_emit_op_custom(compiler, instruction, 6); |
1364 | |
1365 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
1366 | quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); |
1367 | |
1368 | OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16); |
1369 | |
1370 | /* Second part (aligned) */ |
1371 | start = LABEL(); |
1372 | |
1373 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); |
1374 | |
1375 | partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
1376 | if (common->mode == PCRE2_JIT_COMPLETE) |
1377 | add_jump(compiler, &common->failed_match, partial_quit[1]); |
1378 | |
1379 | load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0); |
1380 | |
1381 | if (compare_type != vector_compare_match2) |
1382 | { |
1383 | if (compare_type == vector_compare_match1i) |
1384 | fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
1385 | |
1386 | /* VFEE */ |
1387 | instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); |
1388 | instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); |
1389 | instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); |
1390 | sljit_emit_op_custom(compiler, instruction, 6); |
1391 | } |
1392 | else |
1393 | { |
1394 | for (i = 0; i < 3; i++) |
1395 | fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
1396 | |
1397 | /* VFENE */ |
1398 | instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); |
1399 | instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); |
1400 | instruction[2] = (sljit_u16)((0xe << 8) | 0x81); |
1401 | sljit_emit_op_custom(compiler, instruction, 6); |
1402 | } |
1403 | |
1404 | sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW); |
1405 | JUMPTO(SLJIT_OVERFLOW, start); |
1406 | |
1407 | /* VLGVB */ |
1408 | instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind); |
1409 | instruction[1] = 7; |
1410 | instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); |
1411 | sljit_emit_op_custom(compiler, instruction, 6); |
1412 | |
1413 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
1414 | |
1415 | JUMPHERE(quit); |
1416 | |
1417 | if (common->mode != PCRE2_JIT_COMPLETE) |
1418 | { |
1419 | JUMPHERE(partial_quit[0]); |
1420 | JUMPHERE(partial_quit[1]); |
1421 | OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); |
1422 | CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); |
1423 | } |
1424 | else |
1425 | add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
1426 | |
1427 | #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
1428 | if (common->utf && offset > 0) |
1429 | { |
1430 | SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); |
1431 | |
1432 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); |
1433 | |
1434 | quit = jump_if_utf_char_start(compiler, TMP1); |
1435 | |
1436 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
1437 | add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
1438 | |
1439 | OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16); |
1440 | JUMPTO(SLJIT_JUMP, restart); |
1441 | |
1442 | JUMPHERE(quit); |
1443 | } |
1444 | #endif |
1445 | } |
1446 | |
1447 | #define JIT_HAS_FAST_REQUESTED_CHAR_SIMD 1 |
1448 | |
1449 | static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2) |
1450 | { |
1451 | DEFINE_COMPILER; |
1452 | sljit_u16 instruction[3]; |
1453 | struct sljit_label *start; |
1454 | struct sljit_jump *quit; |
1455 | jump_list *not_found = NULL; |
1456 | vector_compare_type compare_type = vector_compare_match1; |
1457 | sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); |
1458 | sljit_s32 tmp3_reg_ind = sljit_get_register_index(TMP3); |
1459 | sljit_s32 data_ind = 0; |
1460 | sljit_s32 tmp_ind = 1; |
1461 | sljit_s32 cmp1_ind = 2; |
1462 | sljit_s32 cmp2_ind = 3; |
1463 | sljit_s32 zero_ind = 4; |
1464 | sljit_u32 bit = 0; |
1465 | int i; |
1466 | |
1467 | if (char1 != char2) |
1468 | { |
1469 | bit = char1 ^ char2; |
1470 | compare_type = vector_compare_match1i; |
1471 | |
1472 | if (!is_powerof2(bit)) |
1473 | { |
1474 | bit = 0; |
1475 | compare_type = vector_compare_match2; |
1476 | } |
1477 | } |
1478 | |
1479 | add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); |
1480 | |
1481 | /* First part (unaligned start) */ |
1482 | |
1483 | OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, 16); |
1484 | |
1485 | #if PCRE2_CODE_UNIT_WIDTH != 32 |
1486 | |
1487 | /* VREPI */ |
1488 | instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4)); |
1489 | instruction[1] = (sljit_u16)(char1 | bit); |
1490 | instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); |
1491 | sljit_emit_op_custom(compiler, instruction, 6); |
1492 | |
1493 | if (char1 != char2) |
1494 | { |
1495 | /* VREPI */ |
1496 | instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4)); |
1497 | instruction[1] = (sljit_u16)(bit != 0 ? bit : char2); |
1498 | /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ |
1499 | sljit_emit_op_custom(compiler, instruction, 6); |
1500 | } |
1501 | |
1502 | #else /* PCRE2_CODE_UNIT_WIDTH == 32 */ |
1503 | |
1504 | for (int i = 0; i < 2; i++) |
1505 | { |
1506 | replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP3); |
1507 | |
1508 | if (char1 != char2) |
1509 | replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP3); |
1510 | } |
1511 | |
1512 | #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ |
1513 | |
1514 | if (compare_type == vector_compare_match2) |
1515 | { |
1516 | /* VREPI */ |
1517 | instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4)); |
1518 | instruction[1] = 0; |
1519 | instruction[2] = (sljit_u16)((0x8 << 8) | 0x45); |
1520 | sljit_emit_op_custom(compiler, instruction, 6); |
1521 | } |
1522 | |
1523 | load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0); |
1524 | OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15); |
1525 | |
1526 | if (compare_type != vector_compare_match2) |
1527 | { |
1528 | if (compare_type == vector_compare_match1i) |
1529 | fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
1530 | |
1531 | /* VFEE */ |
1532 | instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); |
1533 | instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); |
1534 | instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); |
1535 | sljit_emit_op_custom(compiler, instruction, 6); |
1536 | } |
1537 | else |
1538 | { |
1539 | for (i = 0; i < 3; i++) |
1540 | fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
1541 | |
1542 | /* VFENE */ |
1543 | instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); |
1544 | instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); |
1545 | instruction[2] = (sljit_u16)((0xe << 8) | 0x81); |
1546 | sljit_emit_op_custom(compiler, instruction, 6); |
1547 | } |
1548 | |
1549 | /* VLGVB */ |
1550 | instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind); |
1551 | instruction[1] = 7; |
1552 | instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); |
1553 | sljit_emit_op_custom(compiler, instruction, 6); |
1554 | |
1555 | OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0); |
1556 | quit = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0); |
1557 | |
1558 | OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 16); |
1559 | |
1560 | /* Second part (aligned) */ |
1561 | start = LABEL(); |
1562 | |
1563 | OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 16); |
1564 | |
1565 | add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); |
1566 | |
1567 | load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0); |
1568 | |
1569 | if (compare_type != vector_compare_match2) |
1570 | { |
1571 | if (compare_type == vector_compare_match1i) |
1572 | fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
1573 | |
1574 | /* VFEE */ |
1575 | instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); |
1576 | instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); |
1577 | instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); |
1578 | sljit_emit_op_custom(compiler, instruction, 6); |
1579 | } |
1580 | else |
1581 | { |
1582 | for (i = 0; i < 3; i++) |
1583 | fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
1584 | |
1585 | /* VFENE */ |
1586 | instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); |
1587 | instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); |
1588 | instruction[2] = (sljit_u16)((0xe << 8) | 0x81); |
1589 | sljit_emit_op_custom(compiler, instruction, 6); |
1590 | } |
1591 | |
1592 | sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW); |
1593 | JUMPTO(SLJIT_OVERFLOW, start); |
1594 | |
1595 | /* VLGVB */ |
1596 | instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind); |
1597 | instruction[1] = 7; |
1598 | instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); |
1599 | sljit_emit_op_custom(compiler, instruction, 6); |
1600 | |
1601 | OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0); |
1602 | |
1603 | JUMPHERE(quit); |
1604 | add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); |
1605 | |
1606 | return not_found; |
1607 | } |
1608 | |
1609 | #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1 |
1610 | |
1611 | static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, |
1612 | PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) |
1613 | { |
1614 | DEFINE_COMPILER; |
1615 | sljit_u16 instruction[3]; |
1616 | struct sljit_label *start; |
1617 | #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
1618 | struct sljit_label *restart; |
1619 | #endif |
1620 | struct sljit_jump *quit; |
1621 | struct sljit_jump *jump[2]; |
1622 | vector_compare_type compare1_type = vector_compare_match1; |
1623 | vector_compare_type compare2_type = vector_compare_match1; |
1624 | sljit_u32 bit1 = 0; |
1625 | sljit_u32 bit2 = 0; |
1626 | sljit_s32 diff = IN_UCHARS(offs2 - offs1); |
1627 | sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); |
1628 | sljit_s32 tmp2_reg_ind = sljit_get_register_index(TMP2); |
1629 | sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); |
1630 | sljit_s32 data1_ind = 0; |
1631 | sljit_s32 data2_ind = 1; |
1632 | sljit_s32 tmp1_ind = 2; |
1633 | sljit_s32 tmp2_ind = 3; |
1634 | sljit_s32 cmp1a_ind = 4; |
1635 | sljit_s32 cmp1b_ind = 5; |
1636 | sljit_s32 cmp2a_ind = 6; |
1637 | sljit_s32 cmp2b_ind = 7; |
1638 | sljit_s32 zero_ind = 8; |
1639 | int i; |
1640 | |
1641 | SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); |
1642 | SLJIT_ASSERT(-diff <= (sljit_s32)IN_UCHARS(max_fast_forward_char_pair_offset())); |
1643 | SLJIT_ASSERT(tmp1_reg_ind != 0 && tmp2_reg_ind != 0); |
1644 | |
1645 | if (char1a != char1b) |
1646 | { |
1647 | bit1 = char1a ^ char1b; |
1648 | compare1_type = vector_compare_match1i; |
1649 | |
1650 | if (!is_powerof2(bit1)) |
1651 | { |
1652 | bit1 = 0; |
1653 | compare1_type = vector_compare_match2; |
1654 | } |
1655 | } |
1656 | |
1657 | if (char2a != char2b) |
1658 | { |
1659 | bit2 = char2a ^ char2b; |
1660 | compare2_type = vector_compare_match1i; |
1661 | |
1662 | if (!is_powerof2(bit2)) |
1663 | { |
1664 | bit2 = 0; |
1665 | compare2_type = vector_compare_match2; |
1666 | } |
1667 | } |
1668 | |
1669 | /* Initialize. */ |
1670 | if (common->match_end_ptr != 0) |
1671 | { |
1672 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); |
1673 | OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); |
1674 | OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); |
1675 | |
1676 | OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0); |
1677 | CMOV(SLJIT_LESS, STR_END, TMP1, 0); |
1678 | } |
1679 | |
1680 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); |
1681 | add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
1682 | OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15); |
1683 | |
1684 | #if PCRE2_CODE_UNIT_WIDTH != 32 |
1685 | |
1686 | OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff); |
1687 | |
1688 | /* VREPI */ |
1689 | instruction[0] = (sljit_u16)(0xe700 | (cmp1a_ind << 4)); |
1690 | instruction[1] = (sljit_u16)(char1a | bit1); |
1691 | instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); |
1692 | sljit_emit_op_custom(compiler, instruction, 6); |
1693 | |
1694 | if (char1a != char1b) |
1695 | { |
1696 | /* VREPI */ |
1697 | instruction[0] = (sljit_u16)(0xe700 | (cmp1b_ind << 4)); |
1698 | instruction[1] = (sljit_u16)(bit1 != 0 ? bit1 : char1b); |
1699 | /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ |
1700 | sljit_emit_op_custom(compiler, instruction, 6); |
1701 | } |
1702 | |
1703 | /* VREPI */ |
1704 | instruction[0] = (sljit_u16)(0xe700 | (cmp2a_ind << 4)); |
1705 | instruction[1] = (sljit_u16)(char2a | bit2); |
1706 | /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ |
1707 | sljit_emit_op_custom(compiler, instruction, 6); |
1708 | |
1709 | if (char2a != char2b) |
1710 | { |
1711 | /* VREPI */ |
1712 | instruction[0] = (sljit_u16)(0xe700 | (cmp2b_ind << 4)); |
1713 | instruction[1] = (sljit_u16)(bit2 != 0 ? bit2 : char2b); |
1714 | /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ |
1715 | sljit_emit_op_custom(compiler, instruction, 6); |
1716 | } |
1717 | |
1718 | #else /* PCRE2_CODE_UNIT_WIDTH == 32 */ |
1719 | |
1720 | for (int i = 0; i < 2; i++) |
1721 | { |
1722 | replicate_imm_vector(compiler, i, cmp1a_ind, char1a | bit1, TMP1); |
1723 | |
1724 | if (char1a != char1b) |
1725 | replicate_imm_vector(compiler, i, cmp1b_ind, bit1 != 0 ? bit1 : char1b, TMP1); |
1726 | |
1727 | replicate_imm_vector(compiler, i, cmp2a_ind, char2a | bit2, TMP1); |
1728 | |
1729 | if (char2a != char2b) |
1730 | replicate_imm_vector(compiler, i, cmp2b_ind, bit2 != 0 ? bit2 : char2b, TMP1); |
1731 | } |
1732 | |
1733 | OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff); |
1734 | |
1735 | #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ |
1736 | |
1737 | /* VREPI */ |
1738 | instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4)); |
1739 | instruction[1] = 0; |
1740 | instruction[2] = (sljit_u16)((0x8 << 8) | 0x45); |
1741 | sljit_emit_op_custom(compiler, instruction, 6); |
1742 | |
1743 | #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
1744 | restart = LABEL(); |
1745 | #endif |
1746 | |
1747 | jump[0] = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0); |
1748 | load_from_mem_vector(compiler, TRUE, data2_ind, tmp1_reg_ind, 0); |
1749 | jump[1] = JUMP(SLJIT_JUMP); |
1750 | JUMPHERE(jump[0]); |
1751 | load_from_mem_vector(compiler, FALSE, data2_ind, tmp1_reg_ind, 0); |
1752 | JUMPHERE(jump[1]); |
1753 | |
1754 | load_from_mem_vector(compiler, TRUE, data1_ind, str_ptr_reg_ind, 0); |
1755 | OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 16); |
1756 | |
1757 | for (i = 0; i < 3; i++) |
1758 | { |
1759 | fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind); |
1760 | fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind); |
1761 | } |
1762 | |
1763 | /* VN */ |
1764 | instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); |
1765 | instruction[1] = (sljit_u16)(data2_ind << 12); |
1766 | instruction[2] = (sljit_u16)((0xe << 8) | 0x68); |
1767 | sljit_emit_op_custom(compiler, instruction, 6); |
1768 | |
1769 | /* VFENE */ |
1770 | instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); |
1771 | instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); |
1772 | instruction[2] = (sljit_u16)((0xe << 8) | 0x81); |
1773 | sljit_emit_op_custom(compiler, instruction, 6); |
1774 | |
1775 | /* VLGVB */ |
1776 | instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data1_ind); |
1777 | instruction[1] = 7; |
1778 | instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); |
1779 | sljit_emit_op_custom(compiler, instruction, 6); |
1780 | |
1781 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
1782 | quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); |
1783 | |
1784 | OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16); |
1785 | OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, diff); |
1786 | |
1787 | /* Main loop. */ |
1788 | start = LABEL(); |
1789 | |
1790 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); |
1791 | add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
1792 | |
1793 | load_from_mem_vector(compiler, FALSE, data1_ind, str_ptr_reg_ind, 0); |
1794 | load_from_mem_vector(compiler, FALSE, data2_ind, str_ptr_reg_ind, tmp1_reg_ind); |
1795 | |
1796 | for (i = 0; i < 3; i++) |
1797 | { |
1798 | fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind); |
1799 | fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind); |
1800 | } |
1801 | |
1802 | /* VN */ |
1803 | instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); |
1804 | instruction[1] = (sljit_u16)(data2_ind << 12); |
1805 | instruction[2] = (sljit_u16)((0xe << 8) | 0x68); |
1806 | sljit_emit_op_custom(compiler, instruction, 6); |
1807 | |
1808 | /* VFENE */ |
1809 | instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); |
1810 | instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); |
1811 | instruction[2] = (sljit_u16)((0xe << 8) | 0x81); |
1812 | sljit_emit_op_custom(compiler, instruction, 6); |
1813 | |
1814 | sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW); |
1815 | JUMPTO(SLJIT_OVERFLOW, start); |
1816 | |
1817 | /* VLGVB */ |
1818 | instruction[0] = (sljit_u16)(0xe700 | (tmp2_reg_ind << 4) | data1_ind); |
1819 | instruction[1] = 7; |
1820 | instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); |
1821 | sljit_emit_op_custom(compiler, instruction, 6); |
1822 | |
1823 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
1824 | |
1825 | JUMPHERE(quit); |
1826 | |
1827 | add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
1828 | |
1829 | #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
1830 | if (common->utf) |
1831 | { |
1832 | SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); |
1833 | |
1834 | OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1)); |
1835 | |
1836 | quit = jump_if_utf_char_start(compiler, TMP1); |
1837 | |
1838 | OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
1839 | add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
1840 | |
1841 | /* Rebuild the values compared at restart: TMP2 = STR_PTR rounded down to 16 bytes, TMP1 = STR_PTR + diff. */ |
1842 | OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15); |
1843 | OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff); |
1844 | JUMPTO(SLJIT_JUMP, restart); |
1845 | |
1846 | JUMPHERE(quit); |
1847 | } |
1848 | #endif |
1849 | |
1850 | OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); |
1851 | |
1852 | if (common->match_end_ptr != 0) |
1853 | OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); |
1854 | } |
1855 | |
1856 | #endif /* SLJIT_CONFIG_S390X */ |
1857 | |
1858 | #endif /* !SUPPORT_VALGRIND */ |
1859 | |