1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#ifndef ROSE_COUNTING_MIRACLE_H
30#define ROSE_COUNTING_MIRACLE_H
31
32#include "ue2common.h"
33#include "runtime.h"
34#include "rose_internal.h"
35#include "nfa/nfa_api_queue.h"
36#include "util/simd_utils.h"
37
38/** \brief Maximum number of bytes to scan when looking for a "counting miracle"
39 * stop character. */
40#define COUNTING_MIRACLE_LEN_MAX 256
41
42static really_inline
43char roseCountingMiracleScan(u8 c, const u8 *d, const u8 *d_end,
44 u32 target_count, u32 *count_inout,
45 const u8 **d_out) {
46 assert(d <= d_end);
47
48 u32 count = *count_inout;
49
50 m128 chars = set16x8(c);
51
52 for (; d + 16 <= d_end; d_end -= 16) {
53 m128 data = loadu128(d_end - 16);
54 u32 z1 = movemask128(eq128(chars, data));
55 count += popcount32(z1);
56
57 if (count >= target_count) {
58 *d_out = d_end - 16;
59 *count_inout = count;
60 return 1;
61 }
62 }
63
64 if (d != d_end) {
65 char temp[sizeof(m128)];
66 assert(d + sizeof(temp) > d_end);
67 memset(temp, c + 1, sizeof(temp));
68 memcpy(temp, d, d_end - d);
69 m128 data = loadu128(temp);
70 u32 z1 = movemask128(eq128(chars, data));
71 count += popcount32(z1);
72
73 if (count >= target_count) {
74 *d_out = d;
75 *count_inout = count;
76 return 1;
77 }
78 }
79
80 *count_inout = count;
81 return 0;
82}
83
84#define GET_LO_4(chars) and128(chars, low4bits)
85#define GET_HI_4(chars) rshift64_m128(andnot128(low4bits, chars), 4)
86
87static really_inline
88u32 roseCountingMiracleScanShufti(m128 mask_lo, m128 mask_hi, u8 poison,
89 const u8 *d, const u8 *d_end,
90 u32 target_count, u32 *count_inout,
91 const u8 **d_out) {
92 assert(d <= d_end);
93
94 u32 count = *count_inout;
95
96 const m128 zeroes = zeroes128();
97 const m128 low4bits = _mm_set1_epi8(0xf);
98
99 for (; d + 16 <= d_end; d_end -= 16) {
100 m128 data = loadu128(d_end - 16);
101 m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(data));
102 m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(data));
103 m128 t = and128(c_lo, c_hi);
104 u32 z1 = movemask128(eq128(t, zeroes));
105 count += popcount32(z1 ^ 0xffff);
106
107 if (count >= target_count) {
108 *d_out = d_end - 16;
109 *count_inout = count;
110 return 1;
111 }
112 }
113
114 if (d != d_end) {
115 char temp[sizeof(m128)];
116 assert(d + sizeof(temp) > d_end);
117 memset(temp, poison, sizeof(temp));
118 memcpy(temp, d, d_end - d);
119 m128 data = loadu128(temp);
120 m128 c_lo = pshufb_m128(mask_lo, GET_LO_4(data));
121 m128 c_hi = pshufb_m128(mask_hi, GET_HI_4(data));
122 m128 t = and128(c_lo, c_hi);
123 u32 z1 = movemask128(eq128(t, zeroes));
124 count += popcount32(z1 ^ 0xffff);
125
126 if (count >= target_count) {
127 *d_out = d;
128 *count_inout = count;
129 return 1;
130 }
131 }
132
133 *count_inout = count;
134 return 0;
135}
136
137/**
138 * \brief "Counting Miracle" scan: If we see more than N instances of a
139 * particular character class we know that the engine must be dead.
140 *
141 * Scans the buffer/history between relative locations \a begin_loc and \a
142 * end_loc, and returns a miracle location (if any) that appears in the stream
143 * after \a begin_loc.
144 *
145 * Returns 1 if some bytes can be skipped and sets \a miracle_loc
146 * appropriately, 0 otherwise.
147 */
148static never_inline
149int roseCountingMiracleOccurs(const struct RoseEngine *t,
150 const struct LeftNfaInfo *left,
151 const struct core_info *ci, s64a begin_loc,
152 const s64a end_loc, s64a *miracle_loc) {
153 if (!left->countingMiracleOffset) {
154 return 0;
155 }
156
157 const struct RoseCountingMiracle *cm
158 = (const void *)((const char *)t + left->countingMiracleOffset);
159
160 assert(!left->transient);
161 assert(cm->count > 1); /* should be a normal miracle then */
162
163 DEBUG_PRINTF("looking for counting miracle over [%lld,%lld], maxLag=%u\n",
164 begin_loc, end_loc, left->maxLag);
165 DEBUG_PRINTF("ci->len=%zu, ci->hlen=%zu\n", ci->len, ci->hlen);
166
167 assert(begin_loc <= end_loc);
168 assert(begin_loc >= -(s64a)ci->hlen);
169 assert(end_loc <= (s64a)ci->len);
170
171 const s64a scan_end_loc = end_loc - left->maxLag;
172 if (scan_end_loc <= begin_loc) {
173 DEBUG_PRINTF("nothing to scan\n");
174 return 0;
175 }
176
177 const s64a start = MAX(begin_loc, scan_end_loc - COUNTING_MIRACLE_LEN_MAX);
178 DEBUG_PRINTF("scan [%lld..%lld]\n", start, scan_end_loc);
179
180 u32 count = 0;
181
182 s64a m_loc = start;
183
184 if (!cm->shufti) {
185 u8 c = cm->c;
186
187 // Scan buffer.
188 const s64a buf_scan_start = MAX(0, start);
189 if (scan_end_loc > buf_scan_start) {
190 const u8 *buf = ci->buf;
191 const u8 *d = buf + scan_end_loc;
192 const u8 *d_start = buf + buf_scan_start;
193 const u8 *d_out;
194 if (roseCountingMiracleScan(c, d_start, d, cm->count, &count,
195 &d_out)) {
196 assert(d_out >= d_start);
197 m_loc = (d_out - d_start) + buf_scan_start;
198 goto success;
199 }
200 }
201
202 // Scan history.
203 if (start < 0) {
204 const u8 *hbuf_end = ci->hbuf + ci->hlen;
205 const u8 *d = hbuf_end + MIN(0, scan_end_loc);
206 const u8 *d_start = hbuf_end + start;
207 const u8 *d_out;
208 if (roseCountingMiracleScan(c, d_start, d, cm->count, &count,
209 &d_out)) {
210 assert(d_out >= d_start);
211 m_loc = (d_out - d_start) + start;
212 goto success;
213 }
214 }
215 } else {
216 m128 lo = cm->lo;
217 m128 hi = cm->hi;
218 u8 poison = cm->poison;
219
220 // Scan buffer.
221 const s64a buf_scan_start = MAX(0, start);
222 if (scan_end_loc > buf_scan_start) {
223 const u8 *buf = ci->buf;
224 const u8 *d = buf + scan_end_loc;
225 const u8 *d_start = buf + buf_scan_start;
226 const u8 *d_out;
227 if (roseCountingMiracleScanShufti(lo, hi, poison, d_start, d,
228 cm->count, &count, &d_out)) {
229 assert(d_out >= d_start);
230 m_loc = (d_out - d_start) + buf_scan_start;
231 goto success;
232 }
233 }
234
235 // Scan history.
236 if (start < 0) {
237 const u8 *hbuf_end = ci->hbuf + ci->hlen;
238 const u8 *d = hbuf_end + MIN(0, scan_end_loc);
239 const u8 *d_start = hbuf_end + start;
240 const u8 *d_out;
241 if (roseCountingMiracleScanShufti(lo, hi, poison, d_start, d,
242 cm->count, &count, &d_out)) {
243 assert(d_out >= d_start);
244 m_loc = (d_out - d_start) + start;
245 goto success;
246 }
247 }
248 }
249
250 DEBUG_PRINTF("found %u/%u\n", count, cm->count);
251 return 0;
252
253success:
254 DEBUG_PRINTF("found %u/%u\n", count, cm->count);
255 assert(count >= cm->count);
256 assert(m_loc < scan_end_loc);
257 assert(m_loc >= start);
258
259 *miracle_loc = m_loc;
260 return 1;
261}
262
263#endif
264