1 | /* |
2 | * Copyright (c) 2016-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | /* |
30 | * In order to use this macro, the following things need to be defined: |
31 | * |
32 | * - SHENG_IMPL (name of the Sheng implementation function) |
33 | * - INTERESTING_FUNC (name of the function checking for accept, accel or dead |
34 | * states) |
35 | * - INNER_DEAD_FUNC (name of the inner function checking for dead states) |
36 | * - OUTER_DEAD_FUNC (name of the outer function checking for dead states) |
37 | * - INNER_ACCEL_FUNC (name of the inner function checking for accel states) |
38 | * - OUTER_ACCEL_FUNC (name of the outer function checking for accel states) |
39 | * - ACCEPT_FUNC (name of the function checking for accept state) |
40 | * - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match) |
41 | */ |
42 | |
43 | /* unrolled 4-byte-at-a-time version. |
44 | * |
45 | * we put innerDeadFunc inside interestingFunc() block so that we don't pay for |
46 | * dead states checking. however, if interestingFunc is dummy, innerDeadFunc |
47 | * gets lost with it, so we need an additional check outside the |
48 | * interestingFunc() branch - it's normally dummy so we don't pay for it, but |
49 | * when interestingFunc is dummy, outerDeadFunc should be set if we want to |
50 | * check for dead states. |
51 | * |
52 | * also, deadFunc only checks the last known state, but since we can't ever get |
53 | * out of the dead state and we don't really care where we died, it's not a |
54 | * problem. |
55 | */ |
56 | static really_inline |
57 | char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s, |
58 | u8 *const cached_accept_state, ReportID *const cached_accept_id, |
59 | u8 single, u64a base_offset, const u8 *buf, const u8 *start, |
60 | const u8 *end, const u8 **scan_end) { |
61 | DEBUG_PRINTF("Starting DFAx4 execution in state %u\n" , |
62 | *state & SHENG_STATE_MASK); |
63 | const u8 *cur_buf = start; |
64 | const u8 *min_accel_dist = start; |
65 | base_offset++; |
66 | DEBUG_PRINTF("Scanning %llu bytes\n" , (u64a)(end - start)); |
67 | |
68 | if (INNER_ACCEL_FUNC(*state) || OUTER_ACCEL_FUNC(*state)) { |
69 | DEBUG_PRINTF("Accel state reached @ 0\n" ); |
70 | const union AccelAux *aaux = get_accel(s, *state & SHENG_STATE_MASK); |
71 | const u8 *new_offset = run_accel(aaux, cur_buf, end); |
72 | if (new_offset < cur_buf + BAD_ACCEL_DIST) { |
73 | min_accel_dist = new_offset + BIG_ACCEL_PENALTY; |
74 | } else { |
75 | min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; |
76 | } |
77 | DEBUG_PRINTF("Next accel chance: %llu\n" , |
78 | (u64a)(min_accel_dist - start)); |
79 | DEBUG_PRINTF("Accel scanned %zu bytes\n" , new_offset - cur_buf); |
80 | cur_buf = new_offset; |
81 | DEBUG_PRINTF("New offset: %lli\n" , (s64a)(cur_buf - start)); |
82 | } |
83 | if (INNER_DEAD_FUNC(*state) || OUTER_DEAD_FUNC(*state)) { |
84 | DEBUG_PRINTF("Dead on arrival\n" ); |
85 | *scan_end = end; |
86 | return MO_CONTINUE_MATCHING; |
87 | } |
88 | |
89 | m128 cur_state = set16x8(*state); |
90 | const m128 *masks = s->shuffle_masks; |
91 | |
92 | while (likely(end - cur_buf >= 4)) { |
93 | const u8 *b1 = cur_buf; |
94 | const u8 *b2 = cur_buf + 1; |
95 | const u8 *b3 = cur_buf + 2; |
96 | const u8 *b4 = cur_buf + 3; |
97 | const u8 c1 = *b1; |
98 | const u8 c2 = *b2; |
99 | const u8 c3 = *b3; |
100 | const u8 c4 = *b4; |
101 | |
102 | const m128 shuffle_mask1 = masks[c1]; |
103 | cur_state = pshufb_m128(shuffle_mask1, cur_state); |
104 | const u8 a1 = movd(cur_state); |
105 | |
106 | const m128 shuffle_mask2 = masks[c2]; |
107 | cur_state = pshufb_m128(shuffle_mask2, cur_state); |
108 | const u8 a2 = movd(cur_state); |
109 | |
110 | const m128 shuffle_mask3 = masks[c3]; |
111 | cur_state = pshufb_m128(shuffle_mask3, cur_state); |
112 | const u8 a3 = movd(cur_state); |
113 | |
114 | const m128 shuffle_mask4 = masks[c4]; |
115 | cur_state = pshufb_m128(shuffle_mask4, cur_state); |
116 | const u8 a4 = movd(cur_state); |
117 | |
118 | DEBUG_PRINTF("c: %02hhx '%c'\n" , c1, ourisprint(c1) ? c1 : '?'); |
119 | DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n" , a1, (a1 & 0xF0) >> 4, a1 & 0xF); |
120 | |
121 | DEBUG_PRINTF("c: %02hhx '%c'\n" , c2, ourisprint(c2) ? c2 : '?'); |
122 | DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n" , a2, (a2 & 0xF0) >> 4, a2 & 0xF); |
123 | |
124 | DEBUG_PRINTF("c: %02hhx '%c'\n" , c3, ourisprint(c3) ? c3 : '?'); |
125 | DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n" , a3, (a3 & 0xF0) >> 4, a3 & 0xF); |
126 | |
127 | DEBUG_PRINTF("c: %02hhx '%c'\n" , c4, ourisprint(c4) ? c4 : '?'); |
128 | DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n" , a4, (a4 & 0xF0) >> 4, a4 & 0xF); |
129 | |
130 | if (unlikely(INTERESTING_FUNC(a1, a2, a3, a4))) { |
131 | if (ACCEPT_FUNC(a1)) { |
132 | u64a match_offset = base_offset + b1 - buf; |
133 | DEBUG_PRINTF("Accept state %u reached\n" , |
134 | a1 & SHENG_STATE_MASK); |
135 | DEBUG_PRINTF("Match @ %llu\n" , match_offset); |
136 | if (STOP_AT_MATCH) { |
137 | DEBUG_PRINTF("Stopping at match @ %lli\n" , |
138 | (s64a)(b1 - start)); |
139 | *scan_end = b1; |
140 | *state = a1; |
141 | return MO_MATCHES_PENDING; |
142 | } |
143 | if (single) { |
144 | if (fireSingleReport(cb, ctxt, s->report, match_offset) == |
145 | MO_HALT_MATCHING) { |
146 | return MO_HALT_MATCHING; |
147 | } |
148 | } else { |
149 | if (fireReports(s, cb, ctxt, a1, match_offset, |
150 | cached_accept_state, cached_accept_id, |
151 | 0) == MO_HALT_MATCHING) { |
152 | return MO_HALT_MATCHING; |
153 | } |
154 | } |
155 | } |
156 | if (ACCEPT_FUNC(a2)) { |
157 | u64a match_offset = base_offset + b2 - buf; |
158 | DEBUG_PRINTF("Accept state %u reached\n" , |
159 | a2 & SHENG_STATE_MASK); |
160 | DEBUG_PRINTF("Match @ %llu\n" , match_offset); |
161 | if (STOP_AT_MATCH) { |
162 | DEBUG_PRINTF("Stopping at match @ %lli\n" , |
163 | (s64a)(b2 - start)); |
164 | *scan_end = b2; |
165 | *state = a2; |
166 | return MO_MATCHES_PENDING; |
167 | } |
168 | if (single) { |
169 | if (fireSingleReport(cb, ctxt, s->report, match_offset) == |
170 | MO_HALT_MATCHING) { |
171 | return MO_HALT_MATCHING; |
172 | } |
173 | } else { |
174 | if (fireReports(s, cb, ctxt, a2, match_offset, |
175 | cached_accept_state, cached_accept_id, |
176 | 0) == MO_HALT_MATCHING) { |
177 | return MO_HALT_MATCHING; |
178 | } |
179 | } |
180 | } |
181 | if (ACCEPT_FUNC(a3)) { |
182 | u64a match_offset = base_offset + b3 - buf; |
183 | DEBUG_PRINTF("Accept state %u reached\n" , |
184 | a3 & SHENG_STATE_MASK); |
185 | DEBUG_PRINTF("Match @ %llu\n" , match_offset); |
186 | if (STOP_AT_MATCH) { |
187 | DEBUG_PRINTF("Stopping at match @ %lli\n" , |
188 | (s64a)(b3 - start)); |
189 | *scan_end = b3; |
190 | *state = a3; |
191 | return MO_MATCHES_PENDING; |
192 | } |
193 | if (single) { |
194 | if (fireSingleReport(cb, ctxt, s->report, match_offset) == |
195 | MO_HALT_MATCHING) { |
196 | return MO_HALT_MATCHING; |
197 | } |
198 | } else { |
199 | if (fireReports(s, cb, ctxt, a3, match_offset, |
200 | cached_accept_state, cached_accept_id, |
201 | 0) == MO_HALT_MATCHING) { |
202 | return MO_HALT_MATCHING; |
203 | } |
204 | } |
205 | } |
206 | if (ACCEPT_FUNC(a4)) { |
207 | u64a match_offset = base_offset + b4 - buf; |
208 | DEBUG_PRINTF("Accept state %u reached\n" , |
209 | a4 & SHENG_STATE_MASK); |
210 | DEBUG_PRINTF("Match @ %llu\n" , match_offset); |
211 | if (STOP_AT_MATCH) { |
212 | DEBUG_PRINTF("Stopping at match @ %lli\n" , |
213 | (s64a)(b4 - start)); |
214 | *scan_end = b4; |
215 | *state = a4; |
216 | return MO_MATCHES_PENDING; |
217 | } |
218 | if (single) { |
219 | if (fireSingleReport(cb, ctxt, s->report, match_offset) == |
220 | MO_HALT_MATCHING) { |
221 | return MO_HALT_MATCHING; |
222 | } |
223 | } else { |
224 | if (fireReports(s, cb, ctxt, a4, match_offset, |
225 | cached_accept_state, cached_accept_id, |
226 | 0) == MO_HALT_MATCHING) { |
227 | return MO_HALT_MATCHING; |
228 | } |
229 | } |
230 | } |
231 | if (INNER_DEAD_FUNC(a4)) { |
232 | DEBUG_PRINTF("Dead state reached @ %lli\n" , (s64a)(b4 - buf)); |
233 | *scan_end = end; |
234 | *state = a4; |
235 | return MO_CONTINUE_MATCHING; |
236 | } |
237 | if (cur_buf > min_accel_dist && INNER_ACCEL_FUNC(a4)) { |
238 | DEBUG_PRINTF("Accel state reached @ %lli\n" , (s64a)(b4 - buf)); |
239 | const union AccelAux *aaux = |
240 | get_accel(s, a4 & SHENG_STATE_MASK); |
241 | const u8 *new_offset = run_accel(aaux, cur_buf + 4, end); |
242 | if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) { |
243 | min_accel_dist = new_offset + BIG_ACCEL_PENALTY; |
244 | } else { |
245 | min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; |
246 | } |
247 | DEBUG_PRINTF("Next accel chance: %llu\n" , |
248 | (u64a)(min_accel_dist - start)); |
249 | DEBUG_PRINTF("Accel scanned %llu bytes\n" , |
250 | (u64a)(new_offset - cur_buf - 4)); |
251 | cur_buf = new_offset; |
252 | DEBUG_PRINTF("New offset: %llu\n" , (u64a)(cur_buf - buf)); |
253 | continue; |
254 | } |
255 | } |
256 | if (OUTER_DEAD_FUNC(a4)) { |
257 | DEBUG_PRINTF("Dead state reached @ %lli\n" , (s64a)(cur_buf - buf)); |
258 | *scan_end = end; |
259 | *state = a4; |
260 | return MO_CONTINUE_MATCHING; |
261 | }; |
262 | if (cur_buf > min_accel_dist && OUTER_ACCEL_FUNC(a4)) { |
263 | DEBUG_PRINTF("Accel state reached @ %lli\n" , (s64a)(b4 - buf)); |
264 | const union AccelAux *aaux = get_accel(s, a4 & SHENG_STATE_MASK); |
265 | const u8 *new_offset = run_accel(aaux, cur_buf + 4, end); |
266 | if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) { |
267 | min_accel_dist = new_offset + BIG_ACCEL_PENALTY; |
268 | } else { |
269 | min_accel_dist = new_offset + SMALL_ACCEL_PENALTY; |
270 | } |
271 | DEBUG_PRINTF("Next accel chance: %llu\n" , |
272 | (u64a)(min_accel_dist - start)); |
273 | DEBUG_PRINTF("Accel scanned %llu bytes\n" , |
274 | (u64a)(new_offset - cur_buf - 4)); |
275 | cur_buf = new_offset; |
276 | DEBUG_PRINTF("New offset: %llu\n" , (u64a)(cur_buf - buf)); |
277 | continue; |
278 | }; |
279 | cur_buf += 4; |
280 | } |
281 | *state = movd(cur_state); |
282 | *scan_end = cur_buf; |
283 | return MO_CONTINUE_MATCHING; |
284 | } |
285 | |