1/*
2 * Copyright (c) 2016-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * In order to use this macro, the following things need to be defined:
31 *
32 * - SHENG_IMPL (name of the Sheng implementation function)
33 * - INTERESTING_FUNC (name of the function checking for accept, accel or dead
34 * states)
35 * - INNER_DEAD_FUNC (name of the inner function checking for dead states)
36 * - OUTER_DEAD_FUNC (name of the outer function checking for dead states)
37 * - INNER_ACCEL_FUNC (name of the inner function checking for accel states)
38 * - OUTER_ACCEL_FUNC (name of the outer function checking for accel states)
39 * - ACCEPT_FUNC (name of the function checking for accept state)
40 * - STOP_AT_MATCH (can be 1 or 0, enable or disable stop at match)
41 */
42
43/* unrolled 4-byte-at-a-time version.
44 *
45 * we put innerDeadFunc inside interestingFunc() block so that we don't pay for
46 * dead states checking. however, if interestingFunc is dummy, innerDeadFunc
47 * gets lost with it, so we need an additional check outside the
48 * interestingFunc() branch - it's normally dummy so we don't pay for it, but
49 * when interestingFunc is dummy, outerDeadFunc should be set if we want to
50 * check for dead states.
51 *
52 * also, deadFunc only checks the last known state, but since we can't ever get
53 * out of the dead state and we don't really care where we died, it's not a
54 * problem.
55 */
56static really_inline
57char SHENG_IMPL(u8 *state, NfaCallback cb, void *ctxt, const struct sheng *s,
58 u8 *const cached_accept_state, ReportID *const cached_accept_id,
59 u8 single, u64a base_offset, const u8 *buf, const u8 *start,
60 const u8 *end, const u8 **scan_end) {
61 DEBUG_PRINTF("Starting DFAx4 execution in state %u\n",
62 *state & SHENG_STATE_MASK);
63 const u8 *cur_buf = start;
64 const u8 *min_accel_dist = start;
65 base_offset++;
66 DEBUG_PRINTF("Scanning %llu bytes\n", (u64a)(end - start));
67
68 if (INNER_ACCEL_FUNC(*state) || OUTER_ACCEL_FUNC(*state)) {
69 DEBUG_PRINTF("Accel state reached @ 0\n");
70 const union AccelAux *aaux = get_accel(s, *state & SHENG_STATE_MASK);
71 const u8 *new_offset = run_accel(aaux, cur_buf, end);
72 if (new_offset < cur_buf + BAD_ACCEL_DIST) {
73 min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
74 } else {
75 min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
76 }
77 DEBUG_PRINTF("Next accel chance: %llu\n",
78 (u64a)(min_accel_dist - start));
79 DEBUG_PRINTF("Accel scanned %zu bytes\n", new_offset - cur_buf);
80 cur_buf = new_offset;
81 DEBUG_PRINTF("New offset: %lli\n", (s64a)(cur_buf - start));
82 }
83 if (INNER_DEAD_FUNC(*state) || OUTER_DEAD_FUNC(*state)) {
84 DEBUG_PRINTF("Dead on arrival\n");
85 *scan_end = end;
86 return MO_CONTINUE_MATCHING;
87 }
88
89 m128 cur_state = set16x8(*state);
90 const m128 *masks = s->shuffle_masks;
91
92 while (likely(end - cur_buf >= 4)) {
93 const u8 *b1 = cur_buf;
94 const u8 *b2 = cur_buf + 1;
95 const u8 *b3 = cur_buf + 2;
96 const u8 *b4 = cur_buf + 3;
97 const u8 c1 = *b1;
98 const u8 c2 = *b2;
99 const u8 c3 = *b3;
100 const u8 c4 = *b4;
101
102 const m128 shuffle_mask1 = masks[c1];
103 cur_state = pshufb_m128(shuffle_mask1, cur_state);
104 const u8 a1 = movd(cur_state);
105
106 const m128 shuffle_mask2 = masks[c2];
107 cur_state = pshufb_m128(shuffle_mask2, cur_state);
108 const u8 a2 = movd(cur_state);
109
110 const m128 shuffle_mask3 = masks[c3];
111 cur_state = pshufb_m128(shuffle_mask3, cur_state);
112 const u8 a3 = movd(cur_state);
113
114 const m128 shuffle_mask4 = masks[c4];
115 cur_state = pshufb_m128(shuffle_mask4, cur_state);
116 const u8 a4 = movd(cur_state);
117
118 DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
119 DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a1, (a1 & 0xF0) >> 4, a1 & 0xF);
120
121 DEBUG_PRINTF("c: %02hhx '%c'\n", c2, ourisprint(c2) ? c2 : '?');
122 DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a2, (a2 & 0xF0) >> 4, a2 & 0xF);
123
124 DEBUG_PRINTF("c: %02hhx '%c'\n", c3, ourisprint(c3) ? c3 : '?');
125 DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a3, (a3 & 0xF0) >> 4, a3 & 0xF);
126
127 DEBUG_PRINTF("c: %02hhx '%c'\n", c4, ourisprint(c4) ? c4 : '?');
128 DEBUG_PRINTF("s: %u (hi: %u lo: %u)\n", a4, (a4 & 0xF0) >> 4, a4 & 0xF);
129
130 if (unlikely(INTERESTING_FUNC(a1, a2, a3, a4))) {
131 if (ACCEPT_FUNC(a1)) {
132 u64a match_offset = base_offset + b1 - buf;
133 DEBUG_PRINTF("Accept state %u reached\n",
134 a1 & SHENG_STATE_MASK);
135 DEBUG_PRINTF("Match @ %llu\n", match_offset);
136 if (STOP_AT_MATCH) {
137 DEBUG_PRINTF("Stopping at match @ %lli\n",
138 (s64a)(b1 - start));
139 *scan_end = b1;
140 *state = a1;
141 return MO_MATCHES_PENDING;
142 }
143 if (single) {
144 if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
145 MO_HALT_MATCHING) {
146 return MO_HALT_MATCHING;
147 }
148 } else {
149 if (fireReports(s, cb, ctxt, a1, match_offset,
150 cached_accept_state, cached_accept_id,
151 0) == MO_HALT_MATCHING) {
152 return MO_HALT_MATCHING;
153 }
154 }
155 }
156 if (ACCEPT_FUNC(a2)) {
157 u64a match_offset = base_offset + b2 - buf;
158 DEBUG_PRINTF("Accept state %u reached\n",
159 a2 & SHENG_STATE_MASK);
160 DEBUG_PRINTF("Match @ %llu\n", match_offset);
161 if (STOP_AT_MATCH) {
162 DEBUG_PRINTF("Stopping at match @ %lli\n",
163 (s64a)(b2 - start));
164 *scan_end = b2;
165 *state = a2;
166 return MO_MATCHES_PENDING;
167 }
168 if (single) {
169 if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
170 MO_HALT_MATCHING) {
171 return MO_HALT_MATCHING;
172 }
173 } else {
174 if (fireReports(s, cb, ctxt, a2, match_offset,
175 cached_accept_state, cached_accept_id,
176 0) == MO_HALT_MATCHING) {
177 return MO_HALT_MATCHING;
178 }
179 }
180 }
181 if (ACCEPT_FUNC(a3)) {
182 u64a match_offset = base_offset + b3 - buf;
183 DEBUG_PRINTF("Accept state %u reached\n",
184 a3 & SHENG_STATE_MASK);
185 DEBUG_PRINTF("Match @ %llu\n", match_offset);
186 if (STOP_AT_MATCH) {
187 DEBUG_PRINTF("Stopping at match @ %lli\n",
188 (s64a)(b3 - start));
189 *scan_end = b3;
190 *state = a3;
191 return MO_MATCHES_PENDING;
192 }
193 if (single) {
194 if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
195 MO_HALT_MATCHING) {
196 return MO_HALT_MATCHING;
197 }
198 } else {
199 if (fireReports(s, cb, ctxt, a3, match_offset,
200 cached_accept_state, cached_accept_id,
201 0) == MO_HALT_MATCHING) {
202 return MO_HALT_MATCHING;
203 }
204 }
205 }
206 if (ACCEPT_FUNC(a4)) {
207 u64a match_offset = base_offset + b4 - buf;
208 DEBUG_PRINTF("Accept state %u reached\n",
209 a4 & SHENG_STATE_MASK);
210 DEBUG_PRINTF("Match @ %llu\n", match_offset);
211 if (STOP_AT_MATCH) {
212 DEBUG_PRINTF("Stopping at match @ %lli\n",
213 (s64a)(b4 - start));
214 *scan_end = b4;
215 *state = a4;
216 return MO_MATCHES_PENDING;
217 }
218 if (single) {
219 if (fireSingleReport(cb, ctxt, s->report, match_offset) ==
220 MO_HALT_MATCHING) {
221 return MO_HALT_MATCHING;
222 }
223 } else {
224 if (fireReports(s, cb, ctxt, a4, match_offset,
225 cached_accept_state, cached_accept_id,
226 0) == MO_HALT_MATCHING) {
227 return MO_HALT_MATCHING;
228 }
229 }
230 }
231 if (INNER_DEAD_FUNC(a4)) {
232 DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(b4 - buf));
233 *scan_end = end;
234 *state = a4;
235 return MO_CONTINUE_MATCHING;
236 }
237 if (cur_buf > min_accel_dist && INNER_ACCEL_FUNC(a4)) {
238 DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf));
239 const union AccelAux *aaux =
240 get_accel(s, a4 & SHENG_STATE_MASK);
241 const u8 *new_offset = run_accel(aaux, cur_buf + 4, end);
242 if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) {
243 min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
244 } else {
245 min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
246 }
247 DEBUG_PRINTF("Next accel chance: %llu\n",
248 (u64a)(min_accel_dist - start));
249 DEBUG_PRINTF("Accel scanned %llu bytes\n",
250 (u64a)(new_offset - cur_buf - 4));
251 cur_buf = new_offset;
252 DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf));
253 continue;
254 }
255 }
256 if (OUTER_DEAD_FUNC(a4)) {
257 DEBUG_PRINTF("Dead state reached @ %lli\n", (s64a)(cur_buf - buf));
258 *scan_end = end;
259 *state = a4;
260 return MO_CONTINUE_MATCHING;
261 };
262 if (cur_buf > min_accel_dist && OUTER_ACCEL_FUNC(a4)) {
263 DEBUG_PRINTF("Accel state reached @ %lli\n", (s64a)(b4 - buf));
264 const union AccelAux *aaux = get_accel(s, a4 & SHENG_STATE_MASK);
265 const u8 *new_offset = run_accel(aaux, cur_buf + 4, end);
266 if (new_offset < cur_buf + 4 + BAD_ACCEL_DIST) {
267 min_accel_dist = new_offset + BIG_ACCEL_PENALTY;
268 } else {
269 min_accel_dist = new_offset + SMALL_ACCEL_PENALTY;
270 }
271 DEBUG_PRINTF("Next accel chance: %llu\n",
272 (u64a)(min_accel_dist - start));
273 DEBUG_PRINTF("Accel scanned %llu bytes\n",
274 (u64a)(new_offset - cur_buf - 4));
275 cur_buf = new_offset;
276 DEBUG_PRINTF("New offset: %llu\n", (u64a)(cur_buf - buf));
277 continue;
278 };
279 cur_buf += 4;
280 }
281 *state = movd(cur_state);
282 *scan_end = cur_buf;
283 return MO_CONTINUE_MATCHING;
284}
285