1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** \file
30 * \brief Hamster Wheel Literal Matcher: runtime.
31 */
32#include "hwlm.h"
33#include "hwlm_internal.h"
34#include "noodle_engine.h"
35#include "scratch.h"
36#include "ue2common.h"
37#include "fdr/fdr.h"
38#include "nfa/accel.h"
39#include "nfa/shufti.h"
40#include "nfa/truffle.h"
41#include "nfa/vermicelli.h"
42#include <string.h>
43
44#define MIN_ACCEL_LEN_BLOCK 16
45#define MIN_ACCEL_LEN_STREAM 16
46
47static really_inline
48const u8 *run_hwlm_accel(const union AccelAux *aux, const u8 *ptr,
49 const u8 *end) {
50 switch (aux->accel_type) {
51 case ACCEL_VERM:
52 DEBUG_PRINTF("single vermicelli for 0x%02hhx\n", aux->verm.c);
53 return vermicelliExec(aux->verm.c, 0, ptr, end);
54 case ACCEL_VERM_NOCASE:
55 DEBUG_PRINTF("single vermicelli-nocase for 0x%02hhx\n", aux->verm.c);
56 return vermicelliExec(aux->verm.c, 1, ptr, end);
57 case ACCEL_DVERM:
58 DEBUG_PRINTF("double vermicelli for 0x%02hhx%02hhx\n", aux->dverm.c1,
59 aux->dverm.c2);
60 return vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 0, ptr, end);
61 case ACCEL_DVERM_NOCASE:
62 DEBUG_PRINTF("double vermicelli-nocase for 0x%02hhx%02hhx\n",
63 aux->dverm.c1, aux->dverm.c2);
64 return vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2, 1, ptr, end);
65 case ACCEL_SHUFTI:
66 DEBUG_PRINTF("single shufti\n");
67 return shuftiExec(aux->shufti.lo, aux->shufti.hi, ptr, end);
68 case ACCEL_TRUFFLE:
69 DEBUG_PRINTF("truffle\n");
70 return truffleExec(aux->truffle.mask1, aux->truffle.mask2, ptr, end);
71 default:
72 /* no acceleration, fall through and return current ptr */
73 DEBUG_PRINTF("no accel; %u\n", (int)aux->accel_type);
74 assert(aux->accel_type == ACCEL_NONE);
75 return ptr;
76 }
77}
78
79static really_inline
80void do_accel_block(const union AccelAux *aux, const u8 *buf, size_t len,
81 size_t *start) {
82 if (len - *start < MIN_ACCEL_LEN_BLOCK) {
83 return;
84 }
85
86 const u8 *ptr = buf + *start;
87 const u8 *end = buf + len;
88 const u8 offset = aux->generic.offset;
89 ptr = run_hwlm_accel(aux, ptr, end);
90
91 if (offset) {
92 ptr -= offset;
93 if (ptr < buf) {
94 ptr = buf;
95 }
96 }
97 assert(ptr >= buf);
98 *start = ptr - buf;
99}
100
101static really_inline
102int inaccurate_accel(u8 type) {
103 /* accels which don't always catch up to the boundary
104 * DSHUFTI is also inaccurate but it is not used by the hamsters */
105 return type == ACCEL_DVERM_NOCASE || type == ACCEL_DVERM;
106}
107
108static never_inline
109void do_accel_streaming(const union AccelAux *aux, const u8 *hbuf, size_t hlen,
110 const u8 *buf, size_t len, size_t *start) {
111 if (aux->accel_type == ACCEL_NONE || len - *start < MIN_ACCEL_LEN_STREAM) {
112 return;
113 }
114
115 const u8 offset = aux->generic.offset;
116
117 DEBUG_PRINTF("using accel %hhu offset %hhu\n", aux->accel_type, offset);
118
119 // Scan history buffer, but only if the start offset (which always refers to
120 // buf) is zero.
121
122 if (!*start && hlen) {
123 const u8 *ptr1 = hbuf;
124 const u8 *end1 = hbuf + hlen;
125 if (hlen >= 16) {
126 ptr1 = run_hwlm_accel(aux, ptr1, end1);
127 }
128
129 if ((hlen <= 16 || inaccurate_accel(aux->accel_type))
130 && end1 != ptr1 && end1 - ptr1 <= 16) {
131 DEBUG_PRINTF("already scanned %zu/%zu\n", ptr1 - hbuf, hlen);
132 /* see if we can finish off the history buffer completely */
133 u8 ALIGN_DIRECTIVE temp[17];
134 ptrdiff_t tlen = end1 - ptr1;
135 memcpy(temp, ptr1, tlen);
136 memset(temp + tlen, 0, 17 - tlen);
137 if (len) { /* for dverm */
138 temp[end1 - ptr1] = *buf;
139 }
140
141 const u8 *tempp = run_hwlm_accel(aux, temp, temp + 17);
142
143 if (tempp - temp >= tlen) {
144 ptr1 = end1;
145 }
146 DEBUG_PRINTF("got %zu\n", tempp - temp);
147 }
148
149 if (ptr1 != end1) {
150 DEBUG_PRINTF("bailing in history\n");
151 return;
152 }
153 }
154
155 DEBUG_PRINTF("scanning main buffer, start=%zu, len=%zu\n", *start, len);
156
157 const u8 *ptr2 = buf + *start;
158 const u8 *end2 = buf + len;
159
160 const u8 *found = run_hwlm_accel(aux, ptr2, end2);
161
162 if (found >= ptr2 + offset) {
163 size_t delta = found - offset - ptr2;
164 DEBUG_PRINTF("got %zu/%zu in 2nd buffer\n", delta, len);
165 *start += delta;
166 } else if (hlen) {
167 UNUSED size_t remaining = offset + ptr2 - found;
168 DEBUG_PRINTF("got %zu/%zu remaining in 1st buffer\n", remaining, hlen);
169 }
170}
171
172hwlm_error_t hwlmExec(const struct HWLM *t, const u8 *buf, size_t len,
173 size_t start, HWLMCallback cb, struct hs_scratch *scratch,
174 hwlm_group_t groups) {
175 assert(t);
176
177 DEBUG_PRINTF("buf len=%zu, start=%zu, groups=%llx\n", len, start, groups);
178 if (!groups) {
179 DEBUG_PRINTF("groups all off\n");
180 return HWLM_SUCCESS;
181 }
182
183 assert(start < len);
184
185 if (t->type == HWLM_ENGINE_NOOD) {
186 DEBUG_PRINTF("calling noodExec\n");
187 return noodExec(HWLM_C_DATA(t), buf, len, start, cb, scratch);
188 }
189
190 assert(t->type == HWLM_ENGINE_FDR);
191 const union AccelAux *aa = &t->accel0;
192 if ((groups & ~t->accel1_groups) == 0) {
193 DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
194 aa = &t->accel1;
195 }
196 do_accel_block(aa, buf, len, &start);
197 DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start);
198 return fdrExec(HWLM_C_DATA(t), buf, len, start, cb, scratch, groups);
199}
200
201hwlm_error_t hwlmExecStreaming(const struct HWLM *t, size_t len, size_t start,
202 HWLMCallback cb, struct hs_scratch *scratch,
203 hwlm_group_t groups) {
204 assert(t);
205 assert(scratch);
206
207 const u8 *hbuf = scratch->core_info.hbuf;
208 const size_t hlen = scratch->core_info.hlen;
209 const u8 *buf = scratch->core_info.buf;
210
211 DEBUG_PRINTF("hbuf len=%zu, buf len=%zu, start=%zu, groups=%llx\n", hlen,
212 len, start, groups);
213
214 if (!groups) {
215 return HWLM_SUCCESS;
216 }
217
218 assert(start < len);
219
220 if (t->type == HWLM_ENGINE_NOOD) {
221 DEBUG_PRINTF("calling noodExec\n");
222 // If we've been handed a start offset, we can use a block mode scan at
223 // that offset.
224 if (start) {
225 return noodExec(HWLM_C_DATA(t), buf, len, start, cb, scratch);
226 } else {
227 return noodExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, cb,
228 scratch);
229 }
230 }
231
232 assert(t->type == HWLM_ENGINE_FDR);
233 const union AccelAux *aa = &t->accel0;
234 if ((groups & ~t->accel1_groups) == 0) {
235 DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
236 aa = &t->accel1;
237 }
238 do_accel_streaming(aa, hbuf, hlen, buf, len, &start);
239 DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups, start);
240 return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len, start, cb,
241 scratch, groups);
242}
243