1/*
2 * Copyright (c) 2015-2017, Intel Corporation
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * * Redistributions of source code must retain the above copyright notice,
8 * this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Intel Corporation nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#ifndef FLOOD_RUNTIME
30#define FLOOD_RUNTIME
31
/*
 * Probe word width: exactly one of FLOOD_64 / FLOOD_32 is defined, chosen by
 * whether the build defines ARCH_64_BIT. The code in this header tests
 * FLOOD_32 to pick between 8-byte and 4-byte loads.
 */
#ifdef ARCH_64_BIT
#    define FLOOD_64
#else
#    define FLOOD_32
#endif

/* Buffers shorter than this many bytes are never probed for floods. */
#define FLOOD_MINIMUM_SIZE  256

/* Not referenced in this header; presumably the initial flood backoff
 * distance used by callers -- TODO confirm against the scan entry points. */
#define FLOOD_BACKOFF_START 32
39
40static really_inline
41const u8 * nextFloodDetect(const u8 * buf, size_t len, u32 floodBackoff) {
42 // if we don't have a flood at either the start or end,
43 // or have a very small buffer, don't bother with flood detection
44 if (len < FLOOD_MINIMUM_SIZE) {
45 return buf + len;
46 }
47
48 /* entry points in runtime.c prefetch relevant data */
49#ifndef FLOOD_32
50 u64a x11 = *(const u64a *)ROUNDUP_PTR(buf, 8);
51 u64a x12 = *(const u64a *)ROUNDUP_PTR(buf+8, 8);
52 if (x11 == x12) {
53 return buf + floodBackoff;
54 }
55 u64a x21 = *(const u64a *)ROUNDUP_PTR(buf + len/2, 8);
56 u64a x22 = *(const u64a *)ROUNDUP_PTR(buf + len/2 + 8, 8);
57 if (x21 == x22) {
58 return buf + floodBackoff;
59 }
60 u64a x31 = *(const u64a *)ROUNDUP_PTR(buf + len - 24, 8);
61 u64a x32 = *(const u64a *)ROUNDUP_PTR(buf + len - 16, 8);
62 if (x31 == x32) {
63 return buf + floodBackoff;
64 }
65#else
66 u32 x11 = *(const u32 *)ROUNDUP_PTR(buf, 4);
67 u32 x12 = *(const u32 *)ROUNDUP_PTR(buf+4, 4);
68 if (x11 == x12) {
69 return buf + floodBackoff;
70 }
71 u32 x21 = *(const u32 *)ROUNDUP_PTR(buf + len/2, 4);
72 u32 x22 = *(const u32 *)ROUNDUP_PTR(buf + len/2 + 4, 4);
73 if (x21 == x22) {
74 return buf + floodBackoff;
75 }
76 u32 x31 = *(const u32 *)ROUNDUP_PTR(buf + len - 12, 4);
77 u32 x32 = *(const u32 *)ROUNDUP_PTR(buf + len - 8, 4);
78 if (x31 == x32) {
79 return buf + floodBackoff;
80 }
81#endif
82 return buf + len;
83}
84
85static really_inline
86const u8 * floodDetect(const struct FDR * fdr,
87 const struct FDR_Runtime_Args * a,
88 const u8 ** ptrPtr,
89 const u8 * tryFloodDetect,
90 u32 * floodBackoffPtr,
91 hwlmcb_rv_t * control,
92 u32 iterBytes) {
93 DEBUG_PRINTF("attempting flood detection at %p\n", tryFloodDetect);
94 const u8 * buf = a->buf;
95 const size_t len = a->len;
96 HWLMCallback cb = a->cb;
97 struct hs_scratch *scratch = a->scratch;
98
99 const u8 * ptr = *ptrPtr;
100 // tryFloodDetect is never put in places where unconditional
101 // reads a short distance forward or backward here
102 // TODO: rationale for this line needs to be rediscovered!!
103 size_t mainLoopLen = len > 2 * iterBytes ? len - 2 * iterBytes : 0;
104 const u32 i = ptr - buf;
105 u32 j = i;
106
107 // go from c to our FDRFlood structure
108 u8 c = buf[i];
109 const u8 * fBase = ((const u8 *)fdr) + fdr->floodOffset;
110 u32 fIdx = ((const u32 *)fBase)[c];
111 const struct FDRFlood * fsb = (const struct FDRFlood *)(fBase + sizeof(u32) * 256);
112 const struct FDRFlood * fl = &fsb[fIdx];
113
114#ifndef FLOOD_32
115 u64a cmpVal = c;
116 cmpVal |= cmpVal << 8;
117 cmpVal |= cmpVal << 16;
118 cmpVal |= cmpVal << 32;
119 u64a probe = *(const u64a *)ROUNDUP_PTR(buf+i, 8);
120#else
121 u32 cmpVal = c;
122 cmpVal |= cmpVal << 8;
123 cmpVal |= cmpVal << 16;
124 u32 probe = *(const u32 *)ROUNDUP_PTR(buf+i, 4);
125#endif
126
127 if ((probe != cmpVal) || (fl->idCount >= FDR_FLOOD_MAX_IDS)) {
128 *floodBackoffPtr *= 2;
129 goto floodout;
130 }
131
132 if (i < fl->suffix + 7) {
133 *floodBackoffPtr *= 2;
134 goto floodout;
135 }
136
137 j = i - fl->suffix;
138
139#ifndef FLOOD_32
140 j -= (u32)((uintptr_t)buf + j) & 0x7; // push j back to yield 8-aligned addrs
141 for (; j + 32 < mainLoopLen; j += 32) {
142 u64a v = *(const u64a *)(buf + j);
143 u64a v2 = *(const u64a *)(buf + j + 8);
144 u64a v3 = *(const u64a *)(buf + j + 16);
145 u64a v4 = *(const u64a *)(buf + j + 24);
146 if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) {
147 break;
148 }
149 }
150 for (; j + 8 < mainLoopLen; j += 8) {
151 u64a v = *(const u64a *)(buf + j);
152 if (v != cmpVal) {
153 break;
154 }
155 }
156#else
157 j -= (u32)((size_t)buf + j) & 0x3; // push j back to yield 4-aligned addrs
158 for (; j + 16 < mainLoopLen; j += 16) {
159 u32 v = *(const u32 *)(buf + j);
160 u32 v2 = *(const u32 *)(buf + j + 4);
161 u32 v3 = *(const u32 *)(buf + j + 8);
162 u32 v4 = *(const u32 *)(buf + j + 12);
163 if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) {
164 break;
165 }
166 }
167 for (; j + 4 < mainLoopLen; j += 4) {
168 u32 v = *(const u32 *)(buf + j);
169 if (v != cmpVal) {
170 break;
171 }
172 }
173#endif
174 for (; j < mainLoopLen; j++) {
175 u8 v = *(const u8 *)(buf + j);
176 if (v != c) {
177 break;
178 }
179 }
180 if (j > i ) {
181 j--; // needed for some reaches
182 u32 itersAhead = (j-i)/iterBytes;
183 u32 floodSize = itersAhead*iterBytes;
184
185 DEBUG_PRINTF("flooding %u size j %u i %u fl->idCount %hu "
186 "*control %016llx fl->allGroups %016llx\n",
187 floodSize, j, i, fl->idCount, *control, fl->allGroups);
188 DEBUG_PRINTF("mainloopLen %zu mainStart ??? mainEnd ??? len %zu\n",
189 mainLoopLen, len);
190
191 if (fl->idCount && (*control & fl->allGroups)) {
192 switch (fl->idCount) {
193#if !defined(FLOOD_DEBUG)
194 // Carefully unrolled code
195 case 1:
196 for (u32 t = 0; t < floodSize && (*control & fl->allGroups);
197 t += 4) {
198 DEBUG_PRINTF("aaa %u %llx\n", t, fl->groups[0]);
199 if (*control & fl->groups[0]) {
200 *control = cb(i + t + 0, fl->ids[0], scratch);
201 }
202 if (*control & fl->groups[0]) {
203 *control = cb(i + t + 1, fl->ids[0], scratch);
204 }
205 if (*control & fl->groups[0]) {
206 *control = cb(i + t + 2, fl->ids[0], scratch);
207 }
208 if (*control & fl->groups[0]) {
209 *control = cb(i + t + 3, fl->ids[0], scratch);
210 }
211 }
212 break;
213 case 2:
214 for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 4) {
215 if (*control & fl->groups[0]) {
216 *control = cb(i + t, fl->ids[0], scratch);
217 }
218 if (*control & fl->groups[1]) {
219 *control = cb(i + t, fl->ids[1], scratch);
220 }
221 if (*control & fl->groups[0]) {
222 *control =
223 cb(i + t + 1, fl->ids[0], scratch);
224 }
225 if (*control & fl->groups[1]) {
226 *control = cb(i + t + 1, fl->ids[1], scratch);
227 }
228 if (*control & fl->groups[0]) {
229 *control = cb(i + t + 2, fl->ids[0], scratch);
230 }
231 if (*control & fl->groups[1]) {
232 *control = cb(i + t + 2, fl->ids[1], scratch);
233 }
234 if (*control & fl->groups[0]) {
235 *control = cb(i + t + 3, fl->ids[0], scratch);
236 }
237 if (*control & fl->groups[1]) {
238 *control = cb(i + t + 3, fl->ids[1], scratch);
239 }
240 }
241 break;
242 case 3:
243 for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
244 if (*control & fl->groups[0]) {
245 *control = cb(i + t, fl->ids[0], scratch);
246 }
247 if (*control & fl->groups[1]) {
248 *control = cb(i + t, fl->ids[1], scratch);
249 }
250 if (*control & fl->groups[2]) {
251 *control = cb(i + t, fl->ids[2], scratch);
252 }
253 if (*control & fl->groups[0]) {
254 *control = cb(i + t + 1, fl->ids[0], scratch);
255 }
256 if (*control & fl->groups[1]) {
257 *control = cb(i + t + 1, fl->ids[1], scratch);
258 }
259 if (*control & fl->groups[2]) {
260 *control = cb(i + t + 1, fl->ids[2], scratch);
261 }
262 }
263 break;
264 default:
265 // slow generalized loop
266 for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) {
267
268 if (*control & fl->groups[0]) {
269 *control = cb(i + t, fl->ids[0], scratch);
270 }
271 if (*control & fl->groups[1]) {
272 *control = cb(i + t, fl->ids[1], scratch);
273 }
274 if (*control & fl->groups[2]) {
275 *control = cb(i + t, fl->ids[2], scratch);
276 }
277 if (*control & fl->groups[3]) {
278 *control = cb(i + t, fl->ids[3], scratch);
279 }
280
281 for (u32 t2 = 4; t2 < fl->idCount; t2++) {
282 if (*control & fl->groups[t2]) {
283 *control = cb(i + t, fl->ids[t2], scratch);
284 }
285 }
286
287 if (*control & fl->groups[0]) {
288 *control = cb(i + t + 1, fl->ids[0], scratch);
289 }
290 if (*control & fl->groups[1]) {
291 *control = cb(i + t + 1, fl->ids[1], scratch);
292 }
293 if (*control & fl->groups[2]) {
294 *control = cb(i + t + 1, fl->ids[2], scratch);
295 }
296 if (*control & fl->groups[3]) {
297 *control = cb(i + t + 1, fl->ids[3], scratch);
298 }
299
300 for (u32 t2 = 4; t2 < fl->idCount; t2++) {
301 if (*control & fl->groups[t2]) {
302 *control = cb(i + t + 1, fl->ids[t2], scratch);
303 }
304 }
305 }
306 break;
307#else
308 // Fallback for debugging
309 default:
310 for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t++) {
311 for (u32 t2 = 0; t2 < fl->idCount; t2++) {
312 if (*control & fl->groups[t2]) {
313 *control = cb(i + t, fl->ids[t2], scratch);
314 }
315 }
316 }
317#endif
318 }
319 }
320 ptr += floodSize;
321 } else {
322 *floodBackoffPtr *= 2;
323 }
324
325floodout:
326 if (j + *floodBackoffPtr < mainLoopLen - 128) {
327 tryFloodDetect = buf + MAX(i,j) + *floodBackoffPtr;
328 } else {
329 tryFloodDetect = buf + mainLoopLen; // set so we never do another flood detect
330 }
331 *ptrPtr = ptr;
332 DEBUG_PRINTF("finished flood detection at %p (next check %p)\n",
333 ptr, tryFloodDetect);
334 return tryFloodDetect;
335}
336
337#endif
338