1 | /* |
2 | * Copyright (c) 2015-2017, Intel Corporation |
3 | * |
4 | * Redistribution and use in source and binary forms, with or without |
5 | * modification, are permitted provided that the following conditions are met: |
6 | * |
7 | * * Redistributions of source code must retain the above copyright notice, |
8 | * this list of conditions and the following disclaimer. |
9 | * * Redistributions in binary form must reproduce the above copyright |
10 | * notice, this list of conditions and the following disclaimer in the |
11 | * documentation and/or other materials provided with the distribution. |
12 | * * Neither the name of Intel Corporation nor the names of its contributors |
13 | * may be used to endorse or promote products derived from this software |
14 | * without specific prior written permission. |
15 | * |
16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
26 | * POSSIBILITY OF SUCH DAMAGE. |
27 | */ |
28 | |
29 | #ifndef FLOOD_RUNTIME |
30 | #define FLOOD_RUNTIME |
31 | |
/* Pick the probe word width used by the flood routines in this header:
 * when ARCH_64_BIT is defined FLOOD_64 is set, otherwise FLOOD_32 is set
 * (the code in this header keys its 4-byte vs 8-byte reads off FLOOD_32). */
#ifdef ARCH_64_BIT
#define FLOOD_64
#else
#define FLOOD_32
#endif

/* Buffers shorter than this many bytes are never probed for floods. */
#define FLOOD_MINIMUM_SIZE 256

/* NOTE(review): presumably the initial flood backoff distance in bytes; it
 * is not referenced by the code visible in this header - confirm at the
 * call sites. */
#define FLOOD_BACKOFF_START 32
39 | |
40 | static really_inline |
41 | const u8 * nextFloodDetect(const u8 * buf, size_t len, u32 floodBackoff) { |
42 | // if we don't have a flood at either the start or end, |
43 | // or have a very small buffer, don't bother with flood detection |
44 | if (len < FLOOD_MINIMUM_SIZE) { |
45 | return buf + len; |
46 | } |
47 | |
48 | /* entry points in runtime.c prefetch relevant data */ |
49 | #ifndef FLOOD_32 |
50 | u64a x11 = *(const u64a *)ROUNDUP_PTR(buf, 8); |
51 | u64a x12 = *(const u64a *)ROUNDUP_PTR(buf+8, 8); |
52 | if (x11 == x12) { |
53 | return buf + floodBackoff; |
54 | } |
55 | u64a x21 = *(const u64a *)ROUNDUP_PTR(buf + len/2, 8); |
56 | u64a x22 = *(const u64a *)ROUNDUP_PTR(buf + len/2 + 8, 8); |
57 | if (x21 == x22) { |
58 | return buf + floodBackoff; |
59 | } |
60 | u64a x31 = *(const u64a *)ROUNDUP_PTR(buf + len - 24, 8); |
61 | u64a x32 = *(const u64a *)ROUNDUP_PTR(buf + len - 16, 8); |
62 | if (x31 == x32) { |
63 | return buf + floodBackoff; |
64 | } |
65 | #else |
66 | u32 x11 = *(const u32 *)ROUNDUP_PTR(buf, 4); |
67 | u32 x12 = *(const u32 *)ROUNDUP_PTR(buf+4, 4); |
68 | if (x11 == x12) { |
69 | return buf + floodBackoff; |
70 | } |
71 | u32 x21 = *(const u32 *)ROUNDUP_PTR(buf + len/2, 4); |
72 | u32 x22 = *(const u32 *)ROUNDUP_PTR(buf + len/2 + 4, 4); |
73 | if (x21 == x22) { |
74 | return buf + floodBackoff; |
75 | } |
76 | u32 x31 = *(const u32 *)ROUNDUP_PTR(buf + len - 12, 4); |
77 | u32 x32 = *(const u32 *)ROUNDUP_PTR(buf + len - 8, 4); |
78 | if (x31 == x32) { |
79 | return buf + floodBackoff; |
80 | } |
81 | #endif |
82 | return buf + len; |
83 | } |
84 | |
85 | static really_inline |
86 | const u8 * floodDetect(const struct FDR * fdr, |
87 | const struct FDR_Runtime_Args * a, |
88 | const u8 ** ptrPtr, |
89 | const u8 * tryFloodDetect, |
90 | u32 * floodBackoffPtr, |
91 | hwlmcb_rv_t * control, |
92 | u32 iterBytes) { |
93 | DEBUG_PRINTF("attempting flood detection at %p\n" , tryFloodDetect); |
94 | const u8 * buf = a->buf; |
95 | const size_t len = a->len; |
96 | HWLMCallback cb = a->cb; |
97 | struct hs_scratch *scratch = a->scratch; |
98 | |
99 | const u8 * ptr = *ptrPtr; |
100 | // tryFloodDetect is never put in places where unconditional |
101 | // reads a short distance forward or backward here |
102 | // TODO: rationale for this line needs to be rediscovered!! |
103 | size_t mainLoopLen = len > 2 * iterBytes ? len - 2 * iterBytes : 0; |
104 | const u32 i = ptr - buf; |
105 | u32 j = i; |
106 | |
107 | // go from c to our FDRFlood structure |
108 | u8 c = buf[i]; |
109 | const u8 * fBase = ((const u8 *)fdr) + fdr->floodOffset; |
110 | u32 fIdx = ((const u32 *)fBase)[c]; |
111 | const struct FDRFlood * fsb = (const struct FDRFlood *)(fBase + sizeof(u32) * 256); |
112 | const struct FDRFlood * fl = &fsb[fIdx]; |
113 | |
114 | #ifndef FLOOD_32 |
115 | u64a cmpVal = c; |
116 | cmpVal |= cmpVal << 8; |
117 | cmpVal |= cmpVal << 16; |
118 | cmpVal |= cmpVal << 32; |
119 | u64a probe = *(const u64a *)ROUNDUP_PTR(buf+i, 8); |
120 | #else |
121 | u32 cmpVal = c; |
122 | cmpVal |= cmpVal << 8; |
123 | cmpVal |= cmpVal << 16; |
124 | u32 probe = *(const u32 *)ROUNDUP_PTR(buf+i, 4); |
125 | #endif |
126 | |
127 | if ((probe != cmpVal) || (fl->idCount >= FDR_FLOOD_MAX_IDS)) { |
128 | *floodBackoffPtr *= 2; |
129 | goto floodout; |
130 | } |
131 | |
132 | if (i < fl->suffix + 7) { |
133 | *floodBackoffPtr *= 2; |
134 | goto floodout; |
135 | } |
136 | |
137 | j = i - fl->suffix; |
138 | |
139 | #ifndef FLOOD_32 |
140 | j -= (u32)((uintptr_t)buf + j) & 0x7; // push j back to yield 8-aligned addrs |
141 | for (; j + 32 < mainLoopLen; j += 32) { |
142 | u64a v = *(const u64a *)(buf + j); |
143 | u64a v2 = *(const u64a *)(buf + j + 8); |
144 | u64a v3 = *(const u64a *)(buf + j + 16); |
145 | u64a v4 = *(const u64a *)(buf + j + 24); |
146 | if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) { |
147 | break; |
148 | } |
149 | } |
150 | for (; j + 8 < mainLoopLen; j += 8) { |
151 | u64a v = *(const u64a *)(buf + j); |
152 | if (v != cmpVal) { |
153 | break; |
154 | } |
155 | } |
156 | #else |
157 | j -= (u32)((size_t)buf + j) & 0x3; // push j back to yield 4-aligned addrs |
158 | for (; j + 16 < mainLoopLen; j += 16) { |
159 | u32 v = *(const u32 *)(buf + j); |
160 | u32 v2 = *(const u32 *)(buf + j + 4); |
161 | u32 v3 = *(const u32 *)(buf + j + 8); |
162 | u32 v4 = *(const u32 *)(buf + j + 12); |
163 | if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) { |
164 | break; |
165 | } |
166 | } |
167 | for (; j + 4 < mainLoopLen; j += 4) { |
168 | u32 v = *(const u32 *)(buf + j); |
169 | if (v != cmpVal) { |
170 | break; |
171 | } |
172 | } |
173 | #endif |
174 | for (; j < mainLoopLen; j++) { |
175 | u8 v = *(const u8 *)(buf + j); |
176 | if (v != c) { |
177 | break; |
178 | } |
179 | } |
180 | if (j > i ) { |
181 | j--; // needed for some reaches |
182 | u32 itersAhead = (j-i)/iterBytes; |
183 | u32 floodSize = itersAhead*iterBytes; |
184 | |
185 | DEBUG_PRINTF("flooding %u size j %u i %u fl->idCount %hu " |
186 | "*control %016llx fl->allGroups %016llx\n" , |
187 | floodSize, j, i, fl->idCount, *control, fl->allGroups); |
188 | DEBUG_PRINTF("mainloopLen %zu mainStart ??? mainEnd ??? len %zu\n" , |
189 | mainLoopLen, len); |
190 | |
191 | if (fl->idCount && (*control & fl->allGroups)) { |
192 | switch (fl->idCount) { |
193 | #if !defined(FLOOD_DEBUG) |
194 | // Carefully unrolled code |
195 | case 1: |
196 | for (u32 t = 0; t < floodSize && (*control & fl->allGroups); |
197 | t += 4) { |
198 | DEBUG_PRINTF("aaa %u %llx\n" , t, fl->groups[0]); |
199 | if (*control & fl->groups[0]) { |
200 | *control = cb(i + t + 0, fl->ids[0], scratch); |
201 | } |
202 | if (*control & fl->groups[0]) { |
203 | *control = cb(i + t + 1, fl->ids[0], scratch); |
204 | } |
205 | if (*control & fl->groups[0]) { |
206 | *control = cb(i + t + 2, fl->ids[0], scratch); |
207 | } |
208 | if (*control & fl->groups[0]) { |
209 | *control = cb(i + t + 3, fl->ids[0], scratch); |
210 | } |
211 | } |
212 | break; |
213 | case 2: |
214 | for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 4) { |
215 | if (*control & fl->groups[0]) { |
216 | *control = cb(i + t, fl->ids[0], scratch); |
217 | } |
218 | if (*control & fl->groups[1]) { |
219 | *control = cb(i + t, fl->ids[1], scratch); |
220 | } |
221 | if (*control & fl->groups[0]) { |
222 | *control = |
223 | cb(i + t + 1, fl->ids[0], scratch); |
224 | } |
225 | if (*control & fl->groups[1]) { |
226 | *control = cb(i + t + 1, fl->ids[1], scratch); |
227 | } |
228 | if (*control & fl->groups[0]) { |
229 | *control = cb(i + t + 2, fl->ids[0], scratch); |
230 | } |
231 | if (*control & fl->groups[1]) { |
232 | *control = cb(i + t + 2, fl->ids[1], scratch); |
233 | } |
234 | if (*control & fl->groups[0]) { |
235 | *control = cb(i + t + 3, fl->ids[0], scratch); |
236 | } |
237 | if (*control & fl->groups[1]) { |
238 | *control = cb(i + t + 3, fl->ids[1], scratch); |
239 | } |
240 | } |
241 | break; |
242 | case 3: |
243 | for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) { |
244 | if (*control & fl->groups[0]) { |
245 | *control = cb(i + t, fl->ids[0], scratch); |
246 | } |
247 | if (*control & fl->groups[1]) { |
248 | *control = cb(i + t, fl->ids[1], scratch); |
249 | } |
250 | if (*control & fl->groups[2]) { |
251 | *control = cb(i + t, fl->ids[2], scratch); |
252 | } |
253 | if (*control & fl->groups[0]) { |
254 | *control = cb(i + t + 1, fl->ids[0], scratch); |
255 | } |
256 | if (*control & fl->groups[1]) { |
257 | *control = cb(i + t + 1, fl->ids[1], scratch); |
258 | } |
259 | if (*control & fl->groups[2]) { |
260 | *control = cb(i + t + 1, fl->ids[2], scratch); |
261 | } |
262 | } |
263 | break; |
264 | default: |
265 | // slow generalized loop |
266 | for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) { |
267 | |
268 | if (*control & fl->groups[0]) { |
269 | *control = cb(i + t, fl->ids[0], scratch); |
270 | } |
271 | if (*control & fl->groups[1]) { |
272 | *control = cb(i + t, fl->ids[1], scratch); |
273 | } |
274 | if (*control & fl->groups[2]) { |
275 | *control = cb(i + t, fl->ids[2], scratch); |
276 | } |
277 | if (*control & fl->groups[3]) { |
278 | *control = cb(i + t, fl->ids[3], scratch); |
279 | } |
280 | |
281 | for (u32 t2 = 4; t2 < fl->idCount; t2++) { |
282 | if (*control & fl->groups[t2]) { |
283 | *control = cb(i + t, fl->ids[t2], scratch); |
284 | } |
285 | } |
286 | |
287 | if (*control & fl->groups[0]) { |
288 | *control = cb(i + t + 1, fl->ids[0], scratch); |
289 | } |
290 | if (*control & fl->groups[1]) { |
291 | *control = cb(i + t + 1, fl->ids[1], scratch); |
292 | } |
293 | if (*control & fl->groups[2]) { |
294 | *control = cb(i + t + 1, fl->ids[2], scratch); |
295 | } |
296 | if (*control & fl->groups[3]) { |
297 | *control = cb(i + t + 1, fl->ids[3], scratch); |
298 | } |
299 | |
300 | for (u32 t2 = 4; t2 < fl->idCount; t2++) { |
301 | if (*control & fl->groups[t2]) { |
302 | *control = cb(i + t + 1, fl->ids[t2], scratch); |
303 | } |
304 | } |
305 | } |
306 | break; |
307 | #else |
308 | // Fallback for debugging |
309 | default: |
310 | for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t++) { |
311 | for (u32 t2 = 0; t2 < fl->idCount; t2++) { |
312 | if (*control & fl->groups[t2]) { |
313 | *control = cb(i + t, fl->ids[t2], scratch); |
314 | } |
315 | } |
316 | } |
317 | #endif |
318 | } |
319 | } |
320 | ptr += floodSize; |
321 | } else { |
322 | *floodBackoffPtr *= 2; |
323 | } |
324 | |
325 | floodout: |
326 | if (j + *floodBackoffPtr < mainLoopLen - 128) { |
327 | tryFloodDetect = buf + MAX(i,j) + *floodBackoffPtr; |
328 | } else { |
329 | tryFloodDetect = buf + mainLoopLen; // set so we never do another flood detect |
330 | } |
331 | *ptrPtr = ptr; |
332 | DEBUG_PRINTF("finished flood detection at %p (next check %p)\n" , |
333 | ptr, tryFloodDetect); |
334 | return tryFloodDetect; |
335 | } |
336 | |
337 | #endif |
338 | |