| 1 | /* |
| 2 | * Copyright (c) 2015-2017, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #ifndef FLOOD_RUNTIME |
| 30 | #define FLOOD_RUNTIME |
| 31 | |
/* Pick the flood-probe flavour: FLOOD_64 when ARCH_64_BIT is defined,
 * FLOOD_32 otherwise. The code below keys off FLOOD_32 to choose between
 * u64a and u32 probe words. */
#ifdef ARCH_64_BIT
#define FLOOD_64
#else
#define FLOOD_32
#endif

/* Buffers shorter than this many bytes are never probed for floods
 * (see the length check at the top of nextFloodDetect). */
#define FLOOD_MINIMUM_SIZE 256

/* NOTE(review): presumably the initial flood backoff distance handed in by
 * callers; it is not referenced anywhere in this header -- confirm. */
#define FLOOD_BACKOFF_START 32
| 39 | |
| 40 | static really_inline |
| 41 | const u8 * nextFloodDetect(const u8 * buf, size_t len, u32 floodBackoff) { |
| 42 | // if we don't have a flood at either the start or end, |
| 43 | // or have a very small buffer, don't bother with flood detection |
| 44 | if (len < FLOOD_MINIMUM_SIZE) { |
| 45 | return buf + len; |
| 46 | } |
| 47 | |
| 48 | /* entry points in runtime.c prefetch relevant data */ |
| 49 | #ifndef FLOOD_32 |
| 50 | u64a x11 = *(const u64a *)ROUNDUP_PTR(buf, 8); |
| 51 | u64a x12 = *(const u64a *)ROUNDUP_PTR(buf+8, 8); |
| 52 | if (x11 == x12) { |
| 53 | return buf + floodBackoff; |
| 54 | } |
| 55 | u64a x21 = *(const u64a *)ROUNDUP_PTR(buf + len/2, 8); |
| 56 | u64a x22 = *(const u64a *)ROUNDUP_PTR(buf + len/2 + 8, 8); |
| 57 | if (x21 == x22) { |
| 58 | return buf + floodBackoff; |
| 59 | } |
| 60 | u64a x31 = *(const u64a *)ROUNDUP_PTR(buf + len - 24, 8); |
| 61 | u64a x32 = *(const u64a *)ROUNDUP_PTR(buf + len - 16, 8); |
| 62 | if (x31 == x32) { |
| 63 | return buf + floodBackoff; |
| 64 | } |
| 65 | #else |
| 66 | u32 x11 = *(const u32 *)ROUNDUP_PTR(buf, 4); |
| 67 | u32 x12 = *(const u32 *)ROUNDUP_PTR(buf+4, 4); |
| 68 | if (x11 == x12) { |
| 69 | return buf + floodBackoff; |
| 70 | } |
| 71 | u32 x21 = *(const u32 *)ROUNDUP_PTR(buf + len/2, 4); |
| 72 | u32 x22 = *(const u32 *)ROUNDUP_PTR(buf + len/2 + 4, 4); |
| 73 | if (x21 == x22) { |
| 74 | return buf + floodBackoff; |
| 75 | } |
| 76 | u32 x31 = *(const u32 *)ROUNDUP_PTR(buf + len - 12, 4); |
| 77 | u32 x32 = *(const u32 *)ROUNDUP_PTR(buf + len - 8, 4); |
| 78 | if (x31 == x32) { |
| 79 | return buf + floodBackoff; |
| 80 | } |
| 81 | #endif |
| 82 | return buf + len; |
| 83 | } |
| 84 | |
| 85 | static really_inline |
| 86 | const u8 * floodDetect(const struct FDR * fdr, |
| 87 | const struct FDR_Runtime_Args * a, |
| 88 | const u8 ** ptrPtr, |
| 89 | const u8 * tryFloodDetect, |
| 90 | u32 * floodBackoffPtr, |
| 91 | hwlmcb_rv_t * control, |
| 92 | u32 iterBytes) { |
| 93 | DEBUG_PRINTF("attempting flood detection at %p\n" , tryFloodDetect); |
| 94 | const u8 * buf = a->buf; |
| 95 | const size_t len = a->len; |
| 96 | HWLMCallback cb = a->cb; |
| 97 | struct hs_scratch *scratch = a->scratch; |
| 98 | |
| 99 | const u8 * ptr = *ptrPtr; |
| 100 | // tryFloodDetect is never put in places where unconditional |
| 101 | // reads a short distance forward or backward here |
| 102 | // TODO: rationale for this line needs to be rediscovered!! |
| 103 | size_t mainLoopLen = len > 2 * iterBytes ? len - 2 * iterBytes : 0; |
| 104 | const u32 i = ptr - buf; |
| 105 | u32 j = i; |
| 106 | |
| 107 | // go from c to our FDRFlood structure |
| 108 | u8 c = buf[i]; |
| 109 | const u8 * fBase = ((const u8 *)fdr) + fdr->floodOffset; |
| 110 | u32 fIdx = ((const u32 *)fBase)[c]; |
| 111 | const struct FDRFlood * fsb = (const struct FDRFlood *)(fBase + sizeof(u32) * 256); |
| 112 | const struct FDRFlood * fl = &fsb[fIdx]; |
| 113 | |
| 114 | #ifndef FLOOD_32 |
| 115 | u64a cmpVal = c; |
| 116 | cmpVal |= cmpVal << 8; |
| 117 | cmpVal |= cmpVal << 16; |
| 118 | cmpVal |= cmpVal << 32; |
| 119 | u64a probe = *(const u64a *)ROUNDUP_PTR(buf+i, 8); |
| 120 | #else |
| 121 | u32 cmpVal = c; |
| 122 | cmpVal |= cmpVal << 8; |
| 123 | cmpVal |= cmpVal << 16; |
| 124 | u32 probe = *(const u32 *)ROUNDUP_PTR(buf+i, 4); |
| 125 | #endif |
| 126 | |
| 127 | if ((probe != cmpVal) || (fl->idCount >= FDR_FLOOD_MAX_IDS)) { |
| 128 | *floodBackoffPtr *= 2; |
| 129 | goto floodout; |
| 130 | } |
| 131 | |
| 132 | if (i < fl->suffix + 7) { |
| 133 | *floodBackoffPtr *= 2; |
| 134 | goto floodout; |
| 135 | } |
| 136 | |
| 137 | j = i - fl->suffix; |
| 138 | |
| 139 | #ifndef FLOOD_32 |
| 140 | j -= (u32)((uintptr_t)buf + j) & 0x7; // push j back to yield 8-aligned addrs |
| 141 | for (; j + 32 < mainLoopLen; j += 32) { |
| 142 | u64a v = *(const u64a *)(buf + j); |
| 143 | u64a v2 = *(const u64a *)(buf + j + 8); |
| 144 | u64a v3 = *(const u64a *)(buf + j + 16); |
| 145 | u64a v4 = *(const u64a *)(buf + j + 24); |
| 146 | if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) { |
| 147 | break; |
| 148 | } |
| 149 | } |
| 150 | for (; j + 8 < mainLoopLen; j += 8) { |
| 151 | u64a v = *(const u64a *)(buf + j); |
| 152 | if (v != cmpVal) { |
| 153 | break; |
| 154 | } |
| 155 | } |
| 156 | #else |
| 157 | j -= (u32)((size_t)buf + j) & 0x3; // push j back to yield 4-aligned addrs |
| 158 | for (; j + 16 < mainLoopLen; j += 16) { |
| 159 | u32 v = *(const u32 *)(buf + j); |
| 160 | u32 v2 = *(const u32 *)(buf + j + 4); |
| 161 | u32 v3 = *(const u32 *)(buf + j + 8); |
| 162 | u32 v4 = *(const u32 *)(buf + j + 12); |
| 163 | if ((v4 != cmpVal) || (v3 != cmpVal) || (v2 != cmpVal) || (v != cmpVal)) { |
| 164 | break; |
| 165 | } |
| 166 | } |
| 167 | for (; j + 4 < mainLoopLen; j += 4) { |
| 168 | u32 v = *(const u32 *)(buf + j); |
| 169 | if (v != cmpVal) { |
| 170 | break; |
| 171 | } |
| 172 | } |
| 173 | #endif |
| 174 | for (; j < mainLoopLen; j++) { |
| 175 | u8 v = *(const u8 *)(buf + j); |
| 176 | if (v != c) { |
| 177 | break; |
| 178 | } |
| 179 | } |
| 180 | if (j > i ) { |
| 181 | j--; // needed for some reaches |
| 182 | u32 itersAhead = (j-i)/iterBytes; |
| 183 | u32 floodSize = itersAhead*iterBytes; |
| 184 | |
| 185 | DEBUG_PRINTF("flooding %u size j %u i %u fl->idCount %hu " |
| 186 | "*control %016llx fl->allGroups %016llx\n" , |
| 187 | floodSize, j, i, fl->idCount, *control, fl->allGroups); |
| 188 | DEBUG_PRINTF("mainloopLen %zu mainStart ??? mainEnd ??? len %zu\n" , |
| 189 | mainLoopLen, len); |
| 190 | |
| 191 | if (fl->idCount && (*control & fl->allGroups)) { |
| 192 | switch (fl->idCount) { |
| 193 | #if !defined(FLOOD_DEBUG) |
| 194 | // Carefully unrolled code |
| 195 | case 1: |
| 196 | for (u32 t = 0; t < floodSize && (*control & fl->allGroups); |
| 197 | t += 4) { |
| 198 | DEBUG_PRINTF("aaa %u %llx\n" , t, fl->groups[0]); |
| 199 | if (*control & fl->groups[0]) { |
| 200 | *control = cb(i + t + 0, fl->ids[0], scratch); |
| 201 | } |
| 202 | if (*control & fl->groups[0]) { |
| 203 | *control = cb(i + t + 1, fl->ids[0], scratch); |
| 204 | } |
| 205 | if (*control & fl->groups[0]) { |
| 206 | *control = cb(i + t + 2, fl->ids[0], scratch); |
| 207 | } |
| 208 | if (*control & fl->groups[0]) { |
| 209 | *control = cb(i + t + 3, fl->ids[0], scratch); |
| 210 | } |
| 211 | } |
| 212 | break; |
| 213 | case 2: |
| 214 | for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 4) { |
| 215 | if (*control & fl->groups[0]) { |
| 216 | *control = cb(i + t, fl->ids[0], scratch); |
| 217 | } |
| 218 | if (*control & fl->groups[1]) { |
| 219 | *control = cb(i + t, fl->ids[1], scratch); |
| 220 | } |
| 221 | if (*control & fl->groups[0]) { |
| 222 | *control = |
| 223 | cb(i + t + 1, fl->ids[0], scratch); |
| 224 | } |
| 225 | if (*control & fl->groups[1]) { |
| 226 | *control = cb(i + t + 1, fl->ids[1], scratch); |
| 227 | } |
| 228 | if (*control & fl->groups[0]) { |
| 229 | *control = cb(i + t + 2, fl->ids[0], scratch); |
| 230 | } |
| 231 | if (*control & fl->groups[1]) { |
| 232 | *control = cb(i + t + 2, fl->ids[1], scratch); |
| 233 | } |
| 234 | if (*control & fl->groups[0]) { |
| 235 | *control = cb(i + t + 3, fl->ids[0], scratch); |
| 236 | } |
| 237 | if (*control & fl->groups[1]) { |
| 238 | *control = cb(i + t + 3, fl->ids[1], scratch); |
| 239 | } |
| 240 | } |
| 241 | break; |
| 242 | case 3: |
| 243 | for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) { |
| 244 | if (*control & fl->groups[0]) { |
| 245 | *control = cb(i + t, fl->ids[0], scratch); |
| 246 | } |
| 247 | if (*control & fl->groups[1]) { |
| 248 | *control = cb(i + t, fl->ids[1], scratch); |
| 249 | } |
| 250 | if (*control & fl->groups[2]) { |
| 251 | *control = cb(i + t, fl->ids[2], scratch); |
| 252 | } |
| 253 | if (*control & fl->groups[0]) { |
| 254 | *control = cb(i + t + 1, fl->ids[0], scratch); |
| 255 | } |
| 256 | if (*control & fl->groups[1]) { |
| 257 | *control = cb(i + t + 1, fl->ids[1], scratch); |
| 258 | } |
| 259 | if (*control & fl->groups[2]) { |
| 260 | *control = cb(i + t + 1, fl->ids[2], scratch); |
| 261 | } |
| 262 | } |
| 263 | break; |
| 264 | default: |
| 265 | // slow generalized loop |
| 266 | for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t += 2) { |
| 267 | |
| 268 | if (*control & fl->groups[0]) { |
| 269 | *control = cb(i + t, fl->ids[0], scratch); |
| 270 | } |
| 271 | if (*control & fl->groups[1]) { |
| 272 | *control = cb(i + t, fl->ids[1], scratch); |
| 273 | } |
| 274 | if (*control & fl->groups[2]) { |
| 275 | *control = cb(i + t, fl->ids[2], scratch); |
| 276 | } |
| 277 | if (*control & fl->groups[3]) { |
| 278 | *control = cb(i + t, fl->ids[3], scratch); |
| 279 | } |
| 280 | |
| 281 | for (u32 t2 = 4; t2 < fl->idCount; t2++) { |
| 282 | if (*control & fl->groups[t2]) { |
| 283 | *control = cb(i + t, fl->ids[t2], scratch); |
| 284 | } |
| 285 | } |
| 286 | |
| 287 | if (*control & fl->groups[0]) { |
| 288 | *control = cb(i + t + 1, fl->ids[0], scratch); |
| 289 | } |
| 290 | if (*control & fl->groups[1]) { |
| 291 | *control = cb(i + t + 1, fl->ids[1], scratch); |
| 292 | } |
| 293 | if (*control & fl->groups[2]) { |
| 294 | *control = cb(i + t + 1, fl->ids[2], scratch); |
| 295 | } |
| 296 | if (*control & fl->groups[3]) { |
| 297 | *control = cb(i + t + 1, fl->ids[3], scratch); |
| 298 | } |
| 299 | |
| 300 | for (u32 t2 = 4; t2 < fl->idCount; t2++) { |
| 301 | if (*control & fl->groups[t2]) { |
| 302 | *control = cb(i + t + 1, fl->ids[t2], scratch); |
| 303 | } |
| 304 | } |
| 305 | } |
| 306 | break; |
| 307 | #else |
| 308 | // Fallback for debugging |
| 309 | default: |
| 310 | for (u32 t = 0; t < floodSize && (*control & fl->allGroups); t++) { |
| 311 | for (u32 t2 = 0; t2 < fl->idCount; t2++) { |
| 312 | if (*control & fl->groups[t2]) { |
| 313 | *control = cb(i + t, fl->ids[t2], scratch); |
| 314 | } |
| 315 | } |
| 316 | } |
| 317 | #endif |
| 318 | } |
| 319 | } |
| 320 | ptr += floodSize; |
| 321 | } else { |
| 322 | *floodBackoffPtr *= 2; |
| 323 | } |
| 324 | |
| 325 | floodout: |
| 326 | if (j + *floodBackoffPtr < mainLoopLen - 128) { |
| 327 | tryFloodDetect = buf + MAX(i,j) + *floodBackoffPtr; |
| 328 | } else { |
| 329 | tryFloodDetect = buf + mainLoopLen; // set so we never do another flood detect |
| 330 | } |
| 331 | *ptrPtr = ptr; |
| 332 | DEBUG_PRINTF("finished flood detection at %p (next check %p)\n" , |
| 333 | ptr, tryFloodDetect); |
| 334 | return tryFloodDetect; |
| 335 | } |
| 336 | |
| 337 | #endif |
| 338 | |