| 1 | /* |
| 2 | * Copyright (c) 2015-2017, Intel Corporation |
| 3 | * |
| 4 | * Redistribution and use in source and binary forms, with or without |
| 5 | * modification, are permitted provided that the following conditions are met: |
| 6 | * |
| 7 | * * Redistributions of source code must retain the above copyright notice, |
| 8 | * this list of conditions and the following disclaimer. |
| 9 | * * Redistributions in binary form must reproduce the above copyright |
| 10 | * notice, this list of conditions and the following disclaimer in the |
| 11 | * documentation and/or other materials provided with the distribution. |
| 12 | * * Neither the name of Intel Corporation nor the names of its contributors |
| 13 | * may be used to endorse or promote products derived from this software |
| 14 | * without specific prior written permission. |
| 15 | * |
| 16 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 17 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 18 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 19 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 20 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 21 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 22 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 23 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 24 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 25 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 26 | * POSSIBILITY OF SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #include "fdr_internal.h" |
| 30 | #include "fdr_confirm.h" |
| 31 | #include "fdr_compile_internal.h" |
| 32 | #include "fdr_engine_description.h" |
| 33 | #include "grey.h" |
| 34 | #include "ue2common.h" |
| 35 | #include "util/alloc.h" |
| 36 | #include "util/bitutils.h" |
| 37 | #include "util/charreach.h" |
| 38 | #include "util/compare.h" |
| 39 | #include "util/ue2string.h" |
| 40 | #include "util/verify_types.h" |
| 41 | |
| 42 | #include <cstring> |
| 43 | #include <map> |
| 44 | #include <memory> |
| 45 | #include <string> |
| 46 | #include <vector> |
| 47 | |
| 48 | using namespace std; |
| 49 | |
| 50 | namespace ue2 { |
| 51 | |
| 52 | namespace { |
| 53 | struct FloodComparator { |
| 54 | bool operator()(const FDRFlood &f1, const FDRFlood &f2) const { |
| 55 | return std::memcmp(&f1, &f2, sizeof(f1)) < 0; |
| 56 | } |
| 57 | }; |
| 58 | } |
| 59 | |
| 60 | static |
| 61 | bool isDifferent(u8 oldC, u8 c, bool caseless) { |
| 62 | if (caseless) { |
| 63 | return mytolower(oldC) != mytolower(c); |
| 64 | } else { |
| 65 | return oldC != c; |
| 66 | } |
| 67 | } |
| 68 | |
| 69 | static |
| 70 | void updateFloodSuffix(vector<FDRFlood> &tmpFlood, u8 c, u32 suffix) { |
| 71 | FDRFlood &fl = tmpFlood[c]; |
| 72 | fl.suffix = MAX(fl.suffix, suffix + 1); |
| 73 | DEBUG_PRINTF("Updated Flood Suffix for char 0x%02x to %u\n" , c, fl.suffix); |
| 74 | } |
| 75 | |
| 76 | static |
| 77 | void addFlood(vector<FDRFlood> &tmpFlood, u8 c, const hwlmLiteral &lit, |
| 78 | u32 suffix) { |
| 79 | FDRFlood &fl = tmpFlood[c]; |
| 80 | fl.suffix = MAX(fl.suffix, suffix + 1); |
| 81 | if (fl.idCount < FDR_FLOOD_MAX_IDS) { |
| 82 | fl.ids[fl.idCount] = lit.id; |
| 83 | fl.allGroups |= lit.groups; |
| 84 | fl.groups[fl.idCount] = lit.groups; |
| 85 | // when idCount gets to max_ids this flood no longer happens |
| 86 | // only incremented one more time to avoid arithmetic overflow |
| 87 | DEBUG_PRINTF("Added Flood for char '%c' suffix=%u len[%hu]=%u\n" , |
| 88 | c, fl.suffix, fl.idCount, suffix); |
| 89 | fl.idCount++; |
| 90 | } |
| 91 | } |
| 92 | |
| 93 | bytecode_ptr<u8> setupFDRFloodControl(const vector<hwlmLiteral> &lits, |
| 94 | const EngineDescription &eng, |
| 95 | const Grey &grey) { |
| 96 | vector<FDRFlood> tmpFlood(N_CHARS); |
| 97 | u32 default_suffix = eng.getDefaultFloodSuffixLength(); |
| 98 | |
| 99 | // zero everything to avoid spurious distinctions in the compares |
| 100 | memset(&tmpFlood[0], 0, N_CHARS * sizeof(FDRFlood)); |
| 101 | |
| 102 | for (u32 c = 0; c < N_CHARS; c++) { |
| 103 | tmpFlood[c].suffix = default_suffix; |
| 104 | } |
| 105 | |
| 106 | for (const auto &lit : lits) { |
| 107 | DEBUG_PRINTF("lit: '%s'%s\n" , escapeString(lit.s).c_str(), |
| 108 | lit.nocase ? " (nocase)" : "" ); |
| 109 | u32 litSize = verify_u32(lit.s.size()); |
| 110 | u32 maskSize = (u32)lit.msk.size(); |
| 111 | u8 c = lit.s[litSize - 1]; |
| 112 | bool nocase = ourisalpha(c) ? lit.nocase : false; |
| 113 | |
| 114 | if (nocase && maskSize && (lit.msk[maskSize - 1] & CASE_BIT)) { |
| 115 | c = (lit.cmp[maskSize - 1] & CASE_BIT) ? mytolower(c) : mytoupper(c); |
| 116 | nocase = false; |
| 117 | } |
| 118 | |
| 119 | u32 iEnd = MAX(litSize, maskSize); |
| 120 | u32 upSuffix = iEnd; // upSuffix is used as an upper case suffix length |
| 121 | // for case-less, or as a suffix length for case-sensitive; |
| 122 | u32 loSuffix = iEnd; // loSuffix used only for case-less as a lower case suffix |
| 123 | // length; |
| 124 | |
| 125 | for (u32 i = 0; i < iEnd; i++) { |
| 126 | if (i < litSize) { |
| 127 | if (isDifferent(c, lit.s[litSize - i - 1], lit.nocase)) { |
| 128 | DEBUG_PRINTF("non-flood char in literal[%u]: " |
| 129 | "0x%02x != 0x%02x\n" , |
| 130 | i, c, lit.s[litSize - i - 1]); |
| 131 | upSuffix = MIN(upSuffix, i); |
| 132 | loSuffix = MIN(loSuffix, i); // makes sense only for case-less |
| 133 | break; |
| 134 | } |
| 135 | } |
| 136 | if (i < maskSize) { |
| 137 | u8 m = lit.msk[maskSize - i - 1]; |
| 138 | u8 cm = lit.cmp[maskSize - i - 1] & m; |
| 139 | if(nocase) { |
| 140 | if ((mytoupper(c) & m) != cm) { |
| 141 | DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n" , |
| 142 | i, mytoupper(c), cm); |
| 143 | upSuffix = MIN(upSuffix, i); |
| 144 | } |
| 145 | if ((mytolower(c) & m) != cm) { |
| 146 | DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n" , |
| 147 | i, mytolower(c), cm); |
| 148 | loSuffix = MIN(loSuffix, i); |
| 149 | } |
| 150 | if (loSuffix != iEnd && upSuffix != iEnd) { |
| 151 | break; |
| 152 | } |
| 153 | } else if ((c & m) != cm) { |
| 154 | DEBUG_PRINTF("non-flood char in mask[%u] %c != %c\n" , i, c, cm); |
| 155 | upSuffix = MIN(upSuffix, i); |
| 156 | break; |
| 157 | } |
| 158 | } |
| 159 | } |
| 160 | if(upSuffix != iEnd) { |
| 161 | updateFloodSuffix(tmpFlood, nocase ? mytoupper(c) : c, upSuffix); |
| 162 | } else { |
| 163 | addFlood(tmpFlood, nocase ? mytoupper(c) : c, lit, upSuffix); |
| 164 | } |
| 165 | if (nocase) { |
| 166 | if(loSuffix != iEnd) { |
| 167 | updateFloodSuffix(tmpFlood, mytolower(c), loSuffix); |
| 168 | } else { |
| 169 | addFlood(tmpFlood, mytolower(c), lit, loSuffix); |
| 170 | } |
| 171 | } |
| 172 | } |
| 173 | |
| 174 | #ifdef DEBUG |
| 175 | for (u32 i = 0; i < N_CHARS; i++) { |
| 176 | FDRFlood &fl = tmpFlood[i]; |
| 177 | if (!fl.idCount) { |
| 178 | continue; |
| 179 | } |
| 180 | |
| 181 | printf("i is %02x fl->idCount is %hd fl->suffix is %d fl->allGroups is " |
| 182 | "%016llx\n" , i, fl.idCount, fl.suffix, fl.allGroups); |
| 183 | for (u32 j = 0; j < fl.idCount; j++) { |
| 184 | printf("j is %d fl.groups[j] %016llx\n" , j, fl.groups[j]); |
| 185 | } |
| 186 | } |
| 187 | #endif |
| 188 | |
| 189 | // If flood detection has been switched off in the grey box, we comply by |
| 190 | // setting idCount too high for all floods. |
| 191 | if (!grey.fdrAllowFlood) { |
| 192 | for (auto &fl : tmpFlood) { |
| 193 | fl.idCount = FDR_FLOOD_MAX_IDS; |
| 194 | } |
| 195 | } |
| 196 | |
| 197 | map<FDRFlood, CharReach, FloodComparator> flood2chars; |
| 198 | for (u32 i = 0; i < N_CHARS; i++) { |
| 199 | FDRFlood fl = tmpFlood[i]; |
| 200 | flood2chars[fl].set(i); |
| 201 | } |
| 202 | |
| 203 | u32 nDistinctFloods = flood2chars.size(); |
| 204 | size_t = sizeof(u32) * N_CHARS; |
| 205 | size_t floodStructSize = sizeof(FDRFlood) * nDistinctFloods; |
| 206 | size_t totalSize = ROUNDUP_16(floodHeaderSize + floodStructSize); |
| 207 | |
| 208 | auto buf = make_zeroed_bytecode_ptr<u8>(totalSize, 16); |
| 209 | assert(buf); // otherwise would have thrown std::bad_alloc |
| 210 | |
| 211 | u32 * = (u32 *)buf.get(); |
| 212 | FDRFlood *layoutFlood = (FDRFlood *)(buf.get() + floodHeaderSize); |
| 213 | |
| 214 | u32 currentFloodIndex = 0; |
| 215 | for (const auto &m : flood2chars) { |
| 216 | const FDRFlood &fl = m.first; |
| 217 | const CharReach &cr = m.second; |
| 218 | layoutFlood[currentFloodIndex] = fl; |
| 219 | for (size_t c = cr.find_first(); c != cr.npos; c = cr.find_next(c)) { |
| 220 | floodHeader[c] = currentFloodIndex; |
| 221 | } |
| 222 | currentFloodIndex++; |
| 223 | } |
| 224 | |
| 225 | DEBUG_PRINTF("made a flood structure with %zu + %zu = %zu\n" , |
| 226 | floodHeaderSize, floodStructSize, totalSize); |
| 227 | |
| 228 | return buf; |
| 229 | } |
| 230 | |
| 231 | } // namespace ue2 |
| 232 | |