| 1 | /* | 
| 2 |  * Copyright (c) 2015-2017, Intel Corporation | 
| 3 |  * | 
| 4 |  * Redistribution and use in source and binary forms, with or without | 
| 5 |  * modification, are permitted provided that the following conditions are met: | 
| 6 |  * | 
| 7 |  *  * Redistributions of source code must retain the above copyright notice, | 
| 8 |  *    this list of conditions and the following disclaimer. | 
| 9 |  *  * Redistributions in binary form must reproduce the above copyright | 
| 10 |  *    notice, this list of conditions and the following disclaimer in the | 
| 11 |  *    documentation and/or other materials provided with the distribution. | 
| 12 |  *  * Neither the name of Intel Corporation nor the names of its contributors | 
| 13 |  *    may be used to endorse or promote products derived from this software | 
| 14 |  *    without specific prior written permission. | 
| 15 |  * | 
| 16 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | 
| 17 |  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
| 18 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
| 19 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | 
| 20 |  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 
| 21 |  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 
| 22 |  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 
| 23 |  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 
| 24 |  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 
| 25 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 
| 26 |  * POSSIBILITY OF SUCH DAMAGE. | 
| 27 |  */ | 
| 28 |  | 
| 29 | /** \file | 
| 30 |  * \brief Reverse acceleration analysis. | 
| 31 |  */ | 
| 32 | #include "ng_revacc.h" | 
| 33 |  | 
| 34 | #include "grey.h" | 
| 35 | #include "ng_holder.h" | 
| 36 | #include "ue2common.h" | 
| 37 | #include "nfa/accel.h" | 
| 38 | #include "nfa/nfa_internal.h" | 
| 39 | #include "util/bitutils.h" | 
| 40 | #include "util/charreach.h" | 
| 41 | #include "util/graph_range.h" | 
| 42 |  | 
| 43 | #include <set> | 
| 44 |  | 
| 45 | using namespace std; | 
| 46 |  | 
| 47 | namespace ue2 { | 
| 48 |  | 
| 49 | static | 
| 50 | bool isPseudoNoCaseChar(const CharReach &cr) { | 
| 51 |     return cr.count() == 2 && !(cr.find_first() & 32) | 
| 52 |         && cr.test(cr.find_first() | 32); | 
| 53 | } | 
| 54 |  | 
| 55 | static | 
| 56 | bool lookForEodSchemes(const RevAccInfo &rev_info, const u32 minWidth, | 
| 57 |                        NFA *nfa) { | 
| 58 |     DEBUG_PRINTF("pure eod triggered pattern\n" ); | 
| 59 |  | 
| 60 |     /* 2 char */ | 
| 61 |     for (u8 nocase = 0; nocase < 2; nocase++) { | 
| 62 |         for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) { | 
| 63 |             const CharReach &cr = rev_info.acceptEodReach[i]; | 
| 64 |             const CharReach &cr2 = rev_info.acceptEodReach[i - 1]; | 
| 65 |  | 
| 66 |             if (!nocase && cr.count() == 1 && cr2.count() == 1) { | 
| 67 |                 assert(i < minWidth); | 
| 68 |                 if (i >= minWidth) { | 
| 69 |                     goto single; | 
| 70 |                 } | 
| 71 |                 nfa->rAccelType = ACCEL_RDEOD; | 
| 72 |                 nfa->rAccelData.array[0] = (u8)cr.find_first(); | 
| 73 |                 nfa->rAccelData.array[1] = (u8)cr2.find_first(); | 
| 74 |                 nfa->rAccelOffset = i + 1; | 
| 75 |                 DEBUG_PRINTF("raccel eod x2 %u %04hx\n" , | 
| 76 |                              nfa->rAccelOffset, nfa->rAccelData.dc); | 
| 77 |                 return true; | 
| 78 |             } else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr)) | 
| 79 |                        && (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) { | 
| 80 |                 assert(i < minWidth); | 
| 81 |                 if (i >= minWidth) { | 
| 82 |                     goto single; | 
| 83 |                 } | 
| 84 |                 nfa->rAccelType = ACCEL_RDEOD_NOCASE; | 
| 85 |                 nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR;  /* uppercase */ | 
| 86 |                 nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR; | 
| 87 |                 nfa->rAccelOffset = i + 1; | 
| 88 |                 DEBUG_PRINTF("raccel nc eod x2 %u %04hx\n" , | 
| 89 |                              nfa->rAccelOffset, nfa->rAccelData.dc); | 
| 90 |                 return true; | 
| 91 |             } | 
| 92 |         } | 
| 93 |     } | 
| 94 |  | 
| 95 |  single: | 
| 96 |     /* 1 char */ | 
| 97 |     for (u8 nocase = 0; nocase < 2; nocase++) { | 
| 98 |         for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) { | 
| 99 |             const CharReach &cr = rev_info.acceptEodReach[i]; | 
| 100 |             if (!nocase && cr.count() == 1) { | 
| 101 |                 assert(i < minWidth); | 
| 102 |                 if (i >= minWidth) { | 
| 103 |                     return false; | 
| 104 |                 } | 
| 105 |                 nfa->rAccelType = ACCEL_REOD; | 
| 106 |                 nfa->rAccelData.c = (u8) cr.find_first(); | 
| 107 |                 nfa->rAccelOffset = i + 1; | 
| 108 |                 DEBUG_PRINTF("raccel eod %u %02hhx\n" , | 
| 109 |                              nfa->rAccelOffset, nfa->rAccelData.c); | 
| 110 |                 return true; | 
| 111 |             } else if (nocase && isPseudoNoCaseChar(cr)) { | 
| 112 |                 assert(i < minWidth); | 
| 113 |                 if (i >= minWidth) { | 
| 114 |                     return false; | 
| 115 |                 } | 
| 116 |                 nfa->rAccelType = ACCEL_REOD_NOCASE; | 
| 117 |                 nfa->rAccelData.c = (u8)cr.find_first(); /* uppercase */ | 
| 118 |                 nfa->rAccelOffset = i + 1; | 
| 119 |                 DEBUG_PRINTF("raccel nc eod %u %02hhx\n" , | 
| 120 |                              nfa->rAccelOffset, nfa->rAccelData.c); | 
| 121 |                 return true; | 
| 122 |             } | 
| 123 |         } | 
| 124 |     } | 
| 125 |  | 
| 126 |     return false; | 
| 127 | } | 
| 128 |  | 
| 129 | static | 
| 130 | bool lookForFloatingSchemes(const RevAccInfo &rev_info, | 
| 131 |                             const u32 minWidth, NFA *nfa) { | 
| 132 |     /* 2 char */ | 
| 133 |     for (u8 nocase = 0; nocase < 2; nocase++) { | 
| 134 |         for (u8 i = 1; i < MAX_RACCEL_OFFSET; i++) { | 
| 135 |             CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i]; | 
| 136 |             CharReach cr2 = rev_info.acceptEodReach[i - 1] | 
| 137 |                             | rev_info.acceptReach[i - 1]; | 
| 138 |             if (!nocase && cr.count() == 1 && cr2.count() == 1) { | 
| 139 |                 assert((u8)(i - 1) < minWidth); | 
| 140 |                 if (i > minWidth) { | 
| 141 |                     goto single; | 
| 142 |                 } | 
| 143 |                 nfa->rAccelType = ACCEL_RDVERM; | 
| 144 |                 nfa->rAccelData.array[0] = (u8)cr.find_first(); | 
| 145 |                 nfa->rAccelData.array[1] = (u8)cr2.find_first(); | 
| 146 |                 nfa->rAccelOffset = i; | 
| 147 |                 DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx\n" , | 
| 148 |                              nfa->rAccelOffset, nfa->rAccelData.array[0], | 
| 149 |                              nfa->rAccelData.array[1]); | 
| 150 |                 return true; | 
| 151 |             } else if (nocase && (cr.count() == 1 || isPseudoNoCaseChar(cr)) | 
| 152 |                         && (cr2.count() == 1 || isPseudoNoCaseChar(cr2))) { | 
| 153 |                 assert((u8)(i - 1) < minWidth); | 
| 154 |                 if (i > minWidth) { | 
| 155 |                     goto single; | 
| 156 |                 } | 
| 157 |                 nfa->rAccelType = ACCEL_RDVERM_NOCASE; | 
| 158 |                 nfa->rAccelData.array[0] = (u8)cr.find_first() & CASE_CLEAR; | 
| 159 |                 nfa->rAccelData.array[1] = (u8)cr2.find_first() & CASE_CLEAR; | 
| 160 |                 nfa->rAccelOffset = i; | 
| 161 |                 DEBUG_PRINTF("raccel dverm %u %02hhx%02hhx nc\n" , | 
| 162 |                              nfa->rAccelOffset, nfa->rAccelData.array[0], | 
| 163 |                              nfa->rAccelData.array[1]); | 
| 164 |                 return true; | 
| 165 |             } | 
| 166 |         } | 
| 167 |     } | 
| 168 |  | 
| 169 |  single: | 
| 170 |     /* 1 char */ | 
| 171 |     for (u8 nocase = 0; nocase < 2; nocase++) { | 
| 172 |         for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) { | 
| 173 |             CharReach cr = rev_info.acceptEodReach[i] | rev_info.acceptReach[i]; | 
| 174 |             if (!nocase && cr.count() == 1) { | 
| 175 |                 assert(i < minWidth); | 
| 176 |                 if (i >= minWidth) { | 
| 177 |                     return false; | 
| 178 |                 } | 
| 179 |                 nfa->rAccelType = ACCEL_RVERM; | 
| 180 |                 nfa->rAccelData.c = (u8)cr.find_first(); | 
| 181 |                 nfa->rAccelOffset = i + 1; | 
| 182 |                 DEBUG_PRINTF("raccel verm %u %02hhx\n" , nfa->rAccelOffset, | 
| 183 |                              nfa->rAccelData.c); | 
| 184 |                 return true; | 
| 185 |             } else if (nocase && isPseudoNoCaseChar(cr)) { | 
| 186 |                 assert(i < minWidth); | 
| 187 |                 if (i >= minWidth) { | 
| 188 |                     return false; | 
| 189 |                 } | 
| 190 |                 nfa->rAccelType = ACCEL_RVERM_NOCASE; | 
| 191 |                 nfa->rAccelData.c = (u8)cr.find_first(); /* 'uppercase' char */ | 
| 192 |                 nfa->rAccelOffset = i + 1; | 
| 193 |                 DEBUG_PRINTF("raccel nc verm %u %02hhx\n" , nfa->rAccelOffset, | 
| 194 |                              nfa->rAccelData.c); | 
| 195 |                 return true; | 
| 196 |             } | 
| 197 |         } | 
| 198 |     } | 
| 199 |  | 
| 200 |     return false; | 
| 201 | } | 
| 202 |  | 
| 203 | void buildReverseAcceleration(NFA *nfa, const RevAccInfo &rev_info, | 
| 204 |                               u32 min_width, bool eod_only) { | 
| 205 |     assert(nfa); | 
| 206 |  | 
| 207 |     if (!rev_info.valid) { | 
| 208 |         return; | 
| 209 |     } | 
| 210 |  | 
| 211 |     nfa->rAccelOffset = 1; | 
| 212 |  | 
| 213 |     assert(rev_info.acceptReach[0].any() || rev_info.acceptEodReach[0].any()); | 
| 214 |     if (rev_info.acceptReach[0].none() && rev_info.acceptEodReach[0].none()) { | 
| 215 |         DEBUG_PRINTF("expected path to accept\n" ); | 
| 216 |         return; | 
| 217 |     } | 
| 218 |  | 
| 219 |     if (rev_info.acceptReach[0].none()) { | 
| 220 |         /* eod only */ | 
| 221 |  | 
| 222 |         if (lookForEodSchemes(rev_info, min_width, nfa)) { | 
| 223 |             assert(nfa->rAccelOffset <= min_width); | 
| 224 |             return; | 
| 225 |         } | 
| 226 |     } | 
| 227 |  | 
| 228 |     if (eod_only) { | 
| 229 |         return; | 
| 230 |     } | 
| 231 |  | 
| 232 |     if (!lookForFloatingSchemes(rev_info, min_width, nfa)) { | 
| 233 |         DEBUG_PRINTF("failed to accelerate\n" ); | 
| 234 |     } | 
| 235 | } | 
| 236 |  | 
| 237 | static | 
| 238 | void populateRevAccelInfo(const NGHolder &g, NFAVertex terminal, | 
| 239 |                           vector<CharReach> *reach) { | 
| 240 |     set<NFAVertex> vset; | 
| 241 |  | 
| 242 |     for (auto v : inv_adjacent_vertices_range(terminal, g)) { | 
| 243 |         if (!is_special(v, g)) { | 
| 244 |             vset.insert(v); | 
| 245 |         } | 
| 246 |     } | 
| 247 |  | 
| 248 |     for (u8 offset = 0; offset < MAX_RACCEL_OFFSET; offset++) { | 
| 249 |         set<NFAVertex> next; | 
| 250 |  | 
| 251 |         for (auto v : vset) { | 
| 252 |             const CharReach &cr = g[v].char_reach; | 
| 253 |             (*reach)[offset] |= cr; | 
| 254 |  | 
| 255 |             DEBUG_PRINTF("off %u adding %zu to %zu\n" , offset, cr.count(), | 
| 256 |                          (*reach)[offset].count()); | 
| 257 |  | 
| 258 |             for (auto u : inv_adjacent_vertices_range(v, g)) { | 
| 259 |                 if (u == g.start || u == g.startDs) { | 
| 260 |                     /* kill all subsequent offsets by setting to dot, setting | 
| 261 |                      * to dot is in someways not accurate as there may be no | 
| 262 |                      * data at all but neither case can be accelerated */ | 
| 263 |                     for (u8 i = offset + 1; i < MAX_RACCEL_OFFSET; i++) { | 
| 264 |                         (*reach)[i].setall(); | 
| 265 |                     } | 
| 266 |                     break; | 
| 267 |                 } else if (!is_special(u, g)) { | 
| 268 |                     next.insert(u); | 
| 269 |                 } | 
| 270 |             } | 
| 271 |         } | 
| 272 |  | 
| 273 |         swap(vset, next); | 
| 274 |     } | 
| 275 | } | 
| 276 |  | 
| 277 | void populateReverseAccelerationInfo(RevAccInfo &rai, const NGHolder &g) { | 
| 278 |     DEBUG_PRINTF("pop rev info\n" ); | 
| 279 |     populateRevAccelInfo(g, g.accept, &rai.acceptReach); | 
| 280 |     populateRevAccelInfo(g, g.acceptEod, &rai.acceptEodReach); | 
| 281 |     rai.valid = true; | 
| 282 | } | 
| 283 |  | 
| 284 | void mergeReverseAccelerationInfo(RevAccInfo &dest, const RevAccInfo &vic) { | 
| 285 |     DEBUG_PRINTF("merging ra\n" ); | 
| 286 |  | 
| 287 |     dest.valid &= vic.valid; | 
| 288 |  | 
| 289 |     for (u8 i = 0; i < MAX_RACCEL_OFFSET; i++) { | 
| 290 |         dest.acceptReach[i]    |= vic.acceptReach[i]; | 
| 291 |         dest.acceptEodReach[i] |= vic.acceptEodReach[i]; | 
| 292 |     } | 
| 293 | } | 
| 294 |  | 
| 295 | RevAccInfo::RevAccInfo(void) | 
| 296 |     : valid(false), acceptReach(MAX_RACCEL_OFFSET), | 
| 297 |       acceptEodReach(MAX_RACCEL_OFFSET) {} | 
| 298 |  | 
| 299 | } // namespace ue2 | 
| 300 |  |